author     Linus Torvalds <torvalds@linux-foundation.org>  2018-01-31 15:05:10 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-01-31 15:05:10 -0500
commit     7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899 (patch)
tree       cbc3ec5d45b04666c24f7c0b1df04a85d29c7d0f
parent     2155e69a9d9acd42488ef994a4e1ff535438c128 (diff)
parent     e7996a9a77fc669387da43ff4823b91cc4872bd0 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull RDMA subsystem updates from Jason Gunthorpe:
 "Overall this cycle did not have any major excitement, and did not
  require any shared branch with netdev.

  Lots of driver updates, particularly of the scale-up and performance
  variety. The largest body of core work was Parav's patches fixing and
  restructuring some of the core code to make way for future RDMA
  containerization.

  Summary:

   - misc small driver fixups to
     bnxt_re/hfi1/qib/hns/ocrdma/rdmavt/vmw_pvrdma/nes

   - several major feature adds to bnxt_re driver: SRIOV VF RoCE support,
     HugePages support, extended hardware stats support, and SRQ support

   - a notable number of fixes to the i40iw driver from debugging
     scale-up testing

   - more work to enable the new hip08 chip in the hns driver

   - misc small ULP fixups to srp/srpt/ipoib

   - preparation for srp initiator and target to support the RDMA-CM
     protocol for connections

   - add RDMA-CM support to srp initiator, srp target is still a WIP

   - fixes for a couple of places where ipoib could spam the dmesg log

   - fix encode/decode of FDR/EDR data rates in the core

   - many patches from Parav with ongoing work to clean up inconsistencies
     and bugs in RoCE support around the rdma_cm

   - mlx5 driver support for the userspace features 'thread domain',
     'wallclock timestamps' and 'DV Direct Connected transport'. Support
     for the firmware dual-port RoCE capability

   - core support for more than 32 rdma devices in the char dev allocation

   - kernel doc updates from Randy Dunlap

   - new netlink uAPI for inspecting RDMA objects similar in spirit to 'ss'

   - one minor change to the kobject code acked by Greg KH"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (259 commits)
  RDMA/nldev: Provide detailed QP information
  RDMA/nldev: Provide global resource utilization
  RDMA/core: Add resource tracking for create and destroy PDs
  RDMA/core: Add resource tracking for create and destroy CQs
  RDMA/core: Add resource tracking for create and destroy QPs
  RDMA/restrack: Add general infrastructure to track RDMA resources
  RDMA/core: Save kernel caller name when creating PD and CQ objects
  RDMA/core: Use the MODNAME instead of the function name for pd callers
  RDMA: Move enum ib_cq_creation_flags to uapi headers
  IB/rxe: Change RDMA_RXE kconfig to use select
  IB/qib: remove qib_keys.c
  IB/mthca: remove mthca_user.h
  RDMA/cm: Fix access to uninitialized variable
  RDMA/cma: Use existing netif_is_bond_master function
  IB/core: Avoid SGID attributes query while converting GID from OPA to IB
  RDMA/mlx5: Avoid memory leak in case of XRCD dealloc failure
  IB/umad: Fix use of unprotected device pointer
  IB/iser: Combine substrings for three messages
  IB/iser: Delete an unnecessary variable initialisation in iser_send_data_out()
  IB/iser: Delete an error message for a failed memory allocation in iser_send_data_out()
  ...
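The largest piece of core work above is the new resource-tracking (restrack) infrastructure that the nldev netlink interface builds on. As a rough, illustrative sketch only: a kernel-side consumer might read the per-type counters along the lines below. The names used (rdma_restrack_count(), the RDMA_RESTRACK_* types, and the 'res' root embedded in struct ib_device) are assumptions taken from the restrack.h added in this merge and should be checked against the header in this tree.

/*
 * Hypothetical sketch, not from this merge: query the per-type resource
 * counters kept by the new restrack core for one ib_device.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/pid_namespace.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>

static void example_dump_resource_counts(struct ib_device *dev)
{
	struct pid_namespace *ns = task_active_pid_ns(current);

	/* counts of objects created on this device, per restrack type */
	int qps = rdma_restrack_count(&dev->res, RDMA_RESTRACK_QP, ns);
	int cqs = rdma_restrack_count(&dev->res, RDMA_RESTRACK_CQ, ns);
	int pds = rdma_restrack_count(&dev->res, RDMA_RESTRACK_PD, ns);

	pr_info("%s: %d QPs, %d CQs, %d PDs in use\n",
		dev->name, qps, cqs, pds);
}

The same counters are what the new nldev code exposes to userspace over netlink for 'ss'-style inspection of RDMA objects.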
-rw-r--r--  MAINTAINERS  7
-rw-r--r--  drivers/infiniband/core/Makefile  2
-rw-r--r--  drivers/infiniband/core/addr.c  65
-rw-r--r--  drivers/infiniband/core/cache.c  23
-rw-r--r--  drivers/infiniband/core/cm.c  227
-rw-r--r--  drivers/infiniband/core/cma.c  252
-rw-r--r--  drivers/infiniband/core/cma_configfs.c  2
-rw-r--r--  drivers/infiniband/core/core_priv.h  52
-rw-r--r--  drivers/infiniband/core/cq.c  39
-rw-r--r--  drivers/infiniband/core/device.c  42
-rw-r--r--  drivers/infiniband/core/fmr_pool.c  12
-rw-r--r--  drivers/infiniband/core/iwpm_util.c  1
-rw-r--r--  drivers/infiniband/core/mad.c  1
-rw-r--r--  drivers/infiniband/core/netlink.c  10
-rw-r--r--  drivers/infiniband/core/nldev.c  394
-rw-r--r--  drivers/infiniband/core/restrack.c  164
-rw-r--r--  drivers/infiniband/core/roce_gid_mgmt.c  13
-rw-r--r--  drivers/infiniband/core/sa_query.c  18
-rw-r--r--  drivers/infiniband/core/security.c  10
-rw-r--r--  drivers/infiniband/core/sysfs.c  1
-rw-r--r--  drivers/infiniband/core/ucm.c  73
-rw-r--r--  drivers/infiniband/core/ucma.c  19
-rw-r--r--  drivers/infiniband/core/umem.c  2
-rw-r--r--  drivers/infiniband/core/user_mad.c  123
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  14
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c  19
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  95
-rw-r--r--  drivers/infiniband/core/uverbs_std_types.c  3
-rw-r--r--  drivers/infiniband/core/verbs.c  312
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h  43
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c  145
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.h  39
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c  404
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h  20
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c  251
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c  463
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h  78
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c  5
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h  7
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c  9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c  141
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h  91
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h  127
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c  27
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c  36
-rw-r--r--  drivers/infiniband/hw/cxgb4/ev.c  2
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h  4
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c  6
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h  4
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c  87
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h  2
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c  16
-rw-r--r--  drivers/infiniband/hw/hfi1/firmware.c  64
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h  25
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c  2
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c  6
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c  10
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c  8
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c  6
-rw-r--r--  drivers/infiniband/hw/hns/Makefile  2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.c  1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.h  10
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_common.h  11
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c  19
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h  103
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_eq.c  759
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_eq.h  134
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c  758
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.h  44
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  1837
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  283
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c  16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c  72
-rw-r--r--  drivers/infiniband/hw/i40iw/Kconfig  1
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw.h  3
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c  68
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.h  8
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_ctrl.c  25
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_d.h  1
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_hw.c  3
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_main.c  13
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.c  5
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_puda.h  1
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_uk.c  18
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_user.h  3
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_utils.c  50
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c  5
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c  4
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  19
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c  20
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c  83
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  2
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c  23
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  1353
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  111
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  3
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c  9
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  432
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c  7
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_user.h  112
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c  2
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.h  3
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c  19
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c  8
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  10
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c  15
-rw-r--r--  drivers/infiniband/hw/qib/qib.h  8
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c  16
-rw-r--r--  drivers/infiniband/hw/qib/qib_eeprom.c  3
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c  68
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c  2
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c  235
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c  6
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c  1
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c  1
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h  4
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c  13
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c  21
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c  15
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c  11
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.c  15
-rw-r--r--  drivers/infiniband/sw/rdmavt/mcast.c  4
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c  2
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c  27
-rw-r--r--  drivers/infiniband/sw/rdmavt/srq.c  16
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace.h  4
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_qp.h  42
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c  10
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.h  6
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig  4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c  6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h  6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h  1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c  18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.h  1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c  12
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c  3
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c  9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c  5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c  2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h  3
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c  18
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c  5
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c  98
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c  6
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c  16
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c  7
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h  1
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c  2
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c  795
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h  43
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c  962
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.h  100
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/qp.c  3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c  9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c  11
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw.c  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c  55
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c  5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c  15
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h  2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/qp.c  125
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/vport.c  91
-rw-r--r--  include/linux/mlx5/device.h  16
-rw-r--r--  include/linux/mlx5/driver.h  43
-rw-r--r--  include/linux/mlx5/mlx5_ifc.h  32
-rw-r--r--  include/linux/mlx5/qp.h  12
-rw-r--r--  include/linux/mlx5/vport.h  4
-rw-r--r--  include/rdma/ib_addr.h  38
-rw-r--r--  include/rdma/ib_sa.h  10
-rw-r--r--  include/rdma/ib_verbs.h  64
-rw-r--r--  include/rdma/opa_addr.h  16
-rw-r--r--  include/rdma/rdma_cm.h  19
-rw-r--r--  include/rdma/rdma_cm_ib.h  8
-rw-r--r--  include/rdma/rdma_vt.h  31
-rw-r--r--  include/rdma/restrack.h  157
-rw-r--r--  include/scsi/srp.h  17
-rw-r--r--  include/uapi/rdma/bnxt_re-abi.h  9
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h  11
-rw-r--r--  include/uapi/rdma/mlx4-abi.h  7
-rw-r--r--  include/uapi/rdma/mlx5-abi.h  53
-rw-r--r--  include/uapi/rdma/rdma_netlink.h  49
-rw-r--r--  include/uapi/rdma/vmw_pvrdma-abi.h  12
-rw-r--r--  lib/kobject.c  2
-rw-r--r--  net/rds/ib.c  6
187 files changed, 10211 insertions, 3999 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b5ef9a58c38..88cdd2925cef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6892,7 +6892,7 @@ M: Jason Gunthorpe <jgg@mellanox.com>
6892L: linux-rdma@vger.kernel.org 6892L: linux-rdma@vger.kernel.org
6893W: http://www.openfabrics.org/ 6893W: http://www.openfabrics.org/
6894Q: http://patchwork.kernel.org/project/linux-rdma/list/ 6894Q: http://patchwork.kernel.org/project/linux-rdma/list/
6895T: git git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma.git 6895T: git git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
6896S: Supported 6896S: Supported
6897F: Documentation/devicetree/bindings/infiniband/ 6897F: Documentation/devicetree/bindings/infiniband/
6898F: Documentation/infiniband/ 6898F: Documentation/infiniband/
@@ -11218,7 +11218,8 @@ S: Maintained
11218F: drivers/firmware/qemu_fw_cfg.c 11218F: drivers/firmware/qemu_fw_cfg.c
11219 11219
11220QIB DRIVER 11220QIB DRIVER
11221M: Mike Marciniszyn <infinipath@intel.com> 11221M: Dennis Dalessandro <dennis.dalessandro@intel.com>
11222M: Mike Marciniszyn <mike.marciniszyn@intel.com>
11222L: linux-rdma@vger.kernel.org 11223L: linux-rdma@vger.kernel.org
11223S: Supported 11224S: Supported
11224F: drivers/infiniband/hw/qib/ 11225F: drivers/infiniband/hw/qib/
@@ -11245,7 +11246,6 @@ F: include/linux/qed/
11245F: drivers/net/ethernet/qlogic/qede/ 11246F: drivers/net/ethernet/qlogic/qede/
11246 11247
11247QLOGIC QL4xxx RDMA DRIVER 11248QLOGIC QL4xxx RDMA DRIVER
11248M: Ram Amrani <Ram.Amrani@cavium.com>
11249M: Michal Kalderon <Michal.Kalderon@cavium.com> 11249M: Michal Kalderon <Michal.Kalderon@cavium.com>
11250M: Ariel Elior <Ariel.Elior@cavium.com> 11250M: Ariel Elior <Ariel.Elior@cavium.com>
11251L: linux-rdma@vger.kernel.org 11251L: linux-rdma@vger.kernel.org
@@ -11507,6 +11507,7 @@ F: drivers/net/ethernet/rdc/r6040.c
11507 11507
11508RDMAVT - RDMA verbs software 11508RDMAVT - RDMA verbs software
11509M: Dennis Dalessandro <dennis.dalessandro@intel.com> 11509M: Dennis Dalessandro <dennis.dalessandro@intel.com>
11510M: Mike Marciniszyn <mike.marciniszyn@intel.com>
11510L: linux-rdma@vger.kernel.org 11511L: linux-rdma@vger.kernel.org
11511S: Supported 11512S: Supported
11512F: drivers/infiniband/sw/rdmavt 11513F: drivers/infiniband/sw/rdmavt
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 504b926552c6..f69833db0a32 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
12 device.o fmr_pool.o cache.o netlink.o \ 12 device.o fmr_pool.o cache.o netlink.o \
13 roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ 13 roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
14 multicast.o mad.o smi.o agent.o mad_rmpp.o \ 14 multicast.o mad.o smi.o agent.o mad_rmpp.o \
15 security.o nldev.o 15 security.o nldev.o restrack.o
16 16
17ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o 17ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
18ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o 18ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index f4e8185bccd3..a5b4cf030c11 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -243,8 +243,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
243EXPORT_SYMBOL(rdma_copy_addr); 243EXPORT_SYMBOL(rdma_copy_addr);
244 244
245int rdma_translate_ip(const struct sockaddr *addr, 245int rdma_translate_ip(const struct sockaddr *addr,
246 struct rdma_dev_addr *dev_addr, 246 struct rdma_dev_addr *dev_addr)
247 u16 *vlan_id)
248{ 247{
249 struct net_device *dev; 248 struct net_device *dev;
250 249
@@ -266,9 +265,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
266 return -EADDRNOTAVAIL; 265 return -EADDRNOTAVAIL;
267 266
268 rdma_copy_addr(dev_addr, dev, NULL); 267 rdma_copy_addr(dev_addr, dev, NULL);
269 dev_addr->bound_dev_if = dev->ifindex;
270 if (vlan_id)
271 *vlan_id = rdma_vlan_dev_vlan_id(dev);
272 dev_put(dev); 268 dev_put(dev);
273 break; 269 break;
274#if IS_ENABLED(CONFIG_IPV6) 270#if IS_ENABLED(CONFIG_IPV6)
@@ -279,9 +275,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
279 &((const struct sockaddr_in6 *)addr)->sin6_addr, 275 &((const struct sockaddr_in6 *)addr)->sin6_addr,
280 dev, 1)) { 276 dev, 1)) {
281 rdma_copy_addr(dev_addr, dev, NULL); 277 rdma_copy_addr(dev_addr, dev, NULL);
282 dev_addr->bound_dev_if = dev->ifindex;
283 if (vlan_id)
284 *vlan_id = rdma_vlan_dev_vlan_id(dev);
285 break; 278 break;
286 } 279 }
287 } 280 }
@@ -481,7 +474,7 @@ static int addr_resolve_neigh(struct dst_entry *dst,
481 if (dst->dev->flags & IFF_LOOPBACK) { 474 if (dst->dev->flags & IFF_LOOPBACK) {
482 int ret; 475 int ret;
483 476
484 ret = rdma_translate_ip(dst_in, addr, NULL); 477 ret = rdma_translate_ip(dst_in, addr);
485 if (!ret) 478 if (!ret)
486 memcpy(addr->dst_dev_addr, addr->src_dev_addr, 479 memcpy(addr->dst_dev_addr, addr->src_dev_addr,
487 MAX_ADDR_LEN); 480 MAX_ADDR_LEN);
@@ -558,7 +551,7 @@ static int addr_resolve(struct sockaddr *src_in,
558 } 551 }
559 552
560 if (ndev->flags & IFF_LOOPBACK) { 553 if (ndev->flags & IFF_LOOPBACK) {
561 ret = rdma_translate_ip(dst_in, addr, NULL); 554 ret = rdma_translate_ip(dst_in, addr);
562 /* 555 /*
563 * Put the loopback device and get the translated 556 * Put the loopback device and get the translated
564 * device instead. 557 * device instead.
@@ -744,7 +737,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
744EXPORT_SYMBOL(rdma_addr_cancel); 737EXPORT_SYMBOL(rdma_addr_cancel);
745 738
746struct resolve_cb_context { 739struct resolve_cb_context {
747 struct rdma_dev_addr *addr;
748 struct completion comp; 740 struct completion comp;
749 int status; 741 int status;
750}; 742};
@@ -752,39 +744,31 @@ struct resolve_cb_context {
752static void resolve_cb(int status, struct sockaddr *src_addr, 744static void resolve_cb(int status, struct sockaddr *src_addr,
753 struct rdma_dev_addr *addr, void *context) 745 struct rdma_dev_addr *addr, void *context)
754{ 746{
755 if (!status)
756 memcpy(((struct resolve_cb_context *)context)->addr,
757 addr, sizeof(struct rdma_dev_addr));
758 ((struct resolve_cb_context *)context)->status = status; 747 ((struct resolve_cb_context *)context)->status = status;
759 complete(&((struct resolve_cb_context *)context)->comp); 748 complete(&((struct resolve_cb_context *)context)->comp);
760} 749}
761 750
762int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 751int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
763 const union ib_gid *dgid, 752 const union ib_gid *dgid,
764 u8 *dmac, u16 *vlan_id, int *if_index, 753 u8 *dmac, const struct net_device *ndev,
765 int *hoplimit) 754 int *hoplimit)
766{ 755{
767 int ret = 0;
768 struct rdma_dev_addr dev_addr; 756 struct rdma_dev_addr dev_addr;
769 struct resolve_cb_context ctx; 757 struct resolve_cb_context ctx;
770 struct net_device *dev;
771
772 union { 758 union {
773 struct sockaddr _sockaddr; 759 struct sockaddr _sockaddr;
774 struct sockaddr_in _sockaddr_in; 760 struct sockaddr_in _sockaddr_in;
775 struct sockaddr_in6 _sockaddr_in6; 761 struct sockaddr_in6 _sockaddr_in6;
776 } sgid_addr, dgid_addr; 762 } sgid_addr, dgid_addr;
777 763 int ret;
778 764
779 rdma_gid2ip(&sgid_addr._sockaddr, sgid); 765 rdma_gid2ip(&sgid_addr._sockaddr, sgid);
780 rdma_gid2ip(&dgid_addr._sockaddr, dgid); 766 rdma_gid2ip(&dgid_addr._sockaddr, dgid);
781 767
782 memset(&dev_addr, 0, sizeof(dev_addr)); 768 memset(&dev_addr, 0, sizeof(dev_addr));
783 if (if_index) 769 dev_addr.bound_dev_if = ndev->ifindex;
784 dev_addr.bound_dev_if = *if_index;
785 dev_addr.net = &init_net; 770 dev_addr.net = &init_net;
786 771
787 ctx.addr = &dev_addr;
788 init_completion(&ctx.comp); 772 init_completion(&ctx.comp);
789 ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, 773 ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
790 &dev_addr, 1000, resolve_cb, &ctx); 774 &dev_addr, 1000, resolve_cb, &ctx);
@@ -798,42 +782,9 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
798 return ret; 782 return ret;
799 783
800 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); 784 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
801 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); 785 *hoplimit = dev_addr.hoplimit;
802 if (!dev) 786 return 0;
803 return -ENODEV;
804 if (if_index)
805 *if_index = dev_addr.bound_dev_if;
806 if (vlan_id)
807 *vlan_id = rdma_vlan_dev_vlan_id(dev);
808 if (hoplimit)
809 *hoplimit = dev_addr.hoplimit;
810 dev_put(dev);
811 return ret;
812}
813EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
814
815int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
816{
817 int ret = 0;
818 struct rdma_dev_addr dev_addr;
819 union {
820 struct sockaddr _sockaddr;
821 struct sockaddr_in _sockaddr_in;
822 struct sockaddr_in6 _sockaddr_in6;
823 } gid_addr;
824
825 rdma_gid2ip(&gid_addr._sockaddr, sgid);
826
827 memset(&dev_addr, 0, sizeof(dev_addr));
828 dev_addr.net = &init_net;
829 ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
830 if (ret)
831 return ret;
832
833 memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
834 return ret;
835} 787}
836EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
837 788
838static int netevent_callback(struct notifier_block *self, unsigned long event, 789static int netevent_callback(struct notifier_block *self, unsigned long event,
839 void *ctx) 790 void *ctx)
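With the addr.c hunks above applied, rdma_translate_ip() loses its vlan_id out-parameter and rdma_addr_find_l2_eth_by_grh() takes the bound net_device directly instead of if_index/vlan_id pointers. A minimal caller sketch, purely illustrative and not part of the patch; the locals and surrounding resolution context are assumed:

#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <rdma/ib_addr.h>

/* Illustrative caller only -- shows the post-change signatures. */
static int example_resolve(const struct sockaddr *dst,
			   const union ib_gid *sgid, const union ib_gid *dgid,
			   const struct net_device *ndev)
{
	struct rdma_dev_addr dev_addr = {};
	u8 dmac[ETH_ALEN];
	int hoplimit;
	int ret;

	dev_addr.net = &init_net;

	/* vlan_id is no longer returned; it is derived from the bound netdev */
	ret = rdma_translate_ip(dst, &dev_addr);
	if (ret)
		return ret;

	/* the resolver now takes the netdev itself, not if_index/vlan_id */
	return rdma_addr_find_l2_eth_by_grh(sgid, dgid, dmac, ndev, &hoplimit);
}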
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 77515638c55c..e9a409d7f4e2 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -573,27 +573,24 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
573 struct ib_gid_attr attr; 573 struct ib_gid_attr attr;
574 574
575 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 575 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
576 goto next; 576 continue;
577 577
578 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) 578 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
579 goto next; 579 continue;
580 580
581 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); 581 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
582 582
583 if (filter(gid, &attr, context)) 583 if (filter(gid, &attr, context)) {
584 found = true; 584 found = true;
585 585 if (index)
586next: 586 *index = i;
587 if (found)
588 break; 587 break;
588 }
589 } 589 }
590 read_unlock_irqrestore(&table->rwlock, flags); 590 read_unlock_irqrestore(&table->rwlock, flags);
591 591
592 if (!found) 592 if (!found)
593 return -ENOENT; 593 return -ENOENT;
594
595 if (index)
596 *index = i;
597 return 0; 594 return 0;
598} 595}
599 596
@@ -824,12 +821,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
824 if (err) 821 if (err)
825 return err; 822 return err;
826 823
827 err = roce_rescan_device(ib_dev); 824 rdma_roce_rescan_device(ib_dev);
828
829 if (err) {
830 gid_table_cleanup_one(ib_dev);
831 gid_table_release_one(ib_dev);
832 }
833 825
834 return err; 826 return err;
835} 827}
@@ -883,7 +875,6 @@ int ib_find_gid_by_filter(struct ib_device *device,
883 port_num, filter, 875 port_num, filter,
884 context, index); 876 context, index);
885} 877}
886EXPORT_SYMBOL(ib_find_gid_by_filter);
887 878
888int ib_get_cached_pkey(struct ib_device *device, 879int ib_get_cached_pkey(struct ib_device *device,
889 u8 port_num, 880 u8 port_num,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index f6b159d79977..e6749157fd86 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -452,13 +452,14 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
452 cm_id_priv->private_data_len = private_data_len; 452 cm_id_priv->private_data_len = private_data_len;
453} 453}
454 454
455static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, 455static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
456 struct ib_grh *grh, struct cm_av *av) 456 struct ib_grh *grh, struct cm_av *av)
457{ 457{
458 av->port = port; 458 av->port = port;
459 av->pkey_index = wc->pkey_index; 459 av->pkey_index = wc->pkey_index;
460 ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, 460 return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
461 grh, &av->ah_attr); 461 port->port_num, wc,
462 grh, &av->ah_attr);
462} 463}
463 464
464static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, 465static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
@@ -494,8 +495,11 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
494 return ret; 495 return ret;
495 496
496 av->port = port; 497 av->port = port;
497 ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, 498 ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
498 &av->ah_attr); 499 &av->ah_attr);
500 if (ret)
501 return ret;
502
499 av->timeout = path->packet_life_time + 1; 503 av->timeout = path->packet_life_time + 1;
500 504
501 spin_lock_irqsave(&cm.lock, flags); 505 spin_lock_irqsave(&cm.lock, flags);
@@ -1560,6 +1564,35 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
1560 return pkey; 1564 return pkey;
1561} 1565}
1562 1566
1567/**
1568 * Convert OPA SGID to IB SGID
1569 * ULPs (such as IPoIB) do not understand OPA GIDs and will
1570 * reject them as the local_gid will not match the sgid. Therefore,
1571 * change the pathrec's SGID to an IB SGID.
1572 *
1573 * @work: Work completion
1574 * @path: Path record
1575 */
1576static void cm_opa_to_ib_sgid(struct cm_work *work,
1577 struct sa_path_rec *path)
1578{
1579 struct ib_device *dev = work->port->cm_dev->ib_device;
1580 u8 port_num = work->port->port_num;
1581
1582 if (rdma_cap_opa_ah(dev, port_num) &&
1583 (ib_is_opa_gid(&path->sgid))) {
1584 union ib_gid sgid;
1585
1586 if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
1587 dev_warn(&dev->dev,
1588 "Error updating sgid in CM request\n");
1589 return;
1590 }
1591
1592 path->sgid = sgid;
1593 }
1594}
1595
1563static void cm_format_req_event(struct cm_work *work, 1596static void cm_format_req_event(struct cm_work *work,
1564 struct cm_id_private *cm_id_priv, 1597 struct cm_id_private *cm_id_priv,
1565 struct ib_cm_id *listen_id) 1598 struct ib_cm_id *listen_id)
@@ -1573,10 +1606,13 @@ static void cm_format_req_event(struct cm_work *work,
1573 param->bth_pkey = cm_get_bth_pkey(work); 1606 param->bth_pkey = cm_get_bth_pkey(work);
1574 param->port = cm_id_priv->av.port->port_num; 1607 param->port = cm_id_priv->av.port->port_num;
1575 param->primary_path = &work->path[0]; 1608 param->primary_path = &work->path[0];
1576 if (cm_req_has_alt_path(req_msg)) 1609 cm_opa_to_ib_sgid(work, param->primary_path);
1610 if (cm_req_has_alt_path(req_msg)) {
1577 param->alternate_path = &work->path[1]; 1611 param->alternate_path = &work->path[1];
1578 else 1612 cm_opa_to_ib_sgid(work, param->alternate_path);
1613 } else {
1579 param->alternate_path = NULL; 1614 param->alternate_path = NULL;
1615 }
1580 param->remote_ca_guid = req_msg->local_ca_guid; 1616 param->remote_ca_guid = req_msg->local_ca_guid;
1581 param->remote_qkey = be32_to_cpu(req_msg->local_qkey); 1617 param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1582 param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); 1618 param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
@@ -1826,9 +1862,11 @@ static int cm_req_handler(struct cm_work *work)
1826 1862
1827 cm_id_priv = container_of(cm_id, struct cm_id_private, id); 1863 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1828 cm_id_priv->id.remote_id = req_msg->local_comm_id; 1864 cm_id_priv->id.remote_id = req_msg->local_comm_id;
1829 cm_init_av_for_response(work->port, work->mad_recv_wc->wc, 1865 ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1830 work->mad_recv_wc->recv_buf.grh, 1866 work->mad_recv_wc->recv_buf.grh,
1831 &cm_id_priv->av); 1867 &cm_id_priv->av);
1868 if (ret)
1869 goto destroy;
1832 cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> 1870 cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
1833 id.local_id); 1871 id.local_id);
1834 if (IS_ERR(cm_id_priv->timewait_info)) { 1872 if (IS_ERR(cm_id_priv->timewait_info)) {
@@ -1841,9 +1879,10 @@ static int cm_req_handler(struct cm_work *work)
1841 1879
1842 listen_cm_id_priv = cm_match_req(work, cm_id_priv); 1880 listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1843 if (!listen_cm_id_priv) { 1881 if (!listen_cm_id_priv) {
1882 pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
1883 be32_to_cpu(cm_id->local_id));
1844 ret = -EINVAL; 1884 ret = -EINVAL;
1845 kfree(cm_id_priv->timewait_info); 1885 goto free_timeinfo;
1846 goto destroy;
1847 } 1886 }
1848 1887
1849 cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; 1888 cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1861,56 +1900,50 @@ static int cm_req_handler(struct cm_work *work)
1861 work->port->port_num, 1900 work->port->port_num,
1862 grh->sgid_index, 1901 grh->sgid_index,
1863 &gid, &gid_attr); 1902 &gid, &gid_attr);
1864 if (!ret) { 1903 if (ret) {
1865 if (gid_attr.ndev) { 1904 ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
1866 work->path[0].rec_type = 1905 goto rejected;
1867 sa_conv_gid_to_pathrec_type(gid_attr.gid_type); 1906 }
1868 sa_path_set_ifindex(&work->path[0], 1907
1869 gid_attr.ndev->ifindex); 1908 if (gid_attr.ndev) {
1870 sa_path_set_ndev(&work->path[0], 1909 work->path[0].rec_type =
1871 dev_net(gid_attr.ndev)); 1910 sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
1872 dev_put(gid_attr.ndev); 1911 sa_path_set_ifindex(&work->path[0],
1873 } else { 1912 gid_attr.ndev->ifindex);
1874 cm_path_set_rec_type(work->port->cm_dev->ib_device, 1913 sa_path_set_ndev(&work->path[0],
1875 work->port->port_num, 1914 dev_net(gid_attr.ndev));
1876 &work->path[0], 1915 dev_put(gid_attr.ndev);
1877 &req_msg->primary_local_gid); 1916 } else {
1878 } 1917 cm_path_set_rec_type(work->port->cm_dev->ib_device,
1879 if (cm_req_has_alt_path(req_msg)) 1918 work->port->port_num,
1880 work->path[1].rec_type = work->path[0].rec_type; 1919 &work->path[0],
1881 cm_format_paths_from_req(req_msg, &work->path[0], 1920 &req_msg->primary_local_gid);
1882 &work->path[1]);
1883 if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
1884 sa_path_set_dmac(&work->path[0],
1885 cm_id_priv->av.ah_attr.roce.dmac);
1886 work->path[0].hop_limit = grh->hop_limit;
1887 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1888 cm_id_priv);
1889 } 1921 }
1922 if (cm_req_has_alt_path(req_msg))
1923 work->path[1].rec_type = work->path[0].rec_type;
1924 cm_format_paths_from_req(req_msg, &work->path[0],
1925 &work->path[1]);
1926 if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
1927 sa_path_set_dmac(&work->path[0],
1928 cm_id_priv->av.ah_attr.roce.dmac);
1929 work->path[0].hop_limit = grh->hop_limit;
1930 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1931 cm_id_priv);
1890 if (ret) { 1932 if (ret) {
1891 int err = ib_get_cached_gid(work->port->cm_dev->ib_device, 1933 int err;
1892 work->port->port_num, 0, 1934
1893 &work->path[0].sgid, 1935 err = ib_get_cached_gid(work->port->cm_dev->ib_device,
1894 &gid_attr); 1936 work->port->port_num, 0,
1895 if (!err && gid_attr.ndev) { 1937 &work->path[0].sgid,
1896 work->path[0].rec_type = 1938 NULL);
1897 sa_conv_gid_to_pathrec_type(gid_attr.gid_type); 1939 if (err)
1898 sa_path_set_ifindex(&work->path[0], 1940 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1899 gid_attr.ndev->ifindex); 1941 NULL, 0, NULL, 0);
1900 sa_path_set_ndev(&work->path[0], 1942 else
1901 dev_net(gid_attr.ndev)); 1943 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1902 dev_put(gid_attr.ndev); 1944 &work->path[0].sgid,
1903 } else { 1945 sizeof(work->path[0].sgid),
1904 cm_path_set_rec_type(work->port->cm_dev->ib_device, 1946 NULL, 0);
1905 work->port->port_num,
1906 &work->path[0],
1907 &req_msg->primary_local_gid);
1908 }
1909 if (cm_req_has_alt_path(req_msg))
1910 work->path[1].rec_type = work->path[0].rec_type;
1911 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1912 &work->path[0].sgid, sizeof work->path[0].sgid,
1913 NULL, 0);
1914 goto rejected; 1947 goto rejected;
1915 } 1948 }
1916 if (cm_req_has_alt_path(req_msg)) { 1949 if (cm_req_has_alt_path(req_msg)) {
@@ -1919,7 +1952,7 @@ static int cm_req_handler(struct cm_work *work)
1919 if (ret) { 1952 if (ret) {
1920 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, 1953 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1921 &work->path[0].sgid, 1954 &work->path[0].sgid,
1922 sizeof work->path[0].sgid, NULL, 0); 1955 sizeof(work->path[0].sgid), NULL, 0);
1923 goto rejected; 1956 goto rejected;
1924 } 1957 }
1925 } 1958 }
@@ -1945,6 +1978,8 @@ static int cm_req_handler(struct cm_work *work)
1945rejected: 1978rejected:
1946 atomic_dec(&cm_id_priv->refcount); 1979 atomic_dec(&cm_id_priv->refcount);
1947 cm_deref_id(listen_cm_id_priv); 1980 cm_deref_id(listen_cm_id_priv);
1981free_timeinfo:
1982 kfree(cm_id_priv->timewait_info);
1948destroy: 1983destroy:
1949 ib_destroy_cm_id(cm_id); 1984 ib_destroy_cm_id(cm_id);
1950 return ret; 1985 return ret;
@@ -1997,6 +2032,8 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
1997 spin_lock_irqsave(&cm_id_priv->lock, flags); 2032 spin_lock_irqsave(&cm_id_priv->lock, flags);
1998 if (cm_id->state != IB_CM_REQ_RCVD && 2033 if (cm_id->state != IB_CM_REQ_RCVD &&
1999 cm_id->state != IB_CM_MRA_REQ_SENT) { 2034 cm_id->state != IB_CM_MRA_REQ_SENT) {
2035 pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
2036 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2000 ret = -EINVAL; 2037 ret = -EINVAL;
2001 goto out; 2038 goto out;
2002 } 2039 }
@@ -2063,6 +2100,8 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
2063 spin_lock_irqsave(&cm_id_priv->lock, flags); 2100 spin_lock_irqsave(&cm_id_priv->lock, flags);
2064 if (cm_id->state != IB_CM_REP_RCVD && 2101 if (cm_id->state != IB_CM_REP_RCVD &&
2065 cm_id->state != IB_CM_MRA_REP_SENT) { 2102 cm_id->state != IB_CM_MRA_REP_SENT) {
2103 pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
2104 be32_to_cpu(cm_id->local_id), cm_id->state);
2066 ret = -EINVAL; 2105 ret = -EINVAL;
2067 goto error; 2106 goto error;
2068 } 2107 }
@@ -2170,6 +2209,8 @@ static int cm_rep_handler(struct cm_work *work)
2170 cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); 2209 cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2171 if (!cm_id_priv) { 2210 if (!cm_id_priv) {
2172 cm_dup_rep_handler(work); 2211 cm_dup_rep_handler(work);
2212 pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
2213 be32_to_cpu(rep_msg->remote_comm_id));
2173 return -EINVAL; 2214 return -EINVAL;
2174 } 2215 }
2175 2216
@@ -2183,6 +2224,10 @@ static int cm_rep_handler(struct cm_work *work)
2183 default: 2224 default:
2184 spin_unlock_irq(&cm_id_priv->lock); 2225 spin_unlock_irq(&cm_id_priv->lock);
2185 ret = -EINVAL; 2226 ret = -EINVAL;
2227 pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
2228 __func__, cm_id_priv->id.state,
2229 be32_to_cpu(rep_msg->local_comm_id),
2230 be32_to_cpu(rep_msg->remote_comm_id));
2186 goto error; 2231 goto error;
2187 } 2232 }
2188 2233
@@ -2196,6 +2241,8 @@ static int cm_rep_handler(struct cm_work *work)
2196 spin_unlock(&cm.lock); 2241 spin_unlock(&cm.lock);
2197 spin_unlock_irq(&cm_id_priv->lock); 2242 spin_unlock_irq(&cm_id_priv->lock);
2198 ret = -EINVAL; 2243 ret = -EINVAL;
2244 pr_debug("%s: Failed to insert remote id %d\n", __func__,
2245 be32_to_cpu(rep_msg->remote_comm_id));
2199 goto error; 2246 goto error;
2200 } 2247 }
2201 /* Check for a stale connection. */ 2248 /* Check for a stale connection. */
@@ -2213,6 +2260,10 @@ static int cm_rep_handler(struct cm_work *work)
2213 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, 2260 IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2214 NULL, 0); 2261 NULL, 0);
2215 ret = -EINVAL; 2262 ret = -EINVAL;
2263 pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
2264 __func__, be32_to_cpu(rep_msg->local_comm_id),
2265 be32_to_cpu(rep_msg->remote_comm_id));
2266
2216 if (cur_cm_id_priv) { 2267 if (cur_cm_id_priv) {
2217 cm_id = &cur_cm_id_priv->id; 2268 cm_id = &cur_cm_id_priv->id;
2218 ib_send_cm_dreq(cm_id, NULL, 0); 2269 ib_send_cm_dreq(cm_id, NULL, 0);
@@ -2359,6 +2410,8 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2359 cm_id_priv = container_of(cm_id, struct cm_id_private, id); 2410 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2360 spin_lock_irqsave(&cm_id_priv->lock, flags); 2411 spin_lock_irqsave(&cm_id_priv->lock, flags);
2361 if (cm_id->state != IB_CM_ESTABLISHED) { 2412 if (cm_id->state != IB_CM_ESTABLISHED) {
2413 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2414 be32_to_cpu(cm_id->local_id), cm_id->state);
2362 ret = -EINVAL; 2415 ret = -EINVAL;
2363 goto out; 2416 goto out;
2364 } 2417 }
@@ -2428,6 +2481,8 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
2428 if (cm_id->state != IB_CM_DREQ_RCVD) { 2481 if (cm_id->state != IB_CM_DREQ_RCVD) {
2429 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 2482 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2430 kfree(data); 2483 kfree(data);
2484 pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
2485 __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
2431 return -EINVAL; 2486 return -EINVAL;
2432 } 2487 }
2433 2488
@@ -2493,6 +2548,9 @@ static int cm_dreq_handler(struct cm_work *work)
2493 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 2548 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2494 counter[CM_DREQ_COUNTER]); 2549 counter[CM_DREQ_COUNTER]);
2495 cm_issue_drep(work->port, work->mad_recv_wc); 2550 cm_issue_drep(work->port, work->mad_recv_wc);
2551 pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
2552 __func__, be32_to_cpu(dreq_msg->local_comm_id),
2553 be32_to_cpu(dreq_msg->remote_comm_id));
2496 return -EINVAL; 2554 return -EINVAL;
2497 } 2555 }
2498 2556
@@ -2535,6 +2593,9 @@ static int cm_dreq_handler(struct cm_work *work)
2535 counter[CM_DREQ_COUNTER]); 2593 counter[CM_DREQ_COUNTER]);
2536 goto unlock; 2594 goto unlock;
2537 default: 2595 default:
2596 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2597 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2598 cm_id_priv->id.state);
2538 goto unlock; 2599 goto unlock;
2539 } 2600 }
2540 cm_id_priv->id.state = IB_CM_DREQ_RCVD; 2601 cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@@ -2638,6 +2699,8 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
2638 cm_enter_timewait(cm_id_priv); 2699 cm_enter_timewait(cm_id_priv);
2639 break; 2700 break;
2640 default: 2701 default:
2702 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
2703 be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
2641 ret = -EINVAL; 2704 ret = -EINVAL;
2642 goto out; 2705 goto out;
2643 } 2706 }
@@ -2748,6 +2811,9 @@ static int cm_rej_handler(struct cm_work *work)
2748 /* fall through */ 2811 /* fall through */
2749 default: 2812 default:
2750 spin_unlock_irq(&cm_id_priv->lock); 2813 spin_unlock_irq(&cm_id_priv->lock);
2814 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2815 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2816 cm_id_priv->id.state);
2751 ret = -EINVAL; 2817 ret = -EINVAL;
2752 goto out; 2818 goto out;
2753 } 2819 }
@@ -2811,6 +2877,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
2811 } 2877 }
2812 /* fall through */ 2878 /* fall through */
2813 default: 2879 default:
2880 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
2881 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2882 cm_id_priv->id.state);
2814 ret = -EINVAL; 2883 ret = -EINVAL;
2815 goto error1; 2884 goto error1;
2816 } 2885 }
@@ -2912,6 +2981,9 @@ static int cm_mra_handler(struct cm_work *work)
2912 counter[CM_MRA_COUNTER]); 2981 counter[CM_MRA_COUNTER]);
2913 /* fall through */ 2982 /* fall through */
2914 default: 2983 default:
2984 pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
2985 __func__, be32_to_cpu(cm_id_priv->id.local_id),
2986 cm_id_priv->id.state);
2915 goto out; 2987 goto out;
2916 } 2988 }
2917 2989
@@ -3085,6 +3157,12 @@ static int cm_lap_handler(struct cm_work *work)
3085 if (!cm_id_priv) 3157 if (!cm_id_priv)
3086 return -EINVAL; 3158 return -EINVAL;
3087 3159
3160 ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3161 work->mad_recv_wc->recv_buf.grh,
3162 &cm_id_priv->av);
3163 if (ret)
3164 goto deref;
3165
3088 param = &work->cm_event.param.lap_rcvd; 3166 param = &work->cm_event.param.lap_rcvd;
3089 memset(&work->path[0], 0, sizeof(work->path[1])); 3167 memset(&work->path[0], 0, sizeof(work->path[1]));
3090 cm_path_set_rec_type(work->port->cm_dev->ib_device, 3168 cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3131,9 +3209,6 @@ static int cm_lap_handler(struct cm_work *work)
3131 3209
3132 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; 3210 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3133 cm_id_priv->tid = lap_msg->hdr.tid; 3211 cm_id_priv->tid = lap_msg->hdr.tid;
3134 cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3135 work->mad_recv_wc->recv_buf.grh,
3136 &cm_id_priv->av);
3137 cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, 3212 cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
3138 cm_id_priv); 3213 cm_id_priv);
3139 ret = atomic_inc_and_test(&cm_id_priv->work_count); 3214 ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -3386,6 +3461,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
3386 struct cm_id_private *cm_id_priv, *cur_cm_id_priv; 3461 struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
3387 struct cm_sidr_req_msg *sidr_req_msg; 3462 struct cm_sidr_req_msg *sidr_req_msg;
3388 struct ib_wc *wc; 3463 struct ib_wc *wc;
3464 int ret;
3389 3465
3390 cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); 3466 cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
3391 if (IS_ERR(cm_id)) 3467 if (IS_ERR(cm_id))
@@ -3398,9 +3474,12 @@ static int cm_sidr_req_handler(struct cm_work *work)
3398 wc = work->mad_recv_wc->wc; 3474 wc = work->mad_recv_wc->wc;
3399 cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); 3475 cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
3400 cm_id_priv->av.dgid.global.interface_id = 0; 3476 cm_id_priv->av.dgid.global.interface_id = 0;
3401 cm_init_av_for_response(work->port, work->mad_recv_wc->wc, 3477 ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
3402 work->mad_recv_wc->recv_buf.grh, 3478 work->mad_recv_wc->recv_buf.grh,
3403 &cm_id_priv->av); 3479 &cm_id_priv->av);
3480 if (ret)
3481 goto out;
3482
3404 cm_id_priv->id.remote_id = sidr_req_msg->request_id; 3483 cm_id_priv->id.remote_id = sidr_req_msg->request_id;
3405 cm_id_priv->tid = sidr_req_msg->hdr.tid; 3484 cm_id_priv->tid = sidr_req_msg->hdr.tid;
3406 atomic_inc(&cm_id_priv->work_count); 3485 atomic_inc(&cm_id_priv->work_count);
@@ -3692,6 +3771,7 @@ static void cm_work_handler(struct work_struct *_work)
3692 ret = cm_timewait_handler(work); 3771 ret = cm_timewait_handler(work);
3693 break; 3772 break;
3694 default: 3773 default:
3774 pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
3695 ret = -EINVAL; 3775 ret = -EINVAL;
3696 break; 3776 break;
3697 } 3777 }
@@ -3727,6 +3807,8 @@ static int cm_establish(struct ib_cm_id *cm_id)
3727 ret = -EISCONN; 3807 ret = -EISCONN;
3728 break; 3808 break;
3729 default: 3809 default:
3810 pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
3811 be32_to_cpu(cm_id->local_id), cm_id->state);
3730 ret = -EINVAL; 3812 ret = -EINVAL;
3731 break; 3813 break;
3732 } 3814 }
@@ -3924,6 +4006,9 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
3924 ret = 0; 4006 ret = 0;
3925 break; 4007 break;
3926 default: 4008 default:
4009 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4010 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4011 cm_id_priv->id.state);
3927 ret = -EINVAL; 4012 ret = -EINVAL;
3928 break; 4013 break;
3929 } 4014 }
@@ -3971,6 +4056,9 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3971 ret = 0; 4056 ret = 0;
3972 break; 4057 break;
3973 default: 4058 default:
4059 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4060 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4061 cm_id_priv->id.state);
3974 ret = -EINVAL; 4062 ret = -EINVAL;
3975 break; 4063 break;
3976 } 4064 }
@@ -4030,6 +4118,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
4030 ret = 0; 4118 ret = 0;
4031 break; 4119 break;
4032 default: 4120 default:
4121 pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
4122 __func__, be32_to_cpu(cm_id_priv->id.local_id),
4123 cm_id_priv->id.state);
4033 ret = -EINVAL; 4124 ret = -EINVAL;
4034 break; 4125 break;
4035 } 4126 }
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6294a7001d33..e66963ca58bd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -601,7 +601,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
601 int ret; 601 int ret;
602 602
603 if (addr->sa_family != AF_IB) { 603 if (addr->sa_family != AF_IB) {
604 ret = rdma_translate_ip(addr, dev_addr, NULL); 604 ret = rdma_translate_ip(addr, dev_addr);
605 } else { 605 } else {
606 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 606 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
607 ret = 0; 607 ret = 0;
@@ -612,11 +612,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
612 612
613static inline int cma_validate_port(struct ib_device *device, u8 port, 613static inline int cma_validate_port(struct ib_device *device, u8 port,
614 enum ib_gid_type gid_type, 614 enum ib_gid_type gid_type,
615 union ib_gid *gid, int dev_type, 615 union ib_gid *gid,
616 int bound_if_index) 616 struct rdma_id_private *id_priv)
617{ 617{
618 int ret = -ENODEV; 618 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
619 int bound_if_index = dev_addr->bound_dev_if;
620 int dev_type = dev_addr->dev_type;
619 struct net_device *ndev = NULL; 621 struct net_device *ndev = NULL;
622 int ret = -ENODEV;
620 623
621 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 624 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
622 return ret; 625 return ret;
@@ -624,11 +627,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
624 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 627 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
625 return ret; 628 return ret;
626 629
627 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) 630 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
628 ndev = dev_get_by_index(&init_net, bound_if_index); 631 ndev = dev_get_by_index(dev_addr->net, bound_if_index);
629 else 632 if (!ndev)
633 return ret;
634 } else {
630 gid_type = IB_GID_TYPE_IB; 635 gid_type = IB_GID_TYPE_IB;
631 636 }
632 637
633 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 638 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
634 ndev, NULL); 639 ndev, NULL);
@@ -669,8 +674,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
669 rdma_protocol_ib(cma_dev->device, port) ? 674 rdma_protocol_ib(cma_dev->device, port) ?
670 IB_GID_TYPE_IB : 675 IB_GID_TYPE_IB :
671 listen_id_priv->gid_type, gidp, 676 listen_id_priv->gid_type, gidp,
672 dev_addr->dev_type, 677 id_priv);
673 dev_addr->bound_dev_if);
674 if (!ret) { 678 if (!ret) {
675 id_priv->id.port_num = port; 679 id_priv->id.port_num = port;
676 goto out; 680 goto out;
@@ -691,8 +695,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
691 rdma_protocol_ib(cma_dev->device, port) ? 695 rdma_protocol_ib(cma_dev->device, port) ?
692 IB_GID_TYPE_IB : 696 IB_GID_TYPE_IB :
693 cma_dev->default_gid_type[port - 1], 697 cma_dev->default_gid_type[port - 1],
694 gidp, dev_addr->dev_type, 698 gidp, id_priv);
695 dev_addr->bound_dev_if);
696 if (!ret) { 699 if (!ret) {
697 id_priv->id.port_num = port; 700 id_priv->id.port_num = port;
698 goto out; 701 goto out;
@@ -2036,6 +2039,33 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
2036} 2039}
2037EXPORT_SYMBOL(rdma_get_service_id); 2040EXPORT_SYMBOL(rdma_get_service_id);
2038 2041
2042void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
2043 union ib_gid *dgid)
2044{
2045 struct rdma_addr *addr = &cm_id->route.addr;
2046
2047 if (!cm_id->device) {
2048 if (sgid)
2049 memset(sgid, 0, sizeof(*sgid));
2050 if (dgid)
2051 memset(dgid, 0, sizeof(*dgid));
2052 return;
2053 }
2054
2055 if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
2056 if (sgid)
2057 rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
2058 if (dgid)
2059 rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
2060 } else {
2061 if (sgid)
2062 rdma_addr_get_sgid(&addr->dev_addr, sgid);
2063 if (dgid)
2064 rdma_addr_get_dgid(&addr->dev_addr, dgid);
2065 }
2066}
2067EXPORT_SYMBOL(rdma_read_gids);
2068
2039static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2069static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2040{ 2070{
2041 struct rdma_id_private *id_priv = iw_id->context; 2071 struct rdma_id_private *id_priv = iw_id->context;
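The hunk above adds rdma_read_gids() as an exported helper so that ULPs can obtain the source and destination GIDs of an rdma_cm_id without poking into route.addr themselves, including the RoCE case where the GIDs are derived from IP addresses. A small ULP-side sketch of a caller, not taken from this patch:

#include <linux/printk.h>
#include <rdma/rdma_cm.h>

/* Illustrative only; assumes an rdma_cm_id with a resolved address. */
static void example_print_gids(struct rdma_cm_id *cm_id)
{
	union ib_gid sgid, dgid;

	/* either pointer may be NULL if only one GID is needed */
	rdma_read_gids(cm_id, &sgid, &dgid);
	pr_info("cm_id %p: sgid %pI6, dgid %pI6\n", cm_id, sgid.raw, dgid.raw);
}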
@@ -2132,7 +2162,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2132 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2162 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2133 conn_id->state = RDMA_CM_CONNECT; 2163 conn_id->state = RDMA_CM_CONNECT;
2134 2164
2135 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); 2165 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
2136 if (ret) { 2166 if (ret) {
2137 mutex_unlock(&conn_id->handler_mutex); 2167 mutex_unlock(&conn_id->handler_mutex);
2138 rdma_destroy_id(new_cm_id); 2168 rdma_destroy_id(new_cm_id);
@@ -2414,6 +2444,26 @@ out:
2414 kfree(work); 2444 kfree(work);
2415} 2445}
2416 2446
2447static void cma_init_resolve_route_work(struct cma_work *work,
2448 struct rdma_id_private *id_priv)
2449{
2450 work->id = id_priv;
2451 INIT_WORK(&work->work, cma_work_handler);
2452 work->old_state = RDMA_CM_ROUTE_QUERY;
2453 work->new_state = RDMA_CM_ROUTE_RESOLVED;
2454 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2455}
2456
2457static void cma_init_resolve_addr_work(struct cma_work *work,
2458 struct rdma_id_private *id_priv)
2459{
2460 work->id = id_priv;
2461 INIT_WORK(&work->work, cma_work_handler);
2462 work->old_state = RDMA_CM_ADDR_QUERY;
2463 work->new_state = RDMA_CM_ADDR_RESOLVED;
2464 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2465}
2466
2417static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2467static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2418{ 2468{
2419 struct rdma_route *route = &id_priv->id.route; 2469 struct rdma_route *route = &id_priv->id.route;
@@ -2424,11 +2474,7 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
2424 if (!work) 2474 if (!work)
2425 return -ENOMEM; 2475 return -ENOMEM;
2426 2476
2427 work->id = id_priv; 2477 cma_init_resolve_route_work(work, id_priv);
2428 INIT_WORK(&work->work, cma_work_handler);
2429 work->old_state = RDMA_CM_ROUTE_QUERY;
2430 work->new_state = RDMA_CM_ROUTE_RESOLVED;
2431 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2432 2478
2433 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2479 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
2434 if (!route->path_rec) { 2480 if (!route->path_rec) {
@@ -2449,10 +2495,63 @@ err1:
2449 return ret; 2495 return ret;
2450} 2496}
2451 2497
2452int rdma_set_ib_paths(struct rdma_cm_id *id, 2498static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2453 struct sa_path_rec *path_rec, int num_paths) 2499 unsigned long supported_gids,
2500 enum ib_gid_type default_gid)
2501{
2502 if ((network_type == RDMA_NETWORK_IPV4 ||
2503 network_type == RDMA_NETWORK_IPV6) &&
2504 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2505 return IB_GID_TYPE_ROCE_UDP_ENCAP;
2506
2507 return default_gid;
2508}
2509
2510/*
2511 * cma_iboe_set_path_rec_l2_fields() is helper function which sets
2512 * path record type based on GID type.
2513 * It also sets up other L2 fields which includes destination mac address
2514 * netdev ifindex, of the path record.
2515 * It returns the netdev of the bound interface for this path record entry.
2516 */
2517static struct net_device *
2518cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
2519{
2520 struct rdma_route *route = &id_priv->id.route;
2521 enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
2522 struct rdma_addr *addr = &route->addr;
2523 unsigned long supported_gids;
2524 struct net_device *ndev;
2525
2526 if (!addr->dev_addr.bound_dev_if)
2527 return NULL;
2528
2529 ndev = dev_get_by_index(addr->dev_addr.net,
2530 addr->dev_addr.bound_dev_if);
2531 if (!ndev)
2532 return NULL;
2533
2534 supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2535 id_priv->id.port_num);
2536 gid_type = cma_route_gid_type(addr->dev_addr.network,
2537 supported_gids,
2538 id_priv->gid_type);
2539 /* Use the hint from IP Stack to select GID Type */
2540 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2541 gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2542 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2543
2544 sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
2545 sa_path_set_ifindex(route->path_rec, ndev->ifindex);
2546 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2547 return ndev;
2548}
2549
2550int rdma_set_ib_path(struct rdma_cm_id *id,
2551 struct sa_path_rec *path_rec)
2454{ 2552{
2455 struct rdma_id_private *id_priv; 2553 struct rdma_id_private *id_priv;
2554 struct net_device *ndev;
2456 int ret; 2555 int ret;
2457 2556
2458 id_priv = container_of(id, struct rdma_id_private, id); 2557 id_priv = container_of(id, struct rdma_id_private, id);
@@ -2460,20 +2559,33 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
2460 RDMA_CM_ROUTE_RESOLVED)) 2559 RDMA_CM_ROUTE_RESOLVED))
2461 return -EINVAL; 2560 return -EINVAL;
2462 2561
2463 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2562 id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
2464 GFP_KERNEL); 2563 GFP_KERNEL);
2465 if (!id->route.path_rec) { 2564 if (!id->route.path_rec) {
2466 ret = -ENOMEM; 2565 ret = -ENOMEM;
2467 goto err; 2566 goto err;
2468 } 2567 }
2469 2568
2470 id->route.num_paths = num_paths; 2569 if (rdma_protocol_roce(id->device, id->port_num)) {
2570 ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2571 if (!ndev) {
2572 ret = -ENODEV;
2573 goto err_free;
2574 }
2575 dev_put(ndev);
2576 }
2577
2578 id->route.num_paths = 1;
2471 return 0; 2579 return 0;
2580
2581err_free:
2582 kfree(id->route.path_rec);
2583 id->route.path_rec = NULL;
2472err: 2584err:
2473 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2585 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
2474 return ret; 2586 return ret;
2475} 2587}
2476EXPORT_SYMBOL(rdma_set_ib_paths); 2588EXPORT_SYMBOL(rdma_set_ib_path);
2477 2589
2478static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2590static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2479{ 2591{
@@ -2483,11 +2595,7 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
2483 if (!work) 2595 if (!work)
2484 return -ENOMEM; 2596 return -ENOMEM;
2485 2597
2486 work->id = id_priv; 2598 cma_init_resolve_route_work(work, id_priv);
2487 INIT_WORK(&work->work, cma_work_handler);
2488 work->old_state = RDMA_CM_ROUTE_QUERY;
2489 work->new_state = RDMA_CM_ROUTE_RESOLVED;
2490 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2491 queue_work(cma_wq, &work->work); 2599 queue_work(cma_wq, &work->work);
2492 return 0; 2600 return 0;
2493} 2601}
@@ -2510,26 +2618,14 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
2510 return 0; 2618 return 0;
2511} 2619}
2512 2620
2513static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
2514 unsigned long supported_gids,
2515 enum ib_gid_type default_gid)
2516{
2517 if ((network_type == RDMA_NETWORK_IPV4 ||
2518 network_type == RDMA_NETWORK_IPV6) &&
2519 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
2520 return IB_GID_TYPE_ROCE_UDP_ENCAP;
2521
2522 return default_gid;
2523}
2524
2525static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2621static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2526{ 2622{
2527 struct rdma_route *route = &id_priv->id.route; 2623 struct rdma_route *route = &id_priv->id.route;
2528 struct rdma_addr *addr = &route->addr; 2624 struct rdma_addr *addr = &route->addr;
2529 struct cma_work *work; 2625 struct cma_work *work;
2530 int ret; 2626 int ret;
2531 struct net_device *ndev = NULL; 2627 struct net_device *ndev;
2532 enum ib_gid_type gid_type = IB_GID_TYPE_IB; 2628
2533 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num - 2629 u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
2534 rdma_start_port(id_priv->cma_dev->device)]; 2630 rdma_start_port(id_priv->cma_dev->device)];
2535 u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos; 2631 u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
@@ -2539,9 +2635,6 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2539 if (!work) 2635 if (!work)
2540 return -ENOMEM; 2636 return -ENOMEM;
2541 2637
2542 work->id = id_priv;
2543 INIT_WORK(&work->work, cma_work_handler);
2544
2545 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2638 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
2546 if (!route->path_rec) { 2639 if (!route->path_rec) {
2547 ret = -ENOMEM; 2640 ret = -ENOMEM;
@@ -2550,42 +2643,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2550 2643
2551 route->num_paths = 1; 2644 route->num_paths = 1;
2552 2645
2553 if (addr->dev_addr.bound_dev_if) { 2646 ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
2554 unsigned long supported_gids;
2555
2556 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
2557 if (!ndev) {
2558 ret = -ENODEV;
2559 goto err2;
2560 }
2561
2562 supported_gids = roce_gid_type_mask_support(id_priv->id.device,
2563 id_priv->id.port_num);
2564 gid_type = cma_route_gid_type(addr->dev_addr.network,
2565 supported_gids,
2566 id_priv->gid_type);
2567 route->path_rec->rec_type =
2568 sa_conv_gid_to_pathrec_type(gid_type);
2569 sa_path_set_ndev(route->path_rec, &init_net);
2570 sa_path_set_ifindex(route->path_rec, ndev->ifindex);
2571 }
2572 if (!ndev) { 2647 if (!ndev) {
2573 ret = -ENODEV; 2648 ret = -ENODEV;
2574 goto err2; 2649 goto err2;
2575 } 2650 }
2576 2651
2577 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2578
2579 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2652 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2580 &route->path_rec->sgid); 2653 &route->path_rec->sgid);
2581 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2654 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
2582 &route->path_rec->dgid); 2655 &route->path_rec->dgid);
2583 2656
2584 /* Use the hint from IP Stack to select GID Type */
2585 if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
2586 gid_type = ib_network_to_gid_type(addr->dev_addr.network);
2587 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2588
2589 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2657 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
2590 /* TODO: get the hoplimit from the inet/inet6 device */ 2658 /* TODO: get the hoplimit from the inet/inet6 device */
2591 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2659 route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2607,11 +2675,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2607 goto err2; 2675 goto err2;
2608 } 2676 }
2609 2677
2610 work->old_state = RDMA_CM_ROUTE_QUERY; 2678 cma_init_resolve_route_work(work, id_priv);
2611 work->new_state = RDMA_CM_ROUTE_RESOLVED;
2612 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
2613 work->event.status = 0;
2614
2615 queue_work(cma_wq, &work->work); 2679 queue_work(cma_wq, &work->work);
2616 2680
2617 return 0; 2681 return 0;
@@ -2791,11 +2855,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2791 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2855 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2792 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2856 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2793 2857
2794 work->id = id_priv; 2858 cma_init_resolve_addr_work(work, id_priv);
2795 INIT_WORK(&work->work, cma_work_handler);
2796 work->old_state = RDMA_CM_ADDR_QUERY;
2797 work->new_state = RDMA_CM_ADDR_RESOLVED;
2798 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2799 queue_work(cma_wq, &work->work); 2859 queue_work(cma_wq, &work->work);
2800 return 0; 2860 return 0;
2801err: 2861err:
@@ -2821,11 +2881,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2821 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2881 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2822 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2882 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2823 2883
2824 work->id = id_priv; 2884 cma_init_resolve_addr_work(work, id_priv);
2825 INIT_WORK(&work->work, cma_work_handler);
2826 work->old_state = RDMA_CM_ADDR_QUERY;
2827 work->new_state = RDMA_CM_ADDR_RESOLVED;
2828 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2829 queue_work(cma_wq, &work->work); 2885 queue_work(cma_wq, &work->work);
2830 return 0; 2886 return 0;
2831err: 2887err:
@@ -3404,9 +3460,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3404 event.status = ret; 3460 event.status = ret;
3405 break; 3461 break;
3406 } 3462 }
3407 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, 3463 ib_init_ah_attr_from_path(id_priv->id.device,
3408 id_priv->id.route.path_rec, 3464 id_priv->id.port_num,
3409 &event.param.ud.ah_attr); 3465 id_priv->id.route.path_rec,
3466 &event.param.ud.ah_attr);
3410 event.param.ud.qp_num = rep->qpn; 3467 event.param.ud.qp_num = rep->qpn;
3411 event.param.ud.qkey = rep->qkey; 3468 event.param.ud.qkey = rep->qkey;
3412 event.event = RDMA_CM_EVENT_ESTABLISHED; 3469 event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3873,7 +3930,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3873 struct rdma_dev_addr *dev_addr = 3930 struct rdma_dev_addr *dev_addr =
3874 &id_priv->id.route.addr.dev_addr; 3931 &id_priv->id.route.addr.dev_addr;
3875 struct net_device *ndev = 3932 struct net_device *ndev =
3876 dev_get_by_index(&init_net, dev_addr->bound_dev_if); 3933 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
3877 enum ib_gid_type gid_type = 3934 enum ib_gid_type gid_type =
3878 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3935 id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
3879 rdma_start_port(id_priv->cma_dev->device)]; 3936 rdma_start_port(id_priv->cma_dev->device)];
@@ -4010,8 +4067,10 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
4010 } else if (addr->sa_family == AF_INET6) { 4067 } else if (addr->sa_family == AF_INET6) {
4011 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4068 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4012 } else { 4069 } else {
4013 mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0; 4070 mgid->raw[0] =
4014 mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0; 4071 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
4072 mgid->raw[1] =
4073 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
4015 mgid->raw[2] = 0; 4074 mgid->raw[2] = 0;
4016 mgid->raw[3] = 0; 4075 mgid->raw[3] = 0;
4017 mgid->raw[4] = 0; 4076 mgid->raw[4] = 0;
@@ -4061,7 +4120,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
4061 mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 4120 mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
4062 4121
4063 if (dev_addr->bound_dev_if) 4122 if (dev_addr->bound_dev_if)
4064 ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 4123 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
4065 if (!ndev) { 4124 if (!ndev) {
4066 err = -ENODEV; 4125 err = -ENODEV;
4067 goto out2; 4126 goto out2;
@@ -4179,7 +4238,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4179 struct net_device *ndev = NULL; 4238 struct net_device *ndev = NULL;
4180 4239
4181 if (dev_addr->bound_dev_if) 4240 if (dev_addr->bound_dev_if)
4182 ndev = dev_get_by_index(&init_net, 4241 ndev = dev_get_by_index(dev_addr->net,
4183 dev_addr->bound_dev_if); 4242 dev_addr->bound_dev_if);
4184 if (ndev) { 4243 if (ndev) {
4185 cma_igmp_send(ndev, 4244 cma_igmp_send(ndev,
@@ -4235,7 +4294,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
4235 if (event != NETDEV_BONDING_FAILOVER) 4294 if (event != NETDEV_BONDING_FAILOVER)
4236 return NOTIFY_DONE; 4295 return NOTIFY_DONE;
4237 4296
4238 if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) 4297 if (!netif_is_bond_master(ndev))
4239 return NOTIFY_DONE; 4298 return NOTIFY_DONE;
4240 4299
4241 mutex_lock(&lock); 4300 mutex_lock(&lock);
@@ -4432,7 +4491,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
4432 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) 4491 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
4433 goto out; 4492 goto out;
4434 if (ibnl_put_attr(skb, nlh, 4493 if (ibnl_put_attr(skb, nlh,
4435 rdma_addr_size(cma_src_addr(id_priv)), 4494 rdma_addr_size(cma_dst_addr(id_priv)),
4436 cma_dst_addr(id_priv), 4495 cma_dst_addr(id_priv),
4437 RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) 4496 RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
4438 goto out; 4497 goto out;
@@ -4444,6 +4503,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
4444 id_stats->qp_type = id->qp_type; 4503 id_stats->qp_type = id->qp_type;
4445 4504
4446 i_id++; 4505 i_id++;
4506 nlmsg_end(skb, nlh);
4447 } 4507 }
4448 4508
4449 cb->args[1] = 0; 4509 cb->args[1] = 0;
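Note on the cma.c hunk above: rdma_set_ib_paths() becomes rdma_set_ib_path() because callers now hand over exactly one sa_path_rec, and on RoCE ports the function re-derives the L2 fields from the bound net_device before committing the path. A minimal caller-side sketch, with a hypothetical ULP helper name that is not part of this patch:

/* Hypothetical ULP helper (assumes <rdma/rdma_cm.h> and <rdma/ib_sa.h>):
 * install one resolved path record on a cm_id whose address is already
 * resolved.  rdma_set_ib_path() duplicates the record, fills the RoCE L2
 * fields from the bound net_device when applicable, and on failure rolls
 * the id back to RDMA_CM_ADDR_RESOLVED.
 */
static int ulp_install_path(struct rdma_cm_id *id, struct sa_path_rec *rec)
{
	int ret;

	ret = rdma_set_ib_path(id, rec);
	if (ret)
		pr_err("rdma_set_ib_path failed: %d\n", ret);
	return ret;
}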
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 31dfee0c8295..eee38b40be99 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -295,7 +295,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
295 goto fail; 295 goto fail;
296 } 296 }
297 297
298 strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); 298 strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
299 299
300 config_group_init_type_name(&cma_dev_group->ports_group, "ports", 300 config_group_init_type_name(&cma_dev_group->ports_group, "ports",
301 &cma_ports_group_type); 301 &cma_ports_group_type);
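The strncpy() to strlcpy() switch above matters because strncpy() does not NUL-terminate when the source fills the buffer, while strlcpy() always terminates (possibly truncating). A small illustration with hypothetical buffer sizes, not taken from the patch:

/* Illustration only; assumes <linux/string.h>. */
static void copy_semantics_demo(void)
{
	char buf[4];

	strncpy(buf, "abcdef", sizeof(buf));	/* buf = "abcd", NOT NUL-terminated */
	strlcpy(buf, "abcdef", sizeof(buf));	/* buf = "abc" + '\0', truncated but terminated */
}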
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 66f0268f37a6..c4560d84dfae 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -40,8 +40,12 @@
40#include <rdma/ib_verbs.h> 40#include <rdma/ib_verbs.h>
41#include <rdma/opa_addr.h> 41#include <rdma/opa_addr.h>
42#include <rdma/ib_mad.h> 42#include <rdma/ib_mad.h>
43#include <rdma/restrack.h>
43#include "mad_priv.h" 44#include "mad_priv.h"
44 45
 46/* Total number of ports combined across all struct ib_device instances */
47#define RDMA_MAX_PORTS 1024
48
45struct pkey_index_qp_list { 49struct pkey_index_qp_list {
46 struct list_head pkey_index_list; 50 struct list_head pkey_index_list;
47 u16 pkey_index; 51 u16 pkey_index;
@@ -137,7 +141,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
137int roce_gid_mgmt_init(void); 141int roce_gid_mgmt_init(void);
138void roce_gid_mgmt_cleanup(void); 142void roce_gid_mgmt_cleanup(void);
139 143
140int roce_rescan_device(struct ib_device *ib_dev);
141unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); 144unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
142 145
143int ib_cache_setup_one(struct ib_device *device); 146int ib_cache_setup_one(struct ib_device *device);
@@ -191,13 +194,6 @@ void ib_sa_cleanup(void);
191int rdma_nl_init(void); 194int rdma_nl_init(void);
192void rdma_nl_exit(void); 195void rdma_nl_exit(void);
193 196
194/**
195 * Check if there are any listeners to the netlink group
196 * @group: the netlink group ID
197 * Returns 0 on success or a negative for no listeners.
198 */
199int ibnl_chk_listeners(unsigned int group);
200
201int ib_nl_handle_resolve_resp(struct sk_buff *skb, 197int ib_nl_handle_resolve_resp(struct sk_buff *skb,
202 struct nlmsghdr *nlh, 198 struct nlmsghdr *nlh,
203 struct netlink_ext_ack *extack); 199 struct netlink_ext_ack *extack);
@@ -213,11 +209,6 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
213 u64 *sn_pfx); 209 u64 *sn_pfx);
214 210
215#ifdef CONFIG_SECURITY_INFINIBAND 211#ifdef CONFIG_SECURITY_INFINIBAND
216int ib_security_pkey_access(struct ib_device *dev,
217 u8 port_num,
218 u16 pkey_index,
219 void *sec);
220
221void ib_security_destroy_port_pkey_list(struct ib_device *device); 212void ib_security_destroy_port_pkey_list(struct ib_device *device);
222 213
223void ib_security_cache_change(struct ib_device *device, 214void ib_security_cache_change(struct ib_device *device,
@@ -240,14 +231,6 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
240void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent); 231void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
241int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index); 232int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
242#else 233#else
243static inline int ib_security_pkey_access(struct ib_device *dev,
244 u8 port_num,
245 u16 pkey_index,
246 void *sec)
247{
248 return 0;
249}
250
251static inline void ib_security_destroy_port_pkey_list(struct ib_device *device) 234static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
252{ 235{
253} 236}
@@ -318,4 +301,31 @@ struct ib_device *ib_device_get_by_index(u32 ifindex);
318/* RDMA device netlink */ 301/* RDMA device netlink */
319void nldev_init(void); 302void nldev_init(void);
320void nldev_exit(void); 303void nldev_exit(void);
304
305static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
306 struct ib_pd *pd,
307 struct ib_qp_init_attr *attr,
308 struct ib_udata *udata)
309{
310 struct ib_qp *qp;
311
312 qp = dev->create_qp(pd, attr, udata);
313 if (IS_ERR(qp))
314 return qp;
315
316 qp->device = dev;
317 qp->pd = pd;
318 /*
 319 * We don't track XRC QPs for now, because they don't have a PD
 320 * and, more importantly, they are created internally by the driver;
321 * see mlx5 create_dev_resources() as an example.
322 */
323 if (attr->qp_type < IB_QPT_XRC_INI) {
324 qp->res.type = RDMA_RESTRACK_QP;
325 rdma_restrack_add(&qp->res);
326 } else
327 qp->res.valid = false;
328
329 return qp;
330}
321#endif /* _CORE_PRIV_H */ 331#endif /* _CORE_PRIV_H */
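The new _ib_create_qp() inline centralizes the driver create_qp() call plus resource-tracking registration. A sketch of how a core-layer caller is expected to use it; the wrapper name below is hypothetical and the real ib_create_qp()/uverbs call sites are not shown in this hunk:

/* Hypothetical core-layer caller: create a kernel QP and rely on
 * _ib_create_qp() to register it with restrack (non-XRC types only).
 */
static struct ib_qp *example_create_kernel_qp(struct ib_pd *pd,
					      struct ib_qp_init_attr *attr)
{
	struct ib_qp *qp;

	qp = _ib_create_qp(pd->device, pd, attr, NULL /* no udata: kernel QP */);
	if (IS_ERR(qp))
		return qp;

	/* qp->res is now valid and visible via the nldev RES_QP dump,
	 * unless attr->qp_type was an XRC type. */
	return qp;
}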
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index f2ae75fa3128..bc79ca8215d7 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -25,9 +25,10 @@
25#define IB_POLL_FLAGS \ 25#define IB_POLL_FLAGS \
26 (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) 26 (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
27 27
28static int __ib_process_cq(struct ib_cq *cq, int budget) 28static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *poll_wc)
29{ 29{
30 int i, n, completed = 0; 30 int i, n, completed = 0;
31 struct ib_wc *wcs = poll_wc ? : cq->wc;
31 32
32 /* 33 /*
33 * budget might be (-1) if the caller does not 34 * budget might be (-1) if the caller does not
@@ -35,9 +36,9 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
35 * minimum here. 36 * minimum here.
36 */ 37 */
37 while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH, 38 while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
38 budget - completed), cq->wc)) > 0) { 39 budget - completed), wcs)) > 0) {
39 for (i = 0; i < n; i++) { 40 for (i = 0; i < n; i++) {
40 struct ib_wc *wc = &cq->wc[i]; 41 struct ib_wc *wc = &wcs[i];
41 42
42 if (wc->wr_cqe) 43 if (wc->wr_cqe)
43 wc->wr_cqe->done(cq, wc); 44 wc->wr_cqe->done(cq, wc);
@@ -60,18 +61,20 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
60 * @cq: CQ to process 61 * @cq: CQ to process
61 * @budget: number of CQEs to poll for 62 * @budget: number of CQEs to poll for
62 * 63 *
63 * This function is used to process all outstanding CQ entries on a 64 * This function is used to process all outstanding CQ entries.
64 * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different 65 * It does not offload CQ processing to a different context and does
65 * context and does not ask for completion interrupts from the HCA. 66 * not ask for completion interrupts from the HCA.
 67 * Using direct processing on a CQ whose poll context is not IB_POLL_DIRECT
 68 * may trigger concurrent processing.
66 * 69 *
67 * Note: do not pass -1 as %budget unless it is guaranteed that the number 70 * Note: do not pass -1 as %budget unless it is guaranteed that the number
68 * of completions that will be processed is small. 71 * of completions that will be processed is small.
69 */ 72 */
70int ib_process_cq_direct(struct ib_cq *cq, int budget) 73int ib_process_cq_direct(struct ib_cq *cq, int budget)
71{ 74{
72 WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT); 75 struct ib_wc wcs[IB_POLL_BATCH];
73 76
74 return __ib_process_cq(cq, budget); 77 return __ib_process_cq(cq, budget, wcs);
75} 78}
76EXPORT_SYMBOL(ib_process_cq_direct); 79EXPORT_SYMBOL(ib_process_cq_direct);
77 80
@@ -85,7 +88,7 @@ static int ib_poll_handler(struct irq_poll *iop, int budget)
85 struct ib_cq *cq = container_of(iop, struct ib_cq, iop); 88 struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
86 int completed; 89 int completed;
87 90
88 completed = __ib_process_cq(cq, budget); 91 completed = __ib_process_cq(cq, budget, NULL);
89 if (completed < budget) { 92 if (completed < budget) {
90 irq_poll_complete(&cq->iop); 93 irq_poll_complete(&cq->iop);
91 if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) 94 if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
@@ -105,7 +108,7 @@ static void ib_cq_poll_work(struct work_struct *work)
105 struct ib_cq *cq = container_of(work, struct ib_cq, work); 108 struct ib_cq *cq = container_of(work, struct ib_cq, work);
106 int completed; 109 int completed;
107 110
108 completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE); 111 completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, NULL);
109 if (completed >= IB_POLL_BUDGET_WORKQUEUE || 112 if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
110 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) 113 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
111 queue_work(ib_comp_wq, &cq->work); 114 queue_work(ib_comp_wq, &cq->work);
@@ -117,20 +120,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
117} 120}
118 121
119/** 122/**
120 * ib_alloc_cq - allocate a completion queue 123 * __ib_alloc_cq - allocate a completion queue
121 * @dev: device to allocate the CQ for 124 * @dev: device to allocate the CQ for
122 * @private: driver private data, accessible from cq->cq_context 125 * @private: driver private data, accessible from cq->cq_context
123 * @nr_cqe: number of CQEs to allocate 126 * @nr_cqe: number of CQEs to allocate
124 * @comp_vector: HCA completion vectors for this CQ 127 * @comp_vector: HCA completion vectors for this CQ
125 * @poll_ctx: context to poll the CQ from. 128 * @poll_ctx: context to poll the CQ from.
129 * @caller: module owner name.
126 * 130 *
127 * This is the proper interface to allocate a CQ for in-kernel users. A 131 * This is the proper interface to allocate a CQ for in-kernel users. A
128 * CQ allocated with this interface will automatically be polled from the 132 * CQ allocated with this interface will automatically be polled from the
129 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id 133 * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
130 * to use this CQ abstraction. 134 * to use this CQ abstraction.
131 */ 135 */
132struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, 136struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
133 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) 137 int nr_cqe, int comp_vector,
138 enum ib_poll_context poll_ctx, const char *caller)
134{ 139{
135 struct ib_cq_init_attr cq_attr = { 140 struct ib_cq_init_attr cq_attr = {
136 .cqe = nr_cqe, 141 .cqe = nr_cqe,
@@ -154,6 +159,10 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
154 if (!cq->wc) 159 if (!cq->wc)
155 goto out_destroy_cq; 160 goto out_destroy_cq;
156 161
162 cq->res.type = RDMA_RESTRACK_CQ;
163 cq->res.kern_name = caller;
164 rdma_restrack_add(&cq->res);
165
157 switch (cq->poll_ctx) { 166 switch (cq->poll_ctx) {
158 case IB_POLL_DIRECT: 167 case IB_POLL_DIRECT:
159 cq->comp_handler = ib_cq_completion_direct; 168 cq->comp_handler = ib_cq_completion_direct;
@@ -178,11 +187,12 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
178 187
179out_free_wc: 188out_free_wc:
180 kfree(cq->wc); 189 kfree(cq->wc);
190 rdma_restrack_del(&cq->res);
181out_destroy_cq: 191out_destroy_cq:
182 cq->device->destroy_cq(cq); 192 cq->device->destroy_cq(cq);
183 return ERR_PTR(ret); 193 return ERR_PTR(ret);
184} 194}
185EXPORT_SYMBOL(ib_alloc_cq); 195EXPORT_SYMBOL(__ib_alloc_cq);
186 196
187/** 197/**
188 * ib_free_cq - free a completion queue 198 * ib_free_cq - free a completion queue
@@ -209,6 +219,7 @@ void ib_free_cq(struct ib_cq *cq)
209 } 219 }
210 220
211 kfree(cq->wc); 221 kfree(cq->wc);
222 rdma_restrack_del(&cq->res);
212 ret = cq->device->destroy_cq(cq); 223 ret = cq->device->destroy_cq(cq);
213 WARN_ON_ONCE(ret); 224 WARN_ON_ONCE(ret);
214} 225}
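The new poll_wc argument makes ib_process_cq_direct() poll into an on-stack array instead of cq->wc, which is why the old WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT) could be dropped: direct polling no longer clobbers the cq->wc array used by the IRQ and workqueue pollers, although, as the kernel-doc notes, completions may then be handled from two contexts. A minimal ULP-style sketch with hypothetical names, assuming the ib_alloc_cq() wrapper that supplies the caller name (not visible in this hunk) keeps the old argument order:

/* Hypothetical ULP setup and reap path. */
static struct ib_cq *example_setup_cq(struct ib_device *dev, void *ctx)
{
	/* 128 CQEs on completion vector 0, reaped only by the ULP. */
	return ib_alloc_cq(dev, ctx, 128, 0, IB_POLL_DIRECT);
}

static void example_reap(struct ib_cq *cq)
{
	/* Bounded budget per call, as the kernel-doc recommends; the
	 * completions land in an on-stack ib_wc[IB_POLL_BATCH] array. */
	while (ib_process_cq_direct(cq, 16) > 0)
		;
}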
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 465520627e4b..e8010e73a1cf 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -263,6 +263,8 @@ struct ib_device *ib_alloc_device(size_t size)
263 if (!device) 263 if (!device)
264 return NULL; 264 return NULL;
265 265
266 rdma_restrack_init(&device->res);
267
266 device->dev.class = &ib_class; 268 device->dev.class = &ib_class;
267 device_initialize(&device->dev); 269 device_initialize(&device->dev);
268 270
@@ -288,7 +290,7 @@ void ib_dealloc_device(struct ib_device *device)
288{ 290{
289 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && 291 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
290 device->reg_state != IB_DEV_UNINITIALIZED); 292 device->reg_state != IB_DEV_UNINITIALIZED);
291 kobject_put(&device->dev.kobj); 293 put_device(&device->dev);
292} 294}
293EXPORT_SYMBOL(ib_dealloc_device); 295EXPORT_SYMBOL(ib_dealloc_device);
294 296
@@ -462,7 +464,6 @@ int ib_register_device(struct ib_device *device,
462 struct ib_udata uhw = {.outlen = 0, .inlen = 0}; 464 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
463 struct device *parent = device->dev.parent; 465 struct device *parent = device->dev.parent;
464 466
465 WARN_ON_ONCE(!parent);
466 WARN_ON_ONCE(device->dma_device); 467 WARN_ON_ONCE(device->dma_device);
467 if (device->dev.dma_ops) { 468 if (device->dev.dma_ops) {
468 /* 469 /*
@@ -471,16 +472,25 @@ int ib_register_device(struct ib_device *device,
471 * into device->dev. 472 * into device->dev.
472 */ 473 */
473 device->dma_device = &device->dev; 474 device->dma_device = &device->dev;
474 if (!device->dev.dma_mask) 475 if (!device->dev.dma_mask) {
475 device->dev.dma_mask = parent->dma_mask; 476 if (parent)
476 if (!device->dev.coherent_dma_mask) 477 device->dev.dma_mask = parent->dma_mask;
477 device->dev.coherent_dma_mask = 478 else
478 parent->coherent_dma_mask; 479 WARN_ON_ONCE(true);
480 }
481 if (!device->dev.coherent_dma_mask) {
482 if (parent)
483 device->dev.coherent_dma_mask =
484 parent->coherent_dma_mask;
485 else
486 WARN_ON_ONCE(true);
487 }
479 } else { 488 } else {
480 /* 489 /*
481 * The caller did not provide custom DMA operations. Use the 490 * The caller did not provide custom DMA operations. Use the
482 * DMA mapping operations of the parent device. 491 * DMA mapping operations of the parent device.
483 */ 492 */
493 WARN_ON_ONCE(!parent);
484 device->dma_device = parent; 494 device->dma_device = parent;
485 } 495 }
486 496
@@ -588,6 +598,8 @@ void ib_unregister_device(struct ib_device *device)
588 } 598 }
589 up_read(&lists_rwsem); 599 up_read(&lists_rwsem);
590 600
601 rdma_restrack_clean(&device->res);
602
591 ib_device_unregister_rdmacg(device); 603 ib_device_unregister_rdmacg(device);
592 ib_device_unregister_sysfs(device); 604 ib_device_unregister_sysfs(device);
593 605
@@ -1033,32 +1045,22 @@ EXPORT_SYMBOL(ib_modify_port);
1033 1045
1034/** 1046/**
1035 * ib_find_gid - Returns the port number and GID table index where 1047 * ib_find_gid - Returns the port number and GID table index where
 1036 * a specified GID value occurs. 1048 * a specified GID value occurs. It searches only the IB link layer.
1037 * @device: The device to query. 1049 * @device: The device to query.
1038 * @gid: The GID value to search for. 1050 * @gid: The GID value to search for.
1039 * @gid_type: Type of GID.
1040 * @ndev: The ndev related to the GID to search for. 1051 * @ndev: The ndev related to the GID to search for.
1041 * @port_num: The port number of the device where the GID value was found. 1052 * @port_num: The port number of the device where the GID value was found.
1042 * @index: The index into the GID table where the GID was found. This 1053 * @index: The index into the GID table where the GID was found. This
1043 * parameter may be NULL. 1054 * parameter may be NULL.
1044 */ 1055 */
1045int ib_find_gid(struct ib_device *device, union ib_gid *gid, 1056int ib_find_gid(struct ib_device *device, union ib_gid *gid,
1046 enum ib_gid_type gid_type, struct net_device *ndev, 1057 struct net_device *ndev, u8 *port_num, u16 *index)
1047 u8 *port_num, u16 *index)
1048{ 1058{
1049 union ib_gid tmp_gid; 1059 union ib_gid tmp_gid;
1050 int ret, port, i; 1060 int ret, port, i;
1051 1061
1052 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { 1062 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
1053 if (rdma_cap_roce_gid_table(device, port)) { 1063 if (rdma_cap_roce_gid_table(device, port))
1054 if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
1055 ndev, index)) {
1056 *port_num = port;
1057 return 0;
1058 }
1059 }
1060
1061 if (gid_type != IB_GID_TYPE_IB)
1062 continue; 1064 continue;
1063 1065
1064 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { 1066 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 84d2615b5d4b..a0a9ed719031 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -388,13 +388,11 @@ int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
388EXPORT_SYMBOL(ib_flush_fmr_pool); 388EXPORT_SYMBOL(ib_flush_fmr_pool);
389 389
390/** 390/**
391 * ib_fmr_pool_map_phys - 391 * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
392 * @pool:FMR pool to allocate FMR from 392 * @pool_handle: FMR pool to allocate FMR from
393 * @page_list:List of pages to map 393 * @page_list: List of pages to map
394 * @list_len:Number of pages in @page_list 394 * @list_len: Number of pages in @page_list
395 * @io_virtual_address:I/O virtual address for new FMR 395 * @io_virtual_address: I/O virtual address for new FMR
396 *
397 * Map an FMR from an FMR pool.
398 */ 396 */
399struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, 397struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
400 u64 *page_list, 398 u64 *page_list,
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3c4faadb8cdd..81528f64061a 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -654,6 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
654 } 654 }
655 skb_num++; 655 skb_num++;
656 spin_lock_irqsave(&iwpm_mapinfo_lock, flags); 656 spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
657 ret = -EINVAL;
657 for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) { 658 for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
658 hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], 659 hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
659 hlist_node) { 660 hlist_node) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cb91245e9163..c50596f7f98a 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -49,7 +49,6 @@
49#include "smi.h" 49#include "smi.h"
50#include "opa_smi.h" 50#include "opa_smi.h"
51#include "agent.h" 51#include "agent.h"
52#include "core_priv.h"
53 52
54static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; 53static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
55static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; 54static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 1fb72c356e36..3ccaae18ad75 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -41,8 +41,6 @@
41#include <linux/module.h> 41#include <linux/module.h>
42#include "core_priv.h" 42#include "core_priv.h"
43 43
44#include "core_priv.h"
45
46static DEFINE_MUTEX(rdma_nl_mutex); 44static DEFINE_MUTEX(rdma_nl_mutex);
47static struct sock *nls; 45static struct sock *nls;
48static struct { 46static struct {
@@ -83,15 +81,13 @@ static bool is_nl_valid(unsigned int type, unsigned int op)
83 if (!is_nl_msg_valid(type, op)) 81 if (!is_nl_msg_valid(type, op))
84 return false; 82 return false;
85 83
86 cb_table = rdma_nl_types[type].cb_table; 84 if (!rdma_nl_types[type].cb_table) {
87#ifdef CONFIG_MODULES
88 if (!cb_table) {
89 mutex_unlock(&rdma_nl_mutex); 85 mutex_unlock(&rdma_nl_mutex);
90 request_module("rdma-netlink-subsys-%d", type); 86 request_module("rdma-netlink-subsys-%d", type);
91 mutex_lock(&rdma_nl_mutex); 87 mutex_lock(&rdma_nl_mutex);
92 cb_table = rdma_nl_types[type].cb_table;
93 } 88 }
94#endif 89
90 cb_table = rdma_nl_types[type].cb_table;
95 91
96 if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) 92 if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
97 return false; 93 return false;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0dcd1aa6f683..fa8655e3b3ed 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -31,6 +31,8 @@
31 */ 31 */
32 32
33#include <linux/module.h> 33#include <linux/module.h>
34#include <linux/pid.h>
35#include <linux/pid_namespace.h>
34#include <net/netlink.h> 36#include <net/netlink.h>
35#include <rdma/rdma_netlink.h> 37#include <rdma/rdma_netlink.h>
36 38
@@ -52,16 +54,42 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
52 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, 54 [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
53 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, 55 [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
54 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, 56 [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
57 [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
58 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
59 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
60 .len = 16 },
61 [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
62 [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
63 [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
64 [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
65 [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
66 [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
67 [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
68 [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
69 [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
70 [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
71 [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
72 [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
73 .len = TASK_COMM_LEN },
55}; 74};
56 75
57static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) 76static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
58{ 77{
59 char fw[IB_FW_VERSION_NAME_MAX];
60
61 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 78 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
62 return -EMSGSIZE; 79 return -EMSGSIZE;
63 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) 80 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
64 return -EMSGSIZE; 81 return -EMSGSIZE;
82
83 return 0;
84}
85
86static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
87{
88 char fw[IB_FW_VERSION_NAME_MAX];
89
90 if (fill_nldev_handle(msg, device))
91 return -EMSGSIZE;
92
65 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) 93 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
66 return -EMSGSIZE; 94 return -EMSGSIZE;
67 95
@@ -92,10 +120,9 @@ static int fill_port_info(struct sk_buff *msg,
92 struct ib_port_attr attr; 120 struct ib_port_attr attr;
93 int ret; 121 int ret;
94 122
95 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 123 if (fill_nldev_handle(msg, device))
96 return -EMSGSIZE;
97 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name))
98 return -EMSGSIZE; 124 return -EMSGSIZE;
125
99 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) 126 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
100 return -EMSGSIZE; 127 return -EMSGSIZE;
101 128
@@ -126,6 +153,137 @@ static int fill_port_info(struct sk_buff *msg,
126 return 0; 153 return 0;
127} 154}
128 155
156static int fill_res_info_entry(struct sk_buff *msg,
157 const char *name, u64 curr)
158{
159 struct nlattr *entry_attr;
160
161 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
162 if (!entry_attr)
163 return -EMSGSIZE;
164
165 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
166 goto err;
167 if (nla_put_u64_64bit(msg,
168 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
169 goto err;
170
171 nla_nest_end(msg, entry_attr);
172 return 0;
173
174err:
175 nla_nest_cancel(msg, entry_attr);
176 return -EMSGSIZE;
177}
178
179static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
180{
181 static const char * const names[RDMA_RESTRACK_MAX] = {
182 [RDMA_RESTRACK_PD] = "pd",
183 [RDMA_RESTRACK_CQ] = "cq",
184 [RDMA_RESTRACK_QP] = "qp",
185 };
186
187 struct rdma_restrack_root *res = &device->res;
188 struct nlattr *table_attr;
189 int ret, i, curr;
190
191 if (fill_nldev_handle(msg, device))
192 return -EMSGSIZE;
193
194 table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
195 if (!table_attr)
196 return -EMSGSIZE;
197
198 for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
199 if (!names[i])
200 continue;
201 curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
202 ret = fill_res_info_entry(msg, names[i], curr);
203 if (ret)
204 goto err;
205 }
206
207 nla_nest_end(msg, table_attr);
208 return 0;
209
210err:
211 nla_nest_cancel(msg, table_attr);
212 return ret;
213}
214
215static int fill_res_qp_entry(struct sk_buff *msg,
216 struct ib_qp *qp, uint32_t port)
217{
218 struct rdma_restrack_entry *res = &qp->res;
219 struct ib_qp_init_attr qp_init_attr;
220 struct nlattr *entry_attr;
221 struct ib_qp_attr qp_attr;
222 int ret;
223
224 ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
225 if (ret)
226 return ret;
227
228 if (port && port != qp_attr.port_num)
229 return 0;
230
231 entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
232 if (!entry_attr)
233 goto out;
234
235 /* In create_qp() port is not set yet */
236 if (qp_attr.port_num &&
237 nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
238 goto err;
239
240 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
241 goto err;
242 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
243 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
244 qp_attr.dest_qp_num))
245 goto err;
246 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
247 qp_attr.rq_psn))
248 goto err;
249 }
250
251 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
252 goto err;
253
254 if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
255 qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
256 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
257 qp_attr.path_mig_state))
258 goto err;
259 }
260 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
261 goto err;
262 if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
263 goto err;
264
265 /*
266 * Existence of task means that it is user QP and netlink
267 * user is invited to go and read /proc/PID/comm to get name
268 * of the task file and res->task_com should be NULL.
269 */
270 if (rdma_is_kernel_res(res)) {
271 if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
272 goto err;
273 } else {
274 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
275 goto err;
276 }
277
278 nla_nest_end(msg, entry_attr);
279 return 0;
280
281err:
282 nla_nest_cancel(msg, entry_attr);
283out:
284 return -EMSGSIZE;
285}
286
129static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, 287static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
130 struct netlink_ext_ack *extack) 288 struct netlink_ext_ack *extack)
131{ 289{
@@ -321,6 +479,213 @@ out:
321 return skb->len; 479 return skb->len;
322} 480}
323 481
482static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
483 struct netlink_ext_ack *extack)
484{
485 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
486 struct ib_device *device;
487 struct sk_buff *msg;
488 u32 index;
489 int ret;
490
491 ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
492 nldev_policy, extack);
493 if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
494 return -EINVAL;
495
496 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
497 device = ib_device_get_by_index(index);
498 if (!device)
499 return -EINVAL;
500
501 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
502 if (!msg)
503 goto err;
504
505 nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
506 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
507 0, 0);
508
509 ret = fill_res_info(msg, device);
510 if (ret)
511 goto err_free;
512
513 nlmsg_end(msg, nlh);
514 put_device(&device->dev);
515 return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
516
517err_free:
518 nlmsg_free(msg);
519err:
520 put_device(&device->dev);
521 return ret;
522}
523
524static int _nldev_res_get_dumpit(struct ib_device *device,
525 struct sk_buff *skb,
526 struct netlink_callback *cb,
527 unsigned int idx)
528{
529 int start = cb->args[0];
530 struct nlmsghdr *nlh;
531
532 if (idx < start)
533 return 0;
534
535 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
536 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
537 0, NLM_F_MULTI);
538
539 if (fill_res_info(skb, device)) {
540 nlmsg_cancel(skb, nlh);
541 goto out;
542 }
543
544 nlmsg_end(skb, nlh);
545
546 idx++;
547
548out:
549 cb->args[0] = idx;
550 return skb->len;
551}
552
553static int nldev_res_get_dumpit(struct sk_buff *skb,
554 struct netlink_callback *cb)
555{
556 return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
557}
558
559static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
560 struct netlink_callback *cb)
561{
562 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
563 struct rdma_restrack_entry *res;
564 int err, ret = 0, idx = 0;
565 struct nlattr *table_attr;
566 struct ib_device *device;
567 int start = cb->args[0];
568 struct ib_qp *qp = NULL;
569 struct nlmsghdr *nlh;
570 u32 index, port = 0;
571
572 err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
573 nldev_policy, NULL);
574 /*
575 * Right now, we are expecting the device index to get QP information,
576 * but it is possible to extend this code to return all devices in
577 * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
 578 * If it doesn't exist, we will iterate over all devices.
579 *
580 * But it is not needed for now.
581 */
582 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
583 return -EINVAL;
584
585 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
586 device = ib_device_get_by_index(index);
587 if (!device)
588 return -EINVAL;
589
590 /*
591 * If no PORT_INDEX is supplied, we will return all QPs from that device
592 */
593 if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
594 port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
595 if (!rdma_is_port_valid(device, port)) {
596 ret = -EINVAL;
597 goto err_index;
598 }
599 }
600
601 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
602 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
603 0, NLM_F_MULTI);
604
605 if (fill_nldev_handle(skb, device)) {
606 ret = -EMSGSIZE;
607 goto err;
608 }
609
610 table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
611 if (!table_attr) {
612 ret = -EMSGSIZE;
613 goto err;
614 }
615
616 down_read(&device->res.rwsem);
617 hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
618 if (idx < start)
619 goto next;
620
621 if ((rdma_is_kernel_res(res) &&
622 task_active_pid_ns(current) != &init_pid_ns) ||
623 (!rdma_is_kernel_res(res) &&
624 task_active_pid_ns(current) != task_active_pid_ns(res->task)))
625 /*
 626 * 1. Kernel QPs should be visible in the init namespace only
627 * 2. Present only QPs visible in the current namespace
628 */
629 goto next;
630
631 if (!rdma_restrack_get(res))
632 /*
 633 * The resource is under release now, but we are not
 634 * releasing the lock, so it will be released in
 635 * our next pass, once we get the ->next pointer.
636 */
637 goto next;
638
639 qp = container_of(res, struct ib_qp, res);
640
641 up_read(&device->res.rwsem);
642 ret = fill_res_qp_entry(skb, qp, port);
643 down_read(&device->res.rwsem);
644 /*
645 * Return resource back, but it won't be released till
646 * the &device->res.rwsem will be released for write.
647 */
648 rdma_restrack_put(res);
649
650 if (ret == -EMSGSIZE)
651 /*
652 * There is a chance to optimize here.
653 * It can be done by using list_prepare_entry
654 * and list_for_each_entry_continue afterwards.
655 */
656 break;
657 if (ret)
658 goto res_err;
659next: idx++;
660 }
661 up_read(&device->res.rwsem);
662
663 nla_nest_end(skb, table_attr);
664 nlmsg_end(skb, nlh);
665 cb->args[0] = idx;
666
667 /*
668 * No more QPs to fill, cancel the message and
669 * return 0 to mark end of dumpit.
670 */
671 if (!qp)
672 goto err;
673
674 put_device(&device->dev);
675 return skb->len;
676
677res_err:
678 nla_nest_cancel(skb, table_attr);
679 up_read(&device->res.rwsem);
680
681err:
682 nlmsg_cancel(skb, nlh);
683
684err_index:
685 put_device(&device->dev);
686 return ret;
687}
688
324static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { 689static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
325 [RDMA_NLDEV_CMD_GET] = { 690 [RDMA_NLDEV_CMD_GET] = {
326 .doit = nldev_get_doit, 691 .doit = nldev_get_doit,
@@ -330,6 +695,23 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
330 .doit = nldev_port_get_doit, 695 .doit = nldev_port_get_doit,
331 .dump = nldev_port_get_dumpit, 696 .dump = nldev_port_get_dumpit,
332 }, 697 },
698 [RDMA_NLDEV_CMD_RES_GET] = {
699 .doit = nldev_res_get_doit,
700 .dump = nldev_res_get_dumpit,
701 },
702 [RDMA_NLDEV_CMD_RES_QP_GET] = {
703 .dump = nldev_res_get_qp_dumpit,
704 /*
705 * .doit is not implemented yet for two reasons:
706 * 1. It is not needed yet.
707 * 2. There is a need to provide identifier, while it is easy
708 * for the QPs (device index + port index + LQPN), it is not
709 * the case for the rest of resources (PD and CQ). Because it
710 * is better to provide similar interface for all resources,
711 * let's wait till we will have other resources implemented
712 * too.
713 */
714 },
333}; 715};
334 716
335void __init nldev_init(void) 717void __init nldev_init(void)
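For orientation, the nldev additions above are consumed over the NETLINK_RDMA socket family. A hedged userspace probe, not part of the kernel patch, that dumps the per-device resource summary; the constant names are taken from the uapi headers <linux/netlink.h> and <rdma/rdma_netlink.h>, and error handling plus attribute parsing are trimmed to the minimum:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <rdma/rdma_netlink.h>	/* uapi: RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET */

int main(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct nlmsghdr req = {
		.nlmsg_len   = NLMSG_LENGTH(0),
		.nlmsg_type  = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
						RDMA_NLDEV_CMD_RES_GET),
		.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
	};
	char buf[8192];
	ssize_t n;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_RDMA);
	if (fd < 0)
		return 1;
	if (sendto(fd, &req, req.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		return 1;

	/* Simplified: real code walks every nlmsghdr with NLMSG_OK/NLMSG_NEXT
	 * and parses the nested RDMA_NLDEV_ATTR_RES_SUMMARY attributes. */
	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (nlh->nlmsg_type == NLMSG_DONE)
			break;
		printf("got %zd bytes of resource-summary reply\n", n);
	}
	close(fd);
	return 0;
}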
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
new file mode 100644
index 000000000000..857637bf46da
--- /dev/null
+++ b/drivers/infiniband/core/restrack.c
@@ -0,0 +1,164 @@
1/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
2/*
3 * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
4 */
5
6#include <rdma/ib_verbs.h>
7#include <rdma/restrack.h>
8#include <linux/mutex.h>
9#include <linux/sched/task.h>
10#include <linux/uaccess.h>
11#include <linux/pid_namespace.h>
12
13void rdma_restrack_init(struct rdma_restrack_root *res)
14{
15 init_rwsem(&res->rwsem);
16}
17
18void rdma_restrack_clean(struct rdma_restrack_root *res)
19{
20 WARN_ON_ONCE(!hash_empty(res->hash));
21}
22
23int rdma_restrack_count(struct rdma_restrack_root *res,
24 enum rdma_restrack_type type,
25 struct pid_namespace *ns)
26{
27 struct rdma_restrack_entry *e;
28 u32 cnt = 0;
29
30 down_read(&res->rwsem);
31 hash_for_each_possible(res->hash, e, node, type) {
32 if (ns == &init_pid_ns ||
33 (!rdma_is_kernel_res(e) &&
34 ns == task_active_pid_ns(e->task)))
35 cnt++;
36 }
37 up_read(&res->rwsem);
38 return cnt;
39}
40EXPORT_SYMBOL(rdma_restrack_count);
41
42static void set_kern_name(struct rdma_restrack_entry *res)
43{
44 enum rdma_restrack_type type = res->type;
45 struct ib_qp *qp;
46
47 if (type != RDMA_RESTRACK_QP)
 48 /* PD and CQ types already have this name embedded in them */
49 return;
50
51 qp = container_of(res, struct ib_qp, res);
52 if (!qp->pd) {
53 WARN_ONCE(true, "XRC QPs are not supported\n");
54 /* Survive, despite the programmer's error */
55 res->kern_name = " ";
56 return;
57 }
58
59 res->kern_name = qp->pd->res.kern_name;
60}
61
62static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
63{
64 enum rdma_restrack_type type = res->type;
65 struct ib_device *dev;
66 struct ib_xrcd *xrcd;
67 struct ib_pd *pd;
68 struct ib_cq *cq;
69 struct ib_qp *qp;
70
71 switch (type) {
72 case RDMA_RESTRACK_PD:
73 pd = container_of(res, struct ib_pd, res);
74 dev = pd->device;
75 break;
76 case RDMA_RESTRACK_CQ:
77 cq = container_of(res, struct ib_cq, res);
78 dev = cq->device;
79 break;
80 case RDMA_RESTRACK_QP:
81 qp = container_of(res, struct ib_qp, res);
82 dev = qp->device;
83 break;
84 case RDMA_RESTRACK_XRCD:
85 xrcd = container_of(res, struct ib_xrcd, res);
86 dev = xrcd->device;
87 break;
88 default:
89 WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
90 return NULL;
91 }
92
93 return dev;
94}
95
96void rdma_restrack_add(struct rdma_restrack_entry *res)
97{
98 struct ib_device *dev = res_to_dev(res);
99
100 if (!dev)
101 return;
102
103 if (!uaccess_kernel()) {
104 get_task_struct(current);
105 res->task = current;
106 res->kern_name = NULL;
107 } else {
108 set_kern_name(res);
109 res->task = NULL;
110 }
111
112 kref_init(&res->kref);
113 init_completion(&res->comp);
114 res->valid = true;
115
116 down_write(&dev->res.rwsem);
117 hash_add(dev->res.hash, &res->node, res->type);
118 up_write(&dev->res.rwsem);
119}
120EXPORT_SYMBOL(rdma_restrack_add);
121
122int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
123{
124 return kref_get_unless_zero(&res->kref);
125}
126EXPORT_SYMBOL(rdma_restrack_get);
127
128static void restrack_release(struct kref *kref)
129{
130 struct rdma_restrack_entry *res;
131
132 res = container_of(kref, struct rdma_restrack_entry, kref);
133 complete(&res->comp);
134}
135
136int rdma_restrack_put(struct rdma_restrack_entry *res)
137{
138 return kref_put(&res->kref, restrack_release);
139}
140EXPORT_SYMBOL(rdma_restrack_put);
141
142void rdma_restrack_del(struct rdma_restrack_entry *res)
143{
144 struct ib_device *dev;
145
146 if (!res->valid)
147 return;
148
149 dev = res_to_dev(res);
150 if (!dev)
151 return;
152
153 rdma_restrack_put(res);
154
155 wait_for_completion(&res->comp);
156
157 down_write(&dev->res.rwsem);
158 hash_del(&res->node);
159 res->valid = false;
160 if (res->task)
161 put_task_struct(res->task);
162 up_write(&dev->res.rwsem);
163}
164EXPORT_SYMBOL(rdma_restrack_del);
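The intended calling pattern for restrack, pieced together from this file and the _ib_create_qp()/__ib_alloc_cq() hunks above, is add-on-create and del-on-destroy, with rdma_restrack_del() blocking until every reader has dropped its reference. A condensed sketch as a PD would use it; the real hooks presumably live in the PD allocation path elsewhere in this series and are not shown here:

/* Sketch of the add/del pairing for a PD (assumes pd->device is set). */
static void example_track_pd(struct ib_pd *pd, const char *caller)
{
	pd->res.type = RDMA_RESTRACK_PD;
	pd->res.kern_name = caller;	/* kept only for kernel-owned PDs;
					 * rdma_restrack_add() records the
					 * task instead for user context */
	rdma_restrack_add(&pd->res);
}

static void example_untrack_pd(struct ib_pd *pd)
{
	/*
	 * Blocks until every rdma_restrack_get() taken by e.g. an nldev
	 * dump has been balanced by rdma_restrack_put(), then unhashes
	 * the entry and releases the task reference, if any.
	 */
	rdma_restrack_del(&pd->res);
}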
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 90e3889b7fbe..5a52ec77940a 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -410,15 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
410 rtnl_unlock(); 410 rtnl_unlock();
411} 411}
412 412
413/* This function will rescan all of the network devices in the system 413/**
414 * and add their gids, as needed, to the relevant RoCE devices. */ 414 * rdma_roce_rescan_device - Rescan all of the network devices in the system
415int roce_rescan_device(struct ib_device *ib_dev) 415 * and add their gids, as needed, to the relevant RoCE devices.
416 *
417 * @device: the rdma device
418 */
419void rdma_roce_rescan_device(struct ib_device *ib_dev)
416{ 420{
417 ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, 421 ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
418 enum_all_gids_of_dev_cb, NULL); 422 enum_all_gids_of_dev_cb, NULL);
419
420 return 0;
421} 423}
424EXPORT_SYMBOL(rdma_roce_rescan_device);
422 425
423static void callback_for_addr_gid_device_scan(struct ib_device *device, 426static void callback_for_addr_gid_device_scan(struct ib_device *device,
424 u8 port, 427 u8 port,
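Exporting rdma_roce_rescan_device() (and making it void, since the enumeration cannot fail) lets a driver ask the core to repopulate its RoCE GID tables after registration. A thin driver-side sketch with a hypothetical callback name, assuming the device was registered earlier:

/* Hypothetical driver hook (not from this patch): once the RoCE port is
 * ready, rescan system net_devices and install any missing GIDs. */
static void example_roce_port_ready(struct ib_device *ibdev)
{
	rdma_roce_rescan_device(ibdev);
}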
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index ab5e1024fea9..8cf15d4a8ac4 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,9 +1227,9 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
1227 return src_path_mask; 1227 return src_path_mask;
1228} 1228}
1229 1229
1230int ib_init_ah_from_path(struct ib_device *device, u8 port_num, 1230int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
1231 struct sa_path_rec *rec, 1231 struct sa_path_rec *rec,
1232 struct rdma_ah_attr *ah_attr) 1232 struct rdma_ah_attr *ah_attr)
1233{ 1233{
1234 int ret; 1234 int ret;
1235 u16 gid_index; 1235 u16 gid_index;
@@ -1341,10 +1341,11 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
1341 1341
1342 return 0; 1342 return 0;
1343} 1343}
1344EXPORT_SYMBOL(ib_init_ah_from_path); 1344EXPORT_SYMBOL(ib_init_ah_attr_from_path);
1345 1345
1346static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) 1346static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1347{ 1347{
1348 struct rdma_ah_attr ah_attr;
1348 unsigned long flags; 1349 unsigned long flags;
1349 1350
1350 spin_lock_irqsave(&query->port->ah_lock, flags); 1351 spin_lock_irqsave(&query->port->ah_lock, flags);
@@ -1356,6 +1357,15 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1356 query->sm_ah = query->port->sm_ah; 1357 query->sm_ah = query->port->sm_ah;
1357 spin_unlock_irqrestore(&query->port->ah_lock, flags); 1358 spin_unlock_irqrestore(&query->port->ah_lock, flags);
1358 1359
1360 /*
1361 * Always check if sm_ah has valid dlid assigned,
1362 * before querying for class port info
1363 */
1364 if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1365 !rdma_is_valid_unicast_lid(&ah_attr)) {
1366 kref_put(&query->sm_ah->ref, free_sm_ah);
1367 return -EAGAIN;
1368 }
1359 query->mad_buf = ib_create_send_mad(query->port->agent, 1, 1369 query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1360 query->sm_ah->pkey_index, 1370 query->sm_ah->pkey_index,
1361 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, 1371 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 59b2f96d986a..b61dda6b04fc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -653,12 +653,11 @@ int ib_security_modify_qp(struct ib_qp *qp,
653 } 653 }
654 return ret; 654 return ret;
655} 655}
656EXPORT_SYMBOL(ib_security_modify_qp);
657 656
658int ib_security_pkey_access(struct ib_device *dev, 657static int ib_security_pkey_access(struct ib_device *dev,
659 u8 port_num, 658 u8 port_num,
660 u16 pkey_index, 659 u16 pkey_index,
661 void *sec) 660 void *sec)
662{ 661{
663 u64 subnet_prefix; 662 u64 subnet_prefix;
664 u16 pkey; 663 u16 pkey;
@@ -678,7 +677,6 @@ int ib_security_pkey_access(struct ib_device *dev,
678 677
679 return security_ib_pkey_access(sec, subnet_prefix, pkey); 678 return security_ib_pkey_access(sec, subnet_prefix, pkey);
680} 679}
681EXPORT_SYMBOL(ib_security_pkey_access);
682 680
683static int ib_mad_agent_security_change(struct notifier_block *nb, 681static int ib_mad_agent_security_change(struct notifier_block *nb,
684 unsigned long event, 682 unsigned long event,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index e30d86fa1855..8ae1308eecc7 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1276,7 +1276,6 @@ int ib_device_register_sysfs(struct ib_device *device,
1276 int ret; 1276 int ret;
1277 int i; 1277 int i;
1278 1278
1279 WARN_ON_ONCE(!device->dev.parent);
1280 ret = dev_set_name(class_dev, "%s", device->name); 1279 ret = dev_set_name(class_dev, "%s", device->name);
1281 if (ret) 1280 if (ret)
1282 return ret; 1281 return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f7adae0adc19..8ae636bb09e5 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -53,6 +53,8 @@
53#include <rdma/ib_user_cm.h> 53#include <rdma/ib_user_cm.h>
54#include <rdma/ib_marshall.h> 54#include <rdma/ib_marshall.h>
55 55
56#include "core_priv.h"
57
56MODULE_AUTHOR("Libor Michalek"); 58MODULE_AUTHOR("Libor Michalek");
57MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); 59MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
58MODULE_LICENSE("Dual BSD/GPL"); 60MODULE_LICENSE("Dual BSD/GPL");
@@ -104,10 +106,13 @@ struct ib_ucm_event {
104enum { 106enum {
105 IB_UCM_MAJOR = 231, 107 IB_UCM_MAJOR = 231,
106 IB_UCM_BASE_MINOR = 224, 108 IB_UCM_BASE_MINOR = 224,
107 IB_UCM_MAX_DEVICES = 32 109 IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
110 IB_UCM_NUM_FIXED_MINOR = 32,
111 IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
108}; 112};
109 113
110#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) 114#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
115static dev_t dynamic_ucm_dev;
111 116
112static void ib_ucm_add_one(struct ib_device *device); 117static void ib_ucm_add_one(struct ib_device *device);
113static void ib_ucm_remove_one(struct ib_device *device, void *client_data); 118static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
@@ -1199,7 +1204,6 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
1199 return 0; 1204 return 0;
1200} 1205}
1201 1206
1202static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
1203static void ib_ucm_release_dev(struct device *dev) 1207static void ib_ucm_release_dev(struct device *dev)
1204{ 1208{
1205 struct ib_ucm_device *ucm_dev; 1209 struct ib_ucm_device *ucm_dev;
@@ -1210,10 +1214,7 @@ static void ib_ucm_release_dev(struct device *dev)
1210 1214
1211static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev) 1215static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
1212{ 1216{
1213 if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) 1217 clear_bit(ucm_dev->devnum, dev_map);
1214 clear_bit(ucm_dev->devnum, dev_map);
1215 else
1216 clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
1217} 1218}
1218 1219
1219static const struct file_operations ucm_fops = { 1220static const struct file_operations ucm_fops = {
@@ -1235,27 +1236,6 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
1235} 1236}
1236static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1237static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1237 1238
1238static dev_t overflow_maj;
1239static int find_overflow_devnum(void)
1240{
1241 int ret;
1242
1243 if (!overflow_maj) {
1244 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
1245 "infiniband_cm");
1246 if (ret) {
1247 pr_err("ucm: couldn't register dynamic device number\n");
1248 return ret;
1249 }
1250 }
1251
1252 ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
1253 if (ret >= IB_UCM_MAX_DEVICES)
1254 return -1;
1255
1256 return ret;
1257}
1258
1259static void ib_ucm_add_one(struct ib_device *device) 1239static void ib_ucm_add_one(struct ib_device *device)
1260{ 1240{
1261 int devnum; 1241 int devnum;
@@ -1274,19 +1254,14 @@ static void ib_ucm_add_one(struct ib_device *device)
1274 ucm_dev->dev.release = ib_ucm_release_dev; 1254 ucm_dev->dev.release = ib_ucm_release_dev;
1275 1255
1276 devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); 1256 devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
1277 if (devnum >= IB_UCM_MAX_DEVICES) { 1257 if (devnum >= IB_UCM_MAX_DEVICES)
1278 devnum = find_overflow_devnum(); 1258 goto err;
1279 if (devnum < 0) 1259 ucm_dev->devnum = devnum;
1280 goto err; 1260 set_bit(devnum, dev_map);
1281 1261 if (devnum >= IB_UCM_NUM_FIXED_MINOR)
1282 ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; 1262 base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
1283 base = devnum + overflow_maj; 1263 else
1284 set_bit(devnum, overflow_map); 1264 base = IB_UCM_BASE_DEV + devnum;
1285 } else {
1286 ucm_dev->devnum = devnum;
1287 base = devnum + IB_UCM_BASE_DEV;
1288 set_bit(devnum, dev_map);
1289 }
1290 1265
1291 cdev_init(&ucm_dev->cdev, &ucm_fops); 1266 cdev_init(&ucm_dev->cdev, &ucm_fops);
1292 ucm_dev->cdev.owner = THIS_MODULE; 1267 ucm_dev->cdev.owner = THIS_MODULE;
@@ -1334,13 +1309,20 @@ static int __init ib_ucm_init(void)
1334{ 1309{
1335 int ret; 1310 int ret;
1336 1311
1337 ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, 1312 ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
1338 "infiniband_cm"); 1313 "infiniband_cm");
1339 if (ret) { 1314 if (ret) {
1340 pr_err("ucm: couldn't register device number\n"); 1315 pr_err("ucm: couldn't register device number\n");
1341 goto error1; 1316 goto error1;
1342 } 1317 }
1343 1318
1319 ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
1320 "infiniband_cm");
1321 if (ret) {
1322 pr_err("ucm: couldn't register dynamic device number\n");
1323 goto err_alloc;
1324 }
1325
1344 ret = class_create_file(&cm_class, &class_attr_abi_version.attr); 1326 ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
1345 if (ret) { 1327 if (ret) {
1346 pr_err("ucm: couldn't create abi_version attribute\n"); 1328 pr_err("ucm: couldn't create abi_version attribute\n");
@@ -1357,7 +1339,9 @@ static int __init ib_ucm_init(void)
1357error3: 1339error3:
1358 class_remove_file(&cm_class, &class_attr_abi_version.attr); 1340 class_remove_file(&cm_class, &class_attr_abi_version.attr);
1359error2: 1341error2:
1360 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); 1342 unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
1343err_alloc:
1344 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
1361error1: 1345error1:
1362 return ret; 1346 return ret;
1363} 1347}
@@ -1366,9 +1350,8 @@ static void __exit ib_ucm_cleanup(void)
1366{ 1350{
1367 ib_unregister_client(&ucm_client); 1351 ib_unregister_client(&ucm_client);
1368 class_remove_file(&cm_class, &class_attr_abi_version.attr); 1352 class_remove_file(&cm_class, &class_attr_abi_version.attr);
1369 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); 1353 unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
1370 if (overflow_maj) 1354 unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
1371 unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
1372 idr_destroy(&ctx_id_table); 1355 idr_destroy(&ctx_id_table);
1373} 1356}
1374 1357
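
The net effect of the ucm hunk above is that the old on-demand "overflow major" fallback is replaced by two regions registered up front: 32 fixed minors starting at IB_UCM_BASE_MINOR plus a dynamically allocated region covering the remaining RDMA_MAX_PORTS - 32 devices. The snippet below is only a minimal userspace model of the devnum-to-base arithmetic; the base values are illustrative stand-ins for IB_UCM_BASE_DEV and the region alloc_chrdev_region() returns at module init.

/*
 * Minimal userspace model of the fixed + dynamic minor split used by
 * ib_ucm_add_one() above.  fixed_base and dynamic_base are illustrative
 * stand-ins for IB_UCM_BASE_DEV and the dynamically allocated region;
 * only the arithmetic is the point here.
 */
#include <stdio.h>

enum { NUM_FIXED_MINOR = 32 };          /* IB_UCM_NUM_FIXED_MINOR */

static const unsigned int fixed_base   = 224;   /* fixed region starts at minor 224 */
static const unsigned int dynamic_base = 1000;  /* assumed dynamic region base */

/* Mirror of the base selection in ib_ucm_add_one() */
static unsigned int ucm_base_for(unsigned int devnum)
{
        if (devnum >= NUM_FIXED_MINOR)
                return dynamic_base + devnum - NUM_FIXED_MINOR;
        return fixed_base + devnum;
}

int main(void)
{
        printf("devnum  5 -> base %u (fixed region)\n", ucm_base_for(5));
        printf("devnum 40 -> base %u (dynamic region)\n", ucm_base_for(40));
        return 0;
}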
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e4be89d1f3d8..6ba4231f2b07 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -904,13 +904,14 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
904 904
905 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | 905 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
906 IB_PATH_BIDIRECTIONAL; 906 IB_PATH_BIDIRECTIONAL;
907 if (rec->rec_type == SA_PATH_REC_TYPE_IB) { 907 if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
908 ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
909 } else {
910 struct sa_path_rec ib; 908 struct sa_path_rec ib;
911 909
912 sa_convert_path_opa_to_ib(&ib, rec); 910 sa_convert_path_opa_to_ib(&ib, rec);
913 ib_sa_pack_path(&ib, &resp->path_data[i].path_rec); 911 ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
912
913 } else {
914 ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
914 } 915 }
915 } 916 }
916 917
@@ -943,8 +944,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
943 } else { 944 } else {
944 addr->sib_family = AF_IB; 945 addr->sib_family = AF_IB;
945 addr->sib_pkey = (__force __be16) resp.pkey; 946 addr->sib_pkey = (__force __be16) resp.pkey;
946 rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, 947 rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
947 (union ib_gid *) &addr->sib_addr); 948 NULL);
948 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 949 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
949 &ctx->cm_id->route.addr.src_addr); 950 &ctx->cm_id->route.addr.src_addr);
950 } 951 }
@@ -956,8 +957,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
956 } else { 957 } else {
957 addr->sib_family = AF_IB; 958 addr->sib_family = AF_IB;
958 addr->sib_pkey = (__force __be16) resp.pkey; 959 addr->sib_pkey = (__force __be16) resp.pkey;
959 rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, 960 rdma_read_gids(ctx->cm_id, NULL,
960 (union ib_gid *) &addr->sib_addr); 961 (union ib_gid *)&addr->sib_addr);
961 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) 962 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
962 &ctx->cm_id->route.addr.dst_addr); 963 &ctx->cm_id->route.addr.dst_addr);
963 } 964 }
@@ -1231,9 +1232,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
1231 struct sa_path_rec opa; 1232 struct sa_path_rec opa;
1232 1233
1233 sa_convert_path_ib_to_opa(&opa, &sa_path); 1234 sa_convert_path_ib_to_opa(&opa, &sa_path);
1234 ret = rdma_set_ib_paths(ctx->cm_id, &opa, 1); 1235 ret = rdma_set_ib_path(ctx->cm_id, &opa);
1235 } else { 1236 } else {
1236 ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); 1237 ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
1237 } 1238 }
1238 if (ret) 1239 if (ret)
1239 return ret; 1240 return ret;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 130606c3b07c..9a4e899d94b3 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -352,7 +352,7 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
352 return -EINVAL; 352 return -EINVAL;
353 } 353 }
354 354
355 ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length, 355 ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
356 offset + ib_umem_offset(umem)); 356 offset + ib_umem_offset(umem));
357 357
358 if (ret < 0) 358 if (ret < 0)
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 0c32d10f23ff..78c77962422e 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -55,16 +55,21 @@
55#include <rdma/ib_mad.h> 55#include <rdma/ib_mad.h>
56#include <rdma/ib_user_mad.h> 56#include <rdma/ib_user_mad.h>
57 57
58#include "core_priv.h"
59
58MODULE_AUTHOR("Roland Dreier"); 60MODULE_AUTHOR("Roland Dreier");
59MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); 61MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
60MODULE_LICENSE("Dual BSD/GPL"); 62MODULE_LICENSE("Dual BSD/GPL");
61 63
62enum { 64enum {
63 IB_UMAD_MAX_PORTS = 64, 65 IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
64 IB_UMAD_MAX_AGENTS = 32, 66 IB_UMAD_MAX_AGENTS = 32,
65 67
66 IB_UMAD_MAJOR = 231, 68 IB_UMAD_MAJOR = 231,
67 IB_UMAD_MINOR_BASE = 0 69 IB_UMAD_MINOR_BASE = 0,
70 IB_UMAD_NUM_FIXED_MINOR = 64,
71 IB_UMAD_NUM_DYNAMIC_MINOR = IB_UMAD_MAX_PORTS - IB_UMAD_NUM_FIXED_MINOR,
72 IB_ISSM_MINOR_BASE = IB_UMAD_NUM_FIXED_MINOR,
68}; 73};
69 74
70/* 75/*
@@ -127,9 +132,12 @@ struct ib_umad_packet {
127 132
128static struct class *umad_class; 133static struct class *umad_class;
129 134
130static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); 135static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
136static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
137 IB_UMAD_NUM_FIXED_MINOR;
138static dev_t dynamic_umad_dev;
139static dev_t dynamic_issm_dev;
131 140
132static DEFINE_SPINLOCK(port_lock);
133static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); 141static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
134 142
135static void ib_umad_add_one(struct ib_device *device); 143static void ib_umad_add_one(struct ib_device *device);
@@ -233,8 +241,7 @@ static void recv_handler(struct ib_mad_agent *agent,
233 * On OPA devices it is okay to lose the upper 16 bits of LID as this 241 * On OPA devices it is okay to lose the upper 16 bits of LID as this
234 * information is obtained elsewhere. Mask off the upper 16 bits. 242 * information is obtained elsewhere. Mask off the upper 16 bits.
235 */ 243 */
236 if (agent->device->port_immutable[agent->port_num].core_cap_flags & 244 if (rdma_cap_opa_mad(agent->device, agent->port_num))
237 RDMA_CORE_PORT_INTEL_OPA)
238 packet->mad.hdr.lid = ib_lid_be16(0xFFFF & 245 packet->mad.hdr.lid = ib_lid_be16(0xFFFF &
239 mad_recv_wc->wc->slid); 246 mad_recv_wc->wc->slid);
240 else 247 else
@@ -246,10 +253,14 @@ static void recv_handler(struct ib_mad_agent *agent,
246 if (packet->mad.hdr.grh_present) { 253 if (packet->mad.hdr.grh_present) {
247 struct rdma_ah_attr ah_attr; 254 struct rdma_ah_attr ah_attr;
248 const struct ib_global_route *grh; 255 const struct ib_global_route *grh;
256 int ret;
249 257
250 ib_init_ah_from_wc(agent->device, agent->port_num, 258 ret = ib_init_ah_attr_from_wc(agent->device, agent->port_num,
251 mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, 259 mad_recv_wc->wc,
252 &ah_attr); 260 mad_recv_wc->recv_buf.grh,
261 &ah_attr);
262 if (ret)
263 goto err2;
253 264
254 grh = rdma_ah_read_grh(&ah_attr); 265 grh = rdma_ah_read_grh(&ah_attr);
255 packet->mad.hdr.gid_index = grh->sgid_index; 266 packet->mad.hdr.gid_index = grh->sgid_index;
@@ -500,7 +511,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
500 } 511 }
501 512
502 memset(&ah_attr, 0, sizeof ah_attr); 513 memset(&ah_attr, 0, sizeof ah_attr);
503 ah_attr.type = rdma_ah_find_type(file->port->ib_dev, 514 ah_attr.type = rdma_ah_find_type(agent->device,
504 file->port->port_num); 515 file->port->port_num);
505 rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid)); 516 rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
506 rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl); 517 rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
@@ -1139,54 +1150,26 @@ static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
1139static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1150static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1140 __stringify(IB_USER_MAD_ABI_VERSION)); 1151 __stringify(IB_USER_MAD_ABI_VERSION));
1141 1152
1142static dev_t overflow_maj;
1143static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
1144static int find_overflow_devnum(struct ib_device *device)
1145{
1146 int ret;
1147
1148 if (!overflow_maj) {
1149 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
1150 "infiniband_mad");
1151 if (ret) {
1152 dev_err(&device->dev,
1153 "couldn't register dynamic device number\n");
1154 return ret;
1155 }
1156 }
1157
1158 ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
1159 if (ret >= IB_UMAD_MAX_PORTS)
1160 return -1;
1161
1162 return ret;
1163}
1164
1165static int ib_umad_init_port(struct ib_device *device, int port_num, 1153static int ib_umad_init_port(struct ib_device *device, int port_num,
1166 struct ib_umad_device *umad_dev, 1154 struct ib_umad_device *umad_dev,
1167 struct ib_umad_port *port) 1155 struct ib_umad_port *port)
1168{ 1156{
1169 int devnum; 1157 int devnum;
1170 dev_t base; 1158 dev_t base_umad;
1159 dev_t base_issm;
1171 1160
1172 spin_lock(&port_lock);
1173 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 1161 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
1174 if (devnum >= IB_UMAD_MAX_PORTS) { 1162 if (devnum >= IB_UMAD_MAX_PORTS)
1175 spin_unlock(&port_lock); 1163 return -1;
1176 devnum = find_overflow_devnum(device); 1164 port->dev_num = devnum;
1177 if (devnum < 0) 1165 set_bit(devnum, dev_map);
1178 return -1; 1166 if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
1179 1167 base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
1180 spin_lock(&port_lock); 1168 base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
1181 port->dev_num = devnum + IB_UMAD_MAX_PORTS;
1182 base = devnum + overflow_maj;
1183 set_bit(devnum, overflow_map);
1184 } else { 1169 } else {
1185 port->dev_num = devnum; 1170 base_umad = devnum + base_umad_dev;
1186 base = devnum + base_dev; 1171 base_issm = devnum + base_issm_dev;
1187 set_bit(devnum, dev_map);
1188 } 1172 }
1189 spin_unlock(&port_lock);
1190 1173
1191 port->ib_dev = device; 1174 port->ib_dev = device;
1192 port->port_num = port_num; 1175 port->port_num = port_num;
@@ -1198,7 +1181,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1198 port->cdev.owner = THIS_MODULE; 1181 port->cdev.owner = THIS_MODULE;
1199 cdev_set_parent(&port->cdev, &umad_dev->kobj); 1182 cdev_set_parent(&port->cdev, &umad_dev->kobj);
1200 kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); 1183 kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
1201 if (cdev_add(&port->cdev, base, 1)) 1184 if (cdev_add(&port->cdev, base_umad, 1))
1202 goto err_cdev; 1185 goto err_cdev;
1203 1186
1204 port->dev = device_create(umad_class, device->dev.parent, 1187 port->dev = device_create(umad_class, device->dev.parent,
@@ -1212,12 +1195,11 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1212 if (device_create_file(port->dev, &dev_attr_port)) 1195 if (device_create_file(port->dev, &dev_attr_port))
1213 goto err_dev; 1196 goto err_dev;
1214 1197
1215 base += IB_UMAD_MAX_PORTS;
1216 cdev_init(&port->sm_cdev, &umad_sm_fops); 1198 cdev_init(&port->sm_cdev, &umad_sm_fops);
1217 port->sm_cdev.owner = THIS_MODULE; 1199 port->sm_cdev.owner = THIS_MODULE;
1218 cdev_set_parent(&port->sm_cdev, &umad_dev->kobj); 1200 cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
1219 kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); 1201 kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
1220 if (cdev_add(&port->sm_cdev, base, 1)) 1202 if (cdev_add(&port->sm_cdev, base_issm, 1))
1221 goto err_sm_cdev; 1203 goto err_sm_cdev;
1222 1204
1223 port->sm_dev = device_create(umad_class, device->dev.parent, 1205 port->sm_dev = device_create(umad_class, device->dev.parent,
@@ -1244,10 +1226,7 @@ err_dev:
1244 1226
1245err_cdev: 1227err_cdev:
1246 cdev_del(&port->cdev); 1228 cdev_del(&port->cdev);
1247 if (port->dev_num < IB_UMAD_MAX_PORTS) 1229 clear_bit(devnum, dev_map);
1248 clear_bit(devnum, dev_map);
1249 else
1250 clear_bit(devnum, overflow_map);
1251 1230
1252 return -1; 1231 return -1;
1253} 1232}
@@ -1281,11 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
1281 } 1260 }
1282 1261
1283 mutex_unlock(&port->file_mutex); 1262 mutex_unlock(&port->file_mutex);
1284 1263 clear_bit(port->dev_num, dev_map);
1285 if (port->dev_num < IB_UMAD_MAX_PORTS)
1286 clear_bit(port->dev_num, dev_map);
1287 else
1288 clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
1289} 1264}
1290 1265
1291static void ib_umad_add_one(struct ib_device *device) 1266static void ib_umad_add_one(struct ib_device *device)
@@ -1361,13 +1336,23 @@ static int __init ib_umad_init(void)
1361{ 1336{
1362 int ret; 1337 int ret;
1363 1338
1364 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, 1339 ret = register_chrdev_region(base_umad_dev,
1340 IB_UMAD_NUM_FIXED_MINOR * 2,
1365 "infiniband_mad"); 1341 "infiniband_mad");
1366 if (ret) { 1342 if (ret) {
1367 pr_err("couldn't register device number\n"); 1343 pr_err("couldn't register device number\n");
1368 goto out; 1344 goto out;
1369 } 1345 }
1370 1346
1347 ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
1348 IB_UMAD_NUM_DYNAMIC_MINOR * 2,
1349 "infiniband_mad");
1350 if (ret) {
1351 pr_err("couldn't register dynamic device number\n");
1352 goto out_alloc;
1353 }
1354 dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
1355
1371 umad_class = class_create(THIS_MODULE, "infiniband_mad"); 1356 umad_class = class_create(THIS_MODULE, "infiniband_mad");
1372 if (IS_ERR(umad_class)) { 1357 if (IS_ERR(umad_class)) {
1373 ret = PTR_ERR(umad_class); 1358 ret = PTR_ERR(umad_class);
@@ -1395,7 +1380,12 @@ out_class:
1395 class_destroy(umad_class); 1380 class_destroy(umad_class);
1396 1381
1397out_chrdev: 1382out_chrdev:
1398 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); 1383 unregister_chrdev_region(dynamic_umad_dev,
1384 IB_UMAD_NUM_DYNAMIC_MINOR * 2);
1385
1386out_alloc:
1387 unregister_chrdev_region(base_umad_dev,
1388 IB_UMAD_NUM_FIXED_MINOR * 2);
1399 1389
1400out: 1390out:
1401 return ret; 1391 return ret;
@@ -1405,9 +1395,10 @@ static void __exit ib_umad_cleanup(void)
1405{ 1395{
1406 ib_unregister_client(&umad_client); 1396 ib_unregister_client(&umad_client);
1407 class_destroy(umad_class); 1397 class_destroy(umad_class);
1408 unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); 1398 unregister_chrdev_region(base_umad_dev,
1409 if (overflow_maj) 1399 IB_UMAD_NUM_FIXED_MINOR * 2);
1410 unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); 1400 unregister_chrdev_region(dynamic_umad_dev,
1401 IB_UMAD_NUM_DYNAMIC_MINOR * 2);
1411} 1402}
1412 1403
1413module_init(ib_umad_init); 1404module_init(ib_umad_init);
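
The user_mad.c hunk applies the same fixed-plus-dynamic scheme, with the twist that each port owns two character devices (umadN and issmN), so both regions are registered with twice the minor count and the issm base sits a full region above the umad base. The sketch below models only the base selection in ib_umad_init_port(); the numeric bases are assumptions standing in for MKDEV(231, 0) and the alloc_chrdev_region() result.

/*
 * Userspace model of the umad/issm device-number pairing set up above.
 * The bases are illustrative: in the driver, base_issm_dev sits
 * IB_UMAD_NUM_FIXED_MINOR above base_umad_dev, and the two dynamic
 * bases come from one alloc_chrdev_region() call split in half.
 */
#include <stdio.h>

enum {
        NUM_FIXED_MINOR   = 64,    /* IB_UMAD_NUM_FIXED_MINOR */
        NUM_DYNAMIC_MINOR = 192,   /* assumed RDMA_MAX_PORTS - 64 */
};

static const unsigned int base_umad_dev    = 0;
static const unsigned int base_issm_dev    = NUM_FIXED_MINOR;
static const unsigned int dynamic_umad_dev = 2000;                      /* assumed */
static const unsigned int dynamic_issm_dev = 2000 + NUM_DYNAMIC_MINOR;  /* assumed */

/* Mirror of the base selection in ib_umad_init_port() */
static void umad_bases_for(unsigned int devnum,
                           unsigned int *base_umad, unsigned int *base_issm)
{
        if (devnum >= NUM_FIXED_MINOR) {
                *base_umad = dynamic_umad_dev + devnum - NUM_FIXED_MINOR;
                *base_issm = dynamic_issm_dev + devnum - NUM_FIXED_MINOR;
        } else {
                *base_umad = base_umad_dev + devnum;
                *base_issm = base_issm_dev + devnum;
        }
}

int main(void)
{
        unsigned int u, i;

        umad_bases_for(3, &u, &i);
        printf("devnum  3 -> umad %u, issm %u (fixed)\n", u, i);
        umad_bases_for(70, &u, &i);
        printf("devnum 70 -> umad %u, issm %u (dynamic)\n", u, i);
        return 0;
}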
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 840b24096690..256934d1f64f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -340,6 +340,8 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
340 uobj->object = pd; 340 uobj->object = pd;
341 memset(&resp, 0, sizeof resp); 341 memset(&resp, 0, sizeof resp);
342 resp.pd_handle = uobj->id; 342 resp.pd_handle = uobj->id;
343 pd->res.type = RDMA_RESTRACK_PD;
344 rdma_restrack_add(&pd->res);
343 345
344 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { 346 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
345 ret = -EFAULT; 347 ret = -EFAULT;
@@ -1033,6 +1035,8 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1033 goto err_cb; 1035 goto err_cb;
1034 1036
1035 uobj_alloc_commit(&obj->uobject); 1037 uobj_alloc_commit(&obj->uobject);
1038 cq->res.type = RDMA_RESTRACK_CQ;
1039 rdma_restrack_add(&cq->res);
1036 1040
1037 return obj; 1041 return obj;
1038 1042
@@ -1145,10 +1149,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1145 min(ucore->inlen, sizeof(cmd)), 1149 min(ucore->inlen, sizeof(cmd)),
1146 ib_uverbs_ex_create_cq_cb, NULL); 1150 ib_uverbs_ex_create_cq_cb, NULL);
1147 1151
1148 if (IS_ERR(obj)) 1152 return PTR_ERR_OR_ZERO(obj);
1149 return PTR_ERR(obj);
1150
1151 return 0;
1152} 1153}
1153 1154
1154ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, 1155ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
@@ -1199,7 +1200,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
1199 tmp.opcode = wc->opcode; 1200 tmp.opcode = wc->opcode;
1200 tmp.vendor_err = wc->vendor_err; 1201 tmp.vendor_err = wc->vendor_err;
1201 tmp.byte_len = wc->byte_len; 1202 tmp.byte_len = wc->byte_len;
1202 tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; 1203 tmp.ex.imm_data = wc->ex.imm_data;
1203 tmp.qp_num = wc->qp->qp_num; 1204 tmp.qp_num = wc->qp->qp_num;
1204 tmp.src_qp = wc->src_qp; 1205 tmp.src_qp = wc->src_qp;
1205 tmp.wc_flags = wc->wc_flags; 1206 tmp.wc_flags = wc->wc_flags;
@@ -1517,7 +1518,7 @@ static int create_qp(struct ib_uverbs_file *file,
1517 if (cmd->qp_type == IB_QPT_XRC_TGT) 1518 if (cmd->qp_type == IB_QPT_XRC_TGT)
1518 qp = ib_create_qp(pd, &attr); 1519 qp = ib_create_qp(pd, &attr);
1519 else 1520 else
1520 qp = device->create_qp(pd, &attr, uhw); 1521 qp = _ib_create_qp(device, pd, &attr, uhw);
1521 1522
1522 if (IS_ERR(qp)) { 1523 if (IS_ERR(qp)) {
1523 ret = PTR_ERR(qp); 1524 ret = PTR_ERR(qp);
@@ -1530,7 +1531,6 @@ static int create_qp(struct ib_uverbs_file *file,
1530 goto err_cb; 1531 goto err_cb;
1531 1532
1532 qp->real_qp = qp; 1533 qp->real_qp = qp;
1533 qp->device = device;
1534 qp->pd = pd; 1534 qp->pd = pd;
1535 qp->send_cq = attr.send_cq; 1535 qp->send_cq = attr.send_cq;
1536 qp->recv_cq = attr.recv_cq; 1536 qp->recv_cq = attr.recv_cq;
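
One small cleanup in the uverbs_cmd.c hunk replaces an explicit IS_ERR()/PTR_ERR() branch with PTR_ERR_OR_ZERO(). The snippet below re-creates the ERR_PTR convention in userspace purely to show the equivalence; it is not the kernel's implementation.

/*
 * Userspace re-creation of the ERR_PTR convention, only to illustrate why
 * "return PTR_ERR_OR_ZERO(obj);" above is equivalent to the removed
 * IS_ERR()/PTR_ERR() branch.
 */
#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long error)       { return (void *)error; }
static long  ptr_err(const void *ptr)  { return (long)ptr; }
static int   is_err(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* 0 for a valid pointer, the negative errno for an error pointer */
static long ptr_err_or_zero(const void *ptr)
{
        return is_err(ptr) ? ptr_err(ptr) : 0;
}

int main(void)
{
        int obj = 42;

        printf("valid ptr -> %ld\n", ptr_err_or_zero(&obj));         /* 0   */
        printf("error ptr -> %ld\n", ptr_err_or_zero(err_ptr(-12))); /* -12 */
        return 0;
}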
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 71ff2644e053..d96dc1d17be1 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -243,16 +243,13 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
243 size_t ctx_size; 243 size_t ctx_size;
244 uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)]; 244 uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
245 245
246 if (hdr->reserved)
247 return -EINVAL;
248
249 object_spec = uverbs_get_object(ib_dev, hdr->object_id); 246 object_spec = uverbs_get_object(ib_dev, hdr->object_id);
250 if (!object_spec) 247 if (!object_spec)
251 return -EOPNOTSUPP; 248 return -EPROTONOSUPPORT;
252 249
253 method_spec = uverbs_get_method(object_spec, hdr->method_id); 250 method_spec = uverbs_get_method(object_spec, hdr->method_id);
254 if (!method_spec) 251 if (!method_spec)
255 return -EOPNOTSUPP; 252 return -EPROTONOSUPPORT;
256 253
257 if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) 254 if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
258 return -EINVAL; 255 return -EINVAL;
@@ -305,6 +302,16 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
305 302
306 err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, 303 err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
307 file, method_spec, ctx->uverbs_attr_bundle); 304 file, method_spec, ctx->uverbs_attr_bundle);
305
306 /*
307 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
308 * not invoke the method because the request is not supported. No
309 * other cases should return this code.
310 */
311 if (unlikely(err == -EPROTONOSUPPORT)) {
312 WARN_ON_ONCE(err == -EPROTONOSUPPORT);
313 err = -EINVAL;
314 }
308out: 315out:
309 if (ctx != (void *)data) 316 if (ctx != (void *)data)
310 kfree(ctx); 317 kfree(ctx);
@@ -341,7 +348,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
341 } 348 }
342 349
343 if (hdr.reserved) { 350 if (hdr.reserved) {
344 err = -EOPNOTSUPP; 351 err = -EPROTONOSUPPORT;
345 goto out; 352 goto out;
346 } 353 }
347 354
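
The uverbs_ioctl.c hunk reserves -EPROTONOSUPPORT for the dispatcher itself (unknown object, unknown method, or a non-zero reserved header field) and remaps any -EPROTONOSUPPORT that leaks out of a method handler to -EINVAL, warning when that happens. The sketch below demonstrates that "reserved errno" pattern in userspace; dispatch(), lookup() and handler() are hypothetical names used only for this illustration.

/*
 * Userspace sketch of the errno policy adopted above: EPROTONOSUPPORT is
 * reserved for "the framework does not know this request"; a handler
 * returning it by accident is remapped to EINVAL.
 */
#include <errno.h>
#include <stdio.h>

static int lookup(int method_id)
{
        return method_id == 1 ? 0 : -EPROTONOSUPPORT;  /* only method 1 exists */
}

static int handler(void)
{
        return -EPROTONOSUPPORT;  /* a buggy handler leaking the reserved code */
}

static int dispatch(int method_id)
{
        int err = lookup(method_id);

        if (err)
                return err;               /* framework: request not supported */

        err = handler();
        if (err == -EPROTONOSUPPORT) {    /* never let handlers reuse the code */
                fprintf(stderr, "warning: handler returned reserved errno\n");
                err = -EINVAL;
        }
        return err;
}

int main(void)
{
        printf("unknown method -> %d (-EPROTONOSUPPORT)\n", dispatch(2));
        printf("known method   -> %d (-EINVAL after remap)\n", dispatch(1));
        return 0;
}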
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f216ffb465a..5b811bf574d6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -62,14 +62,16 @@ MODULE_LICENSE("Dual BSD/GPL");
62enum { 62enum {
63 IB_UVERBS_MAJOR = 231, 63 IB_UVERBS_MAJOR = 231,
64 IB_UVERBS_BASE_MINOR = 192, 64 IB_UVERBS_BASE_MINOR = 192,
65 IB_UVERBS_MAX_DEVICES = 32 65 IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
66 IB_UVERBS_NUM_FIXED_MINOR = 32,
67 IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
66}; 68};
67 69
68#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) 70#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
69 71
72static dev_t dynamic_uverbs_dev;
70static struct class *uverbs_class; 73static struct class *uverbs_class;
71 74
72static DEFINE_SPINLOCK(map_lock);
73static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 75static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
74 76
75static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 77static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
@@ -1005,34 +1007,6 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1005static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1007static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1006 __stringify(IB_USER_VERBS_ABI_VERSION)); 1008 __stringify(IB_USER_VERBS_ABI_VERSION));
1007 1009
1008static dev_t overflow_maj;
1009static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
1010
1011/*
1012 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
1013 * requesting a new major number and doubling the number of max devices we
1014 * support. It's stupid, but simple.
1015 */
1016static int find_overflow_devnum(void)
1017{
1018 int ret;
1019
1020 if (!overflow_maj) {
1021 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
1022 "infiniband_verbs");
1023 if (ret) {
1024 pr_err("user_verbs: couldn't register dynamic device number\n");
1025 return ret;
1026 }
1027 }
1028
1029 ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
1030 if (ret >= IB_UVERBS_MAX_DEVICES)
1031 return -1;
1032
1033 return ret;
1034}
1035
1036static void ib_uverbs_add_one(struct ib_device *device) 1010static void ib_uverbs_add_one(struct ib_device *device)
1037{ 1011{
1038 int devnum; 1012 int devnum;
@@ -1062,24 +1036,15 @@ static void ib_uverbs_add_one(struct ib_device *device)
1062 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); 1036 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
1063 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 1037 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
1064 1038
1065 spin_lock(&map_lock);
1066 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 1039 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
1067 if (devnum >= IB_UVERBS_MAX_DEVICES) { 1040 if (devnum >= IB_UVERBS_MAX_DEVICES)
1068 spin_unlock(&map_lock); 1041 goto err;
1069 devnum = find_overflow_devnum(); 1042 uverbs_dev->devnum = devnum;
1070 if (devnum < 0) 1043 set_bit(devnum, dev_map);
1071 goto err; 1044 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
1072 1045 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
1073 spin_lock(&map_lock); 1046 else
1074 uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; 1047 base = IB_UVERBS_BASE_DEV + devnum;
1075 base = devnum + overflow_maj;
1076 set_bit(devnum, overflow_map);
1077 } else {
1078 uverbs_dev->devnum = devnum;
1079 base = devnum + IB_UVERBS_BASE_DEV;
1080 set_bit(devnum, dev_map);
1081 }
1082 spin_unlock(&map_lock);
1083 1048
1084 rcu_assign_pointer(uverbs_dev->ib_dev, device); 1049 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1085 uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1050 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
@@ -1124,10 +1089,7 @@ err_class:
1124 1089
1125err_cdev: 1090err_cdev:
1126 cdev_del(&uverbs_dev->cdev); 1091 cdev_del(&uverbs_dev->cdev);
1127 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) 1092 clear_bit(devnum, dev_map);
1128 clear_bit(devnum, dev_map);
1129 else
1130 clear_bit(devnum, overflow_map);
1131 1093
1132err: 1094err:
1133 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1095 if (atomic_dec_and_test(&uverbs_dev->refcount))
@@ -1219,11 +1181,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1219 dev_set_drvdata(uverbs_dev->dev, NULL); 1181 dev_set_drvdata(uverbs_dev->dev, NULL);
1220 device_destroy(uverbs_class, uverbs_dev->cdev.dev); 1182 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1221 cdev_del(&uverbs_dev->cdev); 1183 cdev_del(&uverbs_dev->cdev);
1222 1184 clear_bit(uverbs_dev->devnum, dev_map);
1223 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1224 clear_bit(uverbs_dev->devnum, dev_map);
1225 else
1226 clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
1227 1185
1228 if (device->disassociate_ucontext) { 1186 if (device->disassociate_ucontext) {
1229 /* We disassociate HW resources and immediately return. 1187 /* We disassociate HW resources and immediately return.
@@ -1265,13 +1223,22 @@ static int __init ib_uverbs_init(void)
1265{ 1223{
1266 int ret; 1224 int ret;
1267 1225
1268 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, 1226 ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
1227 IB_UVERBS_NUM_FIXED_MINOR,
1269 "infiniband_verbs"); 1228 "infiniband_verbs");
1270 if (ret) { 1229 if (ret) {
1271 pr_err("user_verbs: couldn't register device number\n"); 1230 pr_err("user_verbs: couldn't register device number\n");
1272 goto out; 1231 goto out;
1273 } 1232 }
1274 1233
1234 ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
1235 IB_UVERBS_NUM_DYNAMIC_MINOR,
1236 "infiniband_verbs");
1237 if (ret) {
1238 pr_err("couldn't register dynamic device number\n");
1239 goto out_alloc;
1240 }
1241
1275 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); 1242 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1276 if (IS_ERR(uverbs_class)) { 1243 if (IS_ERR(uverbs_class)) {
1277 ret = PTR_ERR(uverbs_class); 1244 ret = PTR_ERR(uverbs_class);
@@ -1299,7 +1266,12 @@ out_class:
1299 class_destroy(uverbs_class); 1266 class_destroy(uverbs_class);
1300 1267
1301out_chrdev: 1268out_chrdev:
1302 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 1269 unregister_chrdev_region(dynamic_uverbs_dev,
1270 IB_UVERBS_NUM_DYNAMIC_MINOR);
1271
1272out_alloc:
1273 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1274 IB_UVERBS_NUM_FIXED_MINOR);
1303 1275
1304out: 1276out:
1305 return ret; 1277 return ret;
@@ -1309,9 +1281,10 @@ static void __exit ib_uverbs_cleanup(void)
1309{ 1281{
1310 ib_unregister_client(&uverbs_client); 1282 ib_unregister_client(&uverbs_client);
1311 class_destroy(uverbs_class); 1283 class_destroy(uverbs_class);
1312 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 1284 unregister_chrdev_region(IB_UVERBS_BASE_DEV,
1313 if (overflow_maj) 1285 IB_UVERBS_NUM_FIXED_MINOR);
1314 unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); 1286 unregister_chrdev_region(dynamic_uverbs_dev,
1287 IB_UVERBS_NUM_DYNAMIC_MINOR);
1315} 1288}
1316 1289
1317module_init(ib_uverbs_init); 1290module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c3ee5d9b336d..b571176babbe 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -35,6 +35,7 @@
35#include <rdma/ib_verbs.h> 35#include <rdma/ib_verbs.h>
36#include <linux/bug.h> 36#include <linux/bug.h>
37#include <linux/file.h> 37#include <linux/file.h>
38#include <rdma/restrack.h>
38#include "rdma_core.h" 39#include "rdma_core.h"
39#include "uverbs.h" 40#include "uverbs.h"
40 41
@@ -319,6 +320,8 @@ static int uverbs_create_cq_handler(struct ib_device *ib_dev,
319 obj->uobject.object = cq; 320 obj->uobject.object = cq;
320 obj->uobject.user_handle = user_handle; 321 obj->uobject.user_handle = user_handle;
321 atomic_set(&cq->usecnt, 0); 322 atomic_set(&cq->usecnt, 0);
323 cq->res.type = RDMA_RESTRACK_CQ;
324 rdma_restrack_add(&cq->res);
322 325
323 ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe); 326 ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe);
324 if (ret) 327 if (ret)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e36d27ed4daa..16ebc6372c31 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -124,16 +124,24 @@ EXPORT_SYMBOL(ib_wc_status_msg);
124__attribute_const__ int ib_rate_to_mult(enum ib_rate rate) 124__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
125{ 125{
126 switch (rate) { 126 switch (rate) {
127 case IB_RATE_2_5_GBPS: return 1; 127 case IB_RATE_2_5_GBPS: return 1;
128 case IB_RATE_5_GBPS: return 2; 128 case IB_RATE_5_GBPS: return 2;
129 case IB_RATE_10_GBPS: return 4; 129 case IB_RATE_10_GBPS: return 4;
130 case IB_RATE_20_GBPS: return 8; 130 case IB_RATE_20_GBPS: return 8;
131 case IB_RATE_30_GBPS: return 12; 131 case IB_RATE_30_GBPS: return 12;
132 case IB_RATE_40_GBPS: return 16; 132 case IB_RATE_40_GBPS: return 16;
133 case IB_RATE_60_GBPS: return 24; 133 case IB_RATE_60_GBPS: return 24;
134 case IB_RATE_80_GBPS: return 32; 134 case IB_RATE_80_GBPS: return 32;
135 case IB_RATE_120_GBPS: return 48; 135 case IB_RATE_120_GBPS: return 48;
136 default: return -1; 136 case IB_RATE_14_GBPS: return 6;
137 case IB_RATE_56_GBPS: return 22;
138 case IB_RATE_112_GBPS: return 45;
139 case IB_RATE_168_GBPS: return 67;
140 case IB_RATE_25_GBPS: return 10;
141 case IB_RATE_100_GBPS: return 40;
142 case IB_RATE_200_GBPS: return 80;
143 case IB_RATE_300_GBPS: return 120;
144 default: return -1;
137 } 145 }
138} 146}
139EXPORT_SYMBOL(ib_rate_to_mult); 147EXPORT_SYMBOL(ib_rate_to_mult);
@@ -141,16 +149,24 @@ EXPORT_SYMBOL(ib_rate_to_mult);
141__attribute_const__ enum ib_rate mult_to_ib_rate(int mult) 149__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
142{ 150{
143 switch (mult) { 151 switch (mult) {
144 case 1: return IB_RATE_2_5_GBPS; 152 case 1: return IB_RATE_2_5_GBPS;
145 case 2: return IB_RATE_5_GBPS; 153 case 2: return IB_RATE_5_GBPS;
146 case 4: return IB_RATE_10_GBPS; 154 case 4: return IB_RATE_10_GBPS;
147 case 8: return IB_RATE_20_GBPS; 155 case 8: return IB_RATE_20_GBPS;
148 case 12: return IB_RATE_30_GBPS; 156 case 12: return IB_RATE_30_GBPS;
149 case 16: return IB_RATE_40_GBPS; 157 case 16: return IB_RATE_40_GBPS;
150 case 24: return IB_RATE_60_GBPS; 158 case 24: return IB_RATE_60_GBPS;
151 case 32: return IB_RATE_80_GBPS; 159 case 32: return IB_RATE_80_GBPS;
152 case 48: return IB_RATE_120_GBPS; 160 case 48: return IB_RATE_120_GBPS;
153 default: return IB_RATE_PORT_CURRENT; 161 case 6: return IB_RATE_14_GBPS;
162 case 22: return IB_RATE_56_GBPS;
163 case 45: return IB_RATE_112_GBPS;
164 case 67: return IB_RATE_168_GBPS;
165 case 10: return IB_RATE_25_GBPS;
166 case 40: return IB_RATE_100_GBPS;
167 case 80: return IB_RATE_200_GBPS;
168 case 120: return IB_RATE_300_GBPS;
169 default: return IB_RATE_PORT_CURRENT;
154 } 170 }
155} 171}
156EXPORT_SYMBOL(mult_to_ib_rate); 172EXPORT_SYMBOL(mult_to_ib_rate);
@@ -247,6 +263,10 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
247 mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; 263 mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
248 } 264 }
249 265
266 pd->res.type = RDMA_RESTRACK_PD;
267 pd->res.kern_name = caller;
268 rdma_restrack_add(&pd->res);
269
250 if (mr_access_flags) { 270 if (mr_access_flags) {
251 struct ib_mr *mr; 271 struct ib_mr *mr;
252 272
@@ -296,6 +316,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
296 requires the caller to guarantee we can't race here. */ 316 requires the caller to guarantee we can't race here. */
297 WARN_ON(atomic_read(&pd->usecnt)); 317 WARN_ON(atomic_read(&pd->usecnt));
298 318
319 rdma_restrack_del(&pd->res);
299 /* Making delalloc_pd a void return is a WIP, no driver should return 320 /* Making delalloc_pd a void return is a WIP, no driver should return
300 an error here. */ 321 an error here. */
301 ret = pd->device->dealloc_pd(pd); 322 ret = pd->device->dealloc_pd(pd);
@@ -421,8 +442,7 @@ static bool find_gid_index(const union ib_gid *gid,
421 const struct ib_gid_attr *gid_attr, 442 const struct ib_gid_attr *gid_attr,
422 void *context) 443 void *context)
423{ 444{
424 struct find_gid_index_context *ctx = 445 struct find_gid_index_context *ctx = context;
425 (struct find_gid_index_context *)context;
426 446
427 if (ctx->gid_type != gid_attr->gid_type) 447 if (ctx->gid_type != gid_attr->gid_type)
428 return false; 448 return false;
@@ -481,8 +501,53 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
481} 501}
482EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); 502EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
483 503
504/* Resolve destination mac address and hop limit for unicast destination
505 * GID entry, considering the source GID entry as well.
506 * ah_attribute must have valid port_num, sgid_index.
507 */
508static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
509 struct rdma_ah_attr *ah_attr)
510{
511 struct ib_gid_attr sgid_attr;
512 struct ib_global_route *grh;
513 int hop_limit = 0xff;
514 union ib_gid sgid;
515 int ret;
516
517 grh = rdma_ah_retrieve_grh(ah_attr);
518
519 ret = ib_query_gid(device,
520 rdma_ah_get_port_num(ah_attr),
521 grh->sgid_index,
522 &sgid, &sgid_attr);
523 if (ret || !sgid_attr.ndev) {
524 if (!ret)
525 ret = -ENXIO;
526 return ret;
527 }
528
529 /* If destination is link local and source GID is RoCEv1,
530 * IP stack is not used.
531 */
532 if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
533 sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
534 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
535 ah_attr->roce.dmac);
536 goto done;
537 }
538
539 ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
540 ah_attr->roce.dmac,
541 sgid_attr.ndev, &hop_limit);
542done:
543 dev_put(sgid_attr.ndev);
544
545 grh->hop_limit = hop_limit;
546 return ret;
547}
548
484/* 549/*
485 * This function creates ah from the incoming packet. 550 * This function initializes address handle attributes from the incoming packet.
486 * Incoming packet has dgid of the receiver node on which this code is 551 * Incoming packet has dgid of the receiver node on which this code is
487 * getting executed and, sgid contains the GID of the sender. 552 * getting executed and, sgid contains the GID of the sender.
488 * 553 *
@@ -490,13 +555,10 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
490 * as sgid and, sgid is used as dgid because sgid contains destinations 555 * as sgid and, sgid is used as dgid because sgid contains destinations
491 * GID whom to respond to. 556 * GID whom to respond to.
492 * 557 *
493 * This is why when calling rdma_addr_find_l2_eth_by_grh() function, the
494 * position of arguments dgid and sgid do not match the order of the
495 * parameters.
496 */ 558 */
497int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 559int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
498 const struct ib_wc *wc, const struct ib_grh *grh, 560 const struct ib_wc *wc, const struct ib_grh *grh,
499 struct rdma_ah_attr *ah_attr) 561 struct rdma_ah_attr *ah_attr)
500{ 562{
501 u32 flow_class; 563 u32 flow_class;
502 u16 gid_index; 564 u16 gid_index;
@@ -523,57 +585,33 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
523 if (ret) 585 if (ret)
524 return ret; 586 return ret;
525 587
588 rdma_ah_set_sl(ah_attr, wc->sl);
589 rdma_ah_set_port_num(ah_attr, port_num);
590
526 if (rdma_protocol_roce(device, port_num)) { 591 if (rdma_protocol_roce(device, port_num)) {
527 int if_index = 0;
528 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? 592 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
529 wc->vlan_id : 0xffff; 593 wc->vlan_id : 0xffff;
530 struct net_device *idev;
531 struct net_device *resolved_dev;
532 594
533 if (!(wc->wc_flags & IB_WC_GRH)) 595 if (!(wc->wc_flags & IB_WC_GRH))
534 return -EPROTOTYPE; 596 return -EPROTOTYPE;
535 597
536 if (!device->get_netdev) 598 ret = get_sgid_index_from_eth(device, port_num,
537 return -EOPNOTSUPP; 599 vlan_id, &dgid,
538 600 gid_type, &gid_index);
539 idev = device->get_netdev(device, port_num);
540 if (!idev)
541 return -ENODEV;
542
543 ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
544 ah_attr->roce.dmac,
545 wc->wc_flags & IB_WC_WITH_VLAN ?
546 NULL : &vlan_id,
547 &if_index, &hoplimit);
548 if (ret) {
549 dev_put(idev);
550 return ret;
551 }
552
553 resolved_dev = dev_get_by_index(&init_net, if_index);
554 rcu_read_lock();
555 if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
556 resolved_dev))
557 ret = -EHOSTUNREACH;
558 rcu_read_unlock();
559 dev_put(idev);
560 dev_put(resolved_dev);
561 if (ret) 601 if (ret)
562 return ret; 602 return ret;
563 603
564 ret = get_sgid_index_from_eth(device, port_num, vlan_id, 604 flow_class = be32_to_cpu(grh->version_tclass_flow);
565 &dgid, gid_type, &gid_index); 605 rdma_ah_set_grh(ah_attr, &sgid,
566 if (ret) 606 flow_class & 0xFFFFF,
567 return ret; 607 (u8)gid_index, hoplimit,
568 } 608 (flow_class >> 20) & 0xFF);
569 609 return ib_resolve_unicast_gid_dmac(device, ah_attr);
570 rdma_ah_set_dlid(ah_attr, wc->slid); 610 } else {
571 rdma_ah_set_sl(ah_attr, wc->sl); 611 rdma_ah_set_dlid(ah_attr, wc->slid);
572 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); 612 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
573 rdma_ah_set_port_num(ah_attr, port_num);
574 613
575 if (wc->wc_flags & IB_WC_GRH) { 614 if (wc->wc_flags & IB_WC_GRH) {
576 if (!rdma_cap_eth_ah(device, port_num)) {
577 if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { 615 if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
578 ret = ib_find_cached_gid_by_port(device, &dgid, 616 ret = ib_find_cached_gid_by_port(device, &dgid,
579 IB_GID_TYPE_IB, 617 IB_GID_TYPE_IB,
@@ -584,18 +622,17 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
584 } else { 622 } else {
585 gid_index = 0; 623 gid_index = 0;
586 } 624 }
587 }
588
589 flow_class = be32_to_cpu(grh->version_tclass_flow);
590 rdma_ah_set_grh(ah_attr, &sgid,
591 flow_class & 0xFFFFF,
592 (u8)gid_index, hoplimit,
593 (flow_class >> 20) & 0xFF);
594 625
626 flow_class = be32_to_cpu(grh->version_tclass_flow);
627 rdma_ah_set_grh(ah_attr, &sgid,
628 flow_class & 0xFFFFF,
629 (u8)gid_index, hoplimit,
630 (flow_class >> 20) & 0xFF);
631 }
632 return 0;
595 } 633 }
596 return 0;
597} 634}
598EXPORT_SYMBOL(ib_init_ah_from_wc); 635EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
599 636
600struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, 637struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
601 const struct ib_grh *grh, u8 port_num) 638 const struct ib_grh *grh, u8 port_num)
@@ -603,7 +640,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
603 struct rdma_ah_attr ah_attr; 640 struct rdma_ah_attr ah_attr;
604 int ret; 641 int ret;
605 642
606 ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); 643 ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
607 if (ret) 644 if (ret)
608 return ERR_PTR(ret); 645 return ERR_PTR(ret);
609 646
@@ -850,7 +887,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
850 if (qp_init_attr->cap.max_rdma_ctxs) 887 if (qp_init_attr->cap.max_rdma_ctxs)
851 rdma_rw_init_qp(device, qp_init_attr); 888 rdma_rw_init_qp(device, qp_init_attr);
852 889
853 qp = device->create_qp(pd, qp_init_attr, NULL); 890 qp = _ib_create_qp(device, pd, qp_init_attr, NULL);
854 if (IS_ERR(qp)) 891 if (IS_ERR(qp))
855 return qp; 892 return qp;
856 893
@@ -860,7 +897,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
860 return ERR_PTR(ret); 897 return ERR_PTR(ret);
861 } 898 }
862 899
863 qp->device = device;
864 qp->real_qp = qp; 900 qp->real_qp = qp;
865 qp->uobject = NULL; 901 qp->uobject = NULL;
866 qp->qp_type = qp_init_attr->qp_type; 902 qp->qp_type = qp_init_attr->qp_type;
@@ -890,7 +926,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
890 atomic_inc(&qp_init_attr->srq->usecnt); 926 atomic_inc(&qp_init_attr->srq->usecnt);
891 } 927 }
892 928
893 qp->pd = pd;
894 qp->send_cq = qp_init_attr->send_cq; 929 qp->send_cq = qp_init_attr->send_cq;
895 qp->xrcd = NULL; 930 qp->xrcd = NULL;
896 931
@@ -1269,16 +1304,8 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
1269 if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr))) 1304 if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
1270 return -EINVAL; 1305 return -EINVAL;
1271 1306
1272 if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE)
1273 return 0;
1274
1275 grh = rdma_ah_retrieve_grh(ah_attr); 1307 grh = rdma_ah_retrieve_grh(ah_attr);
1276 1308
1277 if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
1278 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
1279 ah_attr->roce.dmac);
1280 return 0;
1281 }
1282 if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1309 if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1283 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1310 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1284 __be32 addr = 0; 1311 __be32 addr = 0;
@@ -1290,40 +1317,52 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
1290 (char *)ah_attr->roce.dmac); 1317 (char *)ah_attr->roce.dmac);
1291 } 1318 }
1292 } else { 1319 } else {
1293 union ib_gid sgid; 1320 ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
1294 struct ib_gid_attr sgid_attr; 1321 }
1295 int ifindex; 1322 return ret;
1296 int hop_limit; 1323}
1297
1298 ret = ib_query_gid(device,
1299 rdma_ah_get_port_num(ah_attr),
1300 grh->sgid_index,
1301 &sgid, &sgid_attr);
1302
1303 if (ret || !sgid_attr.ndev) {
1304 if (!ret)
1305 ret = -ENXIO;
1306 goto out;
1307 }
1308
1309 ifindex = sgid_attr.ndev->ifindex;
1310 1324
1311 ret = 1325/**
1312 rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, 1326 * IB core internal function to perform QP attributes modification.
1313 ah_attr->roce.dmac, 1327 */
1314 NULL, &ifindex, &hop_limit); 1328static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1329 int attr_mask, struct ib_udata *udata)
1330{
1331 u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1332 int ret;
1315 1333
1316 dev_put(sgid_attr.ndev); 1334 if (rdma_ib_or_roce(qp->device, port)) {
1335 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1336 pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
1337 __func__, qp->device->name);
1338 attr->rq_psn &= 0xffffff;
1339 }
1317 1340
1318 grh->hop_limit = hop_limit; 1341 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1342 pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n",
1343 __func__, qp->device->name);
1344 attr->sq_psn &= 0xffffff;
1345 }
1319 } 1346 }
1320out: 1347
1348 ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
1349 if (!ret && (attr_mask & IB_QP_PORT))
1350 qp->port = attr->port_num;
1351
1321 return ret; 1352 return ret;
1322} 1353}
1323 1354
1355static bool is_qp_type_connected(const struct ib_qp *qp)
1356{
1357 return (qp->qp_type == IB_QPT_UC ||
1358 qp->qp_type == IB_QPT_RC ||
1359 qp->qp_type == IB_QPT_XRC_INI ||
1360 qp->qp_type == IB_QPT_XRC_TGT);
1361}
1362
1324/** 1363/**
1325 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. 1364 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1326 * @qp: The QP to modify. 1365 * @ib_qp: The QP to modify.
1327 * @attr: On input, specifies the QP attributes to modify. On output, 1366 * @attr: On input, specifies the QP attributes to modify. On output,
1328 * the current values of selected QP attributes are returned. 1367 * the current values of selected QP attributes are returned.
1329 * @attr_mask: A bit-mask used to specify which attributes of the QP 1368 * @attr_mask: A bit-mask used to specify which attributes of the QP
@@ -1332,21 +1371,20 @@ out:
1332 * are being modified. 1371 * are being modified.
1333 * It returns 0 on success and returns appropriate error code on error. 1372 * It returns 0 on success and returns appropriate error code on error.
1334 */ 1373 */
1335int ib_modify_qp_with_udata(struct ib_qp *qp, struct ib_qp_attr *attr, 1374int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1336 int attr_mask, struct ib_udata *udata) 1375 int attr_mask, struct ib_udata *udata)
1337{ 1376{
1377 struct ib_qp *qp = ib_qp->real_qp;
1338 int ret; 1378 int ret;
1339 1379
1340 if (attr_mask & IB_QP_AV) { 1380 if (attr_mask & IB_QP_AV &&
1381 attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1382 is_qp_type_connected(qp)) {
1341 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); 1383 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
1342 if (ret) 1384 if (ret)
1343 return ret; 1385 return ret;
1344 } 1386 }
1345 ret = ib_security_modify_qp(qp, attr, attr_mask, udata); 1387 return _ib_modify_qp(qp, attr, attr_mask, udata);
1346 if (!ret && (attr_mask & IB_QP_PORT))
1347 qp->port = attr->port_num;
1348
1349 return ret;
1350} 1388}
1351EXPORT_SYMBOL(ib_modify_qp_with_udata); 1389EXPORT_SYMBOL(ib_modify_qp_with_udata);
1352 1390
@@ -1409,7 +1447,7 @@ int ib_modify_qp(struct ib_qp *qp,
1409 struct ib_qp_attr *qp_attr, 1447 struct ib_qp_attr *qp_attr,
1410 int qp_attr_mask) 1448 int qp_attr_mask)
1411{ 1449{
1412 return ib_modify_qp_with_udata(qp, qp_attr, qp_attr_mask, NULL); 1450 return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
1413} 1451}
1414EXPORT_SYMBOL(ib_modify_qp); 1452EXPORT_SYMBOL(ib_modify_qp);
1415 1453
@@ -1503,6 +1541,7 @@ int ib_destroy_qp(struct ib_qp *qp)
1503 if (!qp->uobject) 1541 if (!qp->uobject)
1504 rdma_rw_cleanup_mrs(qp); 1542 rdma_rw_cleanup_mrs(qp);
1505 1543
1544 rdma_restrack_del(&qp->res);
1506 ret = qp->device->destroy_qp(qp); 1545 ret = qp->device->destroy_qp(qp);
1507 if (!ret) { 1546 if (!ret) {
1508 if (pd) 1547 if (pd)
@@ -1545,6 +1584,8 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
1545 cq->event_handler = event_handler; 1584 cq->event_handler = event_handler;
1546 cq->cq_context = cq_context; 1585 cq->cq_context = cq_context;
1547 atomic_set(&cq->usecnt, 0); 1586 atomic_set(&cq->usecnt, 0);
1587 cq->res.type = RDMA_RESTRACK_CQ;
1588 rdma_restrack_add(&cq->res);
1548 } 1589 }
1549 1590
1550 return cq; 1591 return cq;
@@ -1563,6 +1604,7 @@ int ib_destroy_cq(struct ib_cq *cq)
1563 if (atomic_read(&cq->usecnt)) 1604 if (atomic_read(&cq->usecnt))
1564 return -EBUSY; 1605 return -EBUSY;
1565 1606
1607 rdma_restrack_del(&cq->res);
1566 return cq->device->destroy_cq(cq); 1608 return cq->device->destroy_cq(cq);
1567} 1609}
1568EXPORT_SYMBOL(ib_destroy_cq); 1610EXPORT_SYMBOL(ib_destroy_cq);
@@ -1747,7 +1789,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
1747} 1789}
1748EXPORT_SYMBOL(ib_detach_mcast); 1790EXPORT_SYMBOL(ib_detach_mcast);
1749 1791
1750struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) 1792struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
1751{ 1793{
1752 struct ib_xrcd *xrcd; 1794 struct ib_xrcd *xrcd;
1753 1795
@@ -1765,7 +1807,7 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
1765 1807
1766 return xrcd; 1808 return xrcd;
1767} 1809}
1768EXPORT_SYMBOL(ib_alloc_xrcd); 1810EXPORT_SYMBOL(__ib_alloc_xrcd);
1769 1811
1770int ib_dealloc_xrcd(struct ib_xrcd *xrcd) 1812int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1771{ 1813{
@@ -1790,11 +1832,11 @@ EXPORT_SYMBOL(ib_dealloc_xrcd);
1790 * ib_create_wq - Creates a WQ associated with the specified protection 1832 * ib_create_wq - Creates a WQ associated with the specified protection
1791 * domain. 1833 * domain.
1792 * @pd: The protection domain associated with the WQ. 1834 * @pd: The protection domain associated with the WQ.
1793 * @wq_init_attr: A list of initial attributes required to create the 1835 * @wq_attr: A list of initial attributes required to create the
1794 * WQ. If WQ creation succeeds, then the attributes are updated to 1836 * WQ. If WQ creation succeeds, then the attributes are updated to
1795 * the actual capabilities of the created WQ. 1837 * the actual capabilities of the created WQ.
1796 * 1838 *
1797 * wq_init_attr->max_wr and wq_init_attr->max_sge determine 1839 * wq_attr->max_wr and wq_attr->max_sge determine
1798 * the requested size of the WQ, and set to the actual values allocated 1840 * the requested size of the WQ, and set to the actual values allocated
1799 * on return. 1841 * on return.
1800 * If ib_create_wq() succeeds, then max_wr and max_sge will always be 1842 * If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -2156,16 +2198,16 @@ static void __ib_drain_sq(struct ib_qp *qp)
2156 struct ib_send_wr swr = {}, *bad_swr; 2198 struct ib_send_wr swr = {}, *bad_swr;
2157 int ret; 2199 int ret;
2158 2200
2159 swr.wr_cqe = &sdrain.cqe;
2160 sdrain.cqe.done = ib_drain_qp_done;
2161 init_completion(&sdrain.done);
2162
2163 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 2201 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2164 if (ret) { 2202 if (ret) {
2165 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); 2203 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2166 return; 2204 return;
2167 } 2205 }
2168 2206
2207 swr.wr_cqe = &sdrain.cqe;
2208 sdrain.cqe.done = ib_drain_qp_done;
2209 init_completion(&sdrain.done);
2210
2169 ret = ib_post_send(qp, &swr, &bad_swr); 2211 ret = ib_post_send(qp, &swr, &bad_swr);
2170 if (ret) { 2212 if (ret) {
2171 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); 2213 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
@@ -2190,16 +2232,16 @@ static void __ib_drain_rq(struct ib_qp *qp)
2190 struct ib_recv_wr rwr = {}, *bad_rwr; 2232 struct ib_recv_wr rwr = {}, *bad_rwr;
2191 int ret; 2233 int ret;
2192 2234
2193 rwr.wr_cqe = &rdrain.cqe;
2194 rdrain.cqe.done = ib_drain_qp_done;
2195 init_completion(&rdrain.done);
2196
2197 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 2235 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2198 if (ret) { 2236 if (ret) {
2199 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); 2237 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2200 return; 2238 return;
2201 } 2239 }
2202 2240
2241 rwr.wr_cqe = &rdrain.cqe;
2242 rdrain.cqe.done = ib_drain_qp_done;
2243 init_completion(&rdrain.done);
2244
2203 ret = ib_post_recv(qp, &rwr, &bad_rwr); 2245 ret = ib_post_recv(qp, &rwr, &bad_rwr);
2204 if (ret) { 2246 if (ret) {
2205 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); 2247 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
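
Among the verbs.c changes above, the new _ib_modify_qp() helper clamps rq_psn and sq_psn to their 24-bit wire width on IB and RoCE ports instead of passing oversized values through, warning when it does so. The snippet below only demonstrates the masking arithmetic; it is not the kernel function, and the attr_mask handling is omitted.

/*
 * Demonstration of the 24-bit PSN clamp added in _ib_modify_qp() above.
 * Pure arithmetic only.
 */
#include <stdio.h>

#define PSN_MASK 0xffffffu   /* PSNs are 24 bits on the wire */

static unsigned int clamp_psn(unsigned int psn)
{
        if (psn & ~PSN_MASK)
                fprintf(stderr, "psn 0x%x overflows 24 bits, masking\n", psn);
        return psn & PSN_MASK;
}

int main(void)
{
        printf("0x00abcdef -> 0x%06x\n", clamp_psn(0x00abcdefu)); /* unchanged */
        printf("0x1234abcd -> 0x%06x\n", clamp_psn(0x1234abcdu)); /* masked    */
        return 0;
}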
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ecbac91b2e14..ca32057e886f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -43,20 +43,41 @@
43#define ROCE_DRV_MODULE_VERSION "1.0.0" 43#define ROCE_DRV_MODULE_VERSION "1.0.0"
44 44
45#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" 45#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
46 46#define BNXT_RE_PAGE_SHIFT_4K (12)
47#define BNXT_RE_PAGE_SIZE_4K BIT(12) 47#define BNXT_RE_PAGE_SHIFT_8K (13)
48#define BNXT_RE_PAGE_SIZE_8K BIT(13) 48#define BNXT_RE_PAGE_SHIFT_64K (16)
49#define BNXT_RE_PAGE_SIZE_64K BIT(16) 49#define BNXT_RE_PAGE_SHIFT_2M (21)
50#define BNXT_RE_PAGE_SIZE_2M BIT(21) 50#define BNXT_RE_PAGE_SHIFT_8M (23)
51#define BNXT_RE_PAGE_SIZE_8M BIT(23) 51#define BNXT_RE_PAGE_SHIFT_1G (30)
52#define BNXT_RE_PAGE_SIZE_1G BIT(30) 52
53 53#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K)
54#define BNXT_RE_MAX_MR_SIZE BIT(30) 54#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K)
55#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K)
56#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M)
57#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M)
58#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G)
59
60#define BNXT_RE_MAX_MR_SIZE_LOW BIT(BNXT_RE_PAGE_SHIFT_1G)
61#define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39)
62#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
55 63
56#define BNXT_RE_MAX_QPC_COUNT (64 * 1024) 64#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
57#define BNXT_RE_MAX_MRW_COUNT (64 * 1024) 65#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
58#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) 66#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
59#define BNXT_RE_MAX_CQ_COUNT (64 * 1024) 67#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
68#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
69#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
70
71/* Number of MRs to reserve for PF, leaving remainder for VFs */
72#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
73#define BNXT_RE_MAX_GID_PER_VF 128
74
75/*
76 * Percentage of resources of each type reserved for PF.
77 * Remaining resources are divided equally among VFs.
78 * [0, 100]
79 */
80#define BNXT_RE_PCT_RSVD_FOR_PF 50
60 81
61#define BNXT_RE_UD_QP_HW_STALL 0x400000 82#define BNXT_RE_UD_QP_HW_STALL 0x400000
62 83
@@ -100,6 +121,7 @@ struct bnxt_re_dev {
100#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 121#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
101#define BNXT_RE_FLAG_QOS_WORK_REG 5 122#define BNXT_RE_FLAG_QOS_WORK_REG 5
102#define BNXT_RE_FLAG_TASK_IN_PROG 6 123#define BNXT_RE_FLAG_TASK_IN_PROG 6
124#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
103 struct net_device *netdev; 125 struct net_device *netdev;
104 unsigned int version, major, minor; 126 unsigned int version, major, minor;
105 struct bnxt_en_dev *en_dev; 127 struct bnxt_en_dev *en_dev;
@@ -145,6 +167,9 @@ struct bnxt_re_dev {
145 struct bnxt_re_ah *sqp_ah; 167 struct bnxt_re_ah *sqp_ah;
146 struct bnxt_re_sqp_entries sqp_tbl[1024]; 168 struct bnxt_re_sqp_entries sqp_tbl[1024];
147 atomic_t nq_alloc_cnt; 169 atomic_t nq_alloc_cnt;
170 u32 is_virtfn;
171 u32 num_vfs;
172 struct bnxt_qplib_roce_stats stats;
148}; 173};
149 174
150#define to_bnxt_re_dev(ptr, member) \ 175#define to_bnxt_re_dev(ptr, member) \
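
The new BNXT_RE_FLAG_ISSUE_ROCE_STATS bit above is handled like the existing flags: set_bit() when the device registers, test_bit() before issuing the firmware stats query, clear_bit() if that query fails (all visible in the hw_counters.c and main.c hunks below). A standalone sketch of that gate-with-a-flag-bit pattern, using plain C bit operations instead of the kernel's atomic bitops:

#include <stdio.h>

#define FLAG_ISSUE_ROCE_STATS 29   /* mirrors BNXT_RE_FLAG_ISSUE_ROCE_STATS */

static unsigned long flags;

/* Non-atomic stand-ins for set_bit()/test_bit()/clear_bit(). */
static void set_flag(int nr)   { flags |=  (1UL << nr); }
static int  test_flag(int nr)  { return !!(flags & (1UL << nr)); }
static void clear_flag(int nr) { flags &= ~(1UL << nr); }

/* Pretend firmware query that fails once the device stops cooperating. */
static int query_roce_stats(int healthy)
{
    return healthy ? 0 : -5;
}

int main(void)
{
    set_flag(FLAG_ISSUE_ROCE_STATS);                 /* done at registration time */

    for (int i = 0; i < 3; i++) {
        int healthy = (i == 0);

        if (test_flag(FLAG_ISSUE_ROCE_STATS)) {
            if (query_roce_stats(healthy))
                clear_flag(FLAG_ISSUE_ROCE_STATS);   /* stop asking after a failure */
            else
                printf("stats refreshed\n");
        } else {
            printf("extended stats disabled\n");
        }
    }
    return 0;
}
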
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 7b28219eba46..77416bc61e6e 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -58,16 +58,55 @@
58#include "hw_counters.h" 58#include "hw_counters.h"
59 59
60static const char * const bnxt_re_stat_name[] = { 60static const char * const bnxt_re_stat_name[] = {
61 [BNXT_RE_ACTIVE_QP] = "active_qps", 61 [BNXT_RE_ACTIVE_QP] = "active_qps",
62 [BNXT_RE_ACTIVE_SRQ] = "active_srqs", 62 [BNXT_RE_ACTIVE_SRQ] = "active_srqs",
63 [BNXT_RE_ACTIVE_CQ] = "active_cqs", 63 [BNXT_RE_ACTIVE_CQ] = "active_cqs",
64 [BNXT_RE_ACTIVE_MR] = "active_mrs", 64 [BNXT_RE_ACTIVE_MR] = "active_mrs",
65 [BNXT_RE_ACTIVE_MW] = "active_mws", 65 [BNXT_RE_ACTIVE_MW] = "active_mws",
66 [BNXT_RE_RX_PKTS] = "rx_pkts", 66 [BNXT_RE_RX_PKTS] = "rx_pkts",
67 [BNXT_RE_RX_BYTES] = "rx_bytes", 67 [BNXT_RE_RX_BYTES] = "rx_bytes",
68 [BNXT_RE_TX_PKTS] = "tx_pkts", 68 [BNXT_RE_TX_PKTS] = "tx_pkts",
69 [BNXT_RE_TX_BYTES] = "tx_bytes", 69 [BNXT_RE_TX_BYTES] = "tx_bytes",
70 [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors" 70 [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
71 [BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
72 [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
73 [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
74 [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
75 [BNXT_RE_MISSING_RESP] = "missin_resp",
76 [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
77 [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
78 [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
79 [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err",
80 [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err",
81 [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
82 [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err",
83 [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err",
84 [BNXT_RE_DUP_REQ] = "dup_req",
85 [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max",
86 [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch",
87 [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe",
88 [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err",
89 [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey",
90 [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err",
91 [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm",
92 [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err",
93 [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey",
94 [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err",
95 [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm",
96 [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err",
97 [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow",
98 [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode",
99 [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic",
100 [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err",
101 [BNXT_RE_RES_MEM_ERROR] = "res_mem_err",
102 [BNXT_RE_RES_SRQ_ERR] = "res_srq_err",
103 [BNXT_RE_RES_CMP_ERR] = "res_cmp_err",
104 [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey",
105 [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err",
106 [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
107 [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
108 [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
109 [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
71}; 110};
72 111
73int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, 112int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -76,6 +115,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
76{ 115{
77 struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); 116 struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
78 struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma; 117 struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
118 int rc = 0;
79 119
80 if (!port || !stats) 120 if (!port || !stats)
81 return -EINVAL; 121 return -EINVAL;
@@ -97,6 +137,91 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
97 stats->value[BNXT_RE_TX_BYTES] = 137 stats->value[BNXT_RE_TX_BYTES] =
98 le64_to_cpu(bnxt_re_stats->tx_ucast_bytes); 138 le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
99 } 139 }
140 if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
141 rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
142 if (rc)
143 clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
144 &rdev->flags);
145 stats->value[BNXT_RE_TO_RETRANSMITS] =
146 rdev->stats.to_retransmits;
147 stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
148 rdev->stats.seq_err_naks_rcvd;
149 stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
150 rdev->stats.max_retry_exceeded;
151 stats->value[BNXT_RE_RNR_NAKS_RCVD] =
152 rdev->stats.rnr_naks_rcvd;
153 stats->value[BNXT_RE_MISSING_RESP] =
154 rdev->stats.missing_resp;
155 stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
156 rdev->stats.unrecoverable_err;
157 stats->value[BNXT_RE_BAD_RESP_ERR] =
158 rdev->stats.bad_resp_err;
159 stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
160 rdev->stats.local_qp_op_err;
161 stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
162 rdev->stats.local_protection_err;
163 stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
164 rdev->stats.mem_mgmt_op_err;
165 stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
166 rdev->stats.remote_invalid_req_err;
167 stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
168 rdev->stats.remote_access_err;
169 stats->value[BNXT_RE_REMOTE_OP_ERR] =
170 rdev->stats.remote_op_err;
171 stats->value[BNXT_RE_DUP_REQ] =
172 rdev->stats.dup_req;
173 stats->value[BNXT_RE_RES_EXCEED_MAX] =
174 rdev->stats.res_exceed_max;
175 stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
176 rdev->stats.res_length_mismatch;
177 stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
178 rdev->stats.res_exceeds_wqe;
179 stats->value[BNXT_RE_RES_OPCODE_ERR] =
180 rdev->stats.res_opcode_err;
181 stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
182 rdev->stats.res_rx_invalid_rkey;
183 stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
184 rdev->stats.res_rx_domain_err;
185 stats->value[BNXT_RE_RES_RX_NO_PERM] =
186 rdev->stats.res_rx_no_perm;
187 stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
188 rdev->stats.res_rx_range_err;
189 stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
190 rdev->stats.res_tx_invalid_rkey;
191 stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
192 rdev->stats.res_tx_domain_err;
193 stats->value[BNXT_RE_RES_TX_NO_PERM] =
194 rdev->stats.res_tx_no_perm;
195 stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
196 rdev->stats.res_tx_range_err;
197 stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
198 rdev->stats.res_irrq_oflow;
199 stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
200 rdev->stats.res_unsup_opcode;
201 stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
202 rdev->stats.res_unaligned_atomic;
203 stats->value[BNXT_RE_RES_REM_INV_ERR] =
204 rdev->stats.res_rem_inv_err;
205 stats->value[BNXT_RE_RES_MEM_ERROR] =
206 rdev->stats.res_mem_error;
207 stats->value[BNXT_RE_RES_SRQ_ERR] =
208 rdev->stats.res_srq_err;
209 stats->value[BNXT_RE_RES_CMP_ERR] =
210 rdev->stats.res_cmp_err;
211 stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
212 rdev->stats.res_invalid_dup_rkey;
213 stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
214 rdev->stats.res_wqe_format_err;
215 stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
216 rdev->stats.res_cq_load_err;
217 stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
218 rdev->stats.res_srq_load_err;
219 stats->value[BNXT_RE_RES_TX_PCI_ERR] =
220 rdev->stats.res_tx_pci_err;
221 stats->value[BNXT_RE_RES_RX_PCI_ERR] =
222 rdev->stats.res_rx_pci_err;
223 }
224
100 return ARRAY_SIZE(bnxt_re_stat_name); 225 return ARRAY_SIZE(bnxt_re_stat_name);
101} 226}
102 227
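
The extended counters above only work because three things stay index-aligned: the enum in hw_counters.h, the bnxt_re_stat_name[] table, and the stats->value[] array filled in bnxt_re_ib_get_hw_stats(), which reports ARRAY_SIZE(bnxt_re_stat_name) entries back to the core. A small standalone sketch of that parallel-array pattern (illustrative only; the names and values here are made up, and the real interface goes through the ib_device hw-stats callbacks):

#include <stdio.h>
#include <stdint.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum demo_stats {
    DEMO_ACTIVE_QP,
    DEMO_TO_RETRANSMITS,
    DEMO_NUM_COUNTERS
};

static const char * const demo_stat_name[] = {
    [DEMO_ACTIVE_QP]      = "active_qps",
    [DEMO_TO_RETRANSMITS] = "to_retransmits",
};

int main(void)
{
    uint64_t value[DEMO_NUM_COUNTERS] = { 0 };

    /* Each value slot is filled under the same enum index as its name. */
    value[DEMO_ACTIVE_QP] = 4;
    value[DEMO_TO_RETRANSMITS] = 17;

    for (size_t i = 0; i < ARRAY_SIZE(demo_stat_name); i++)
        printf("%-16s %llu\n", demo_stat_name[i],
               (unsigned long long)value[i]);
    return 0;
}

Adding a counter therefore means touching the enum, the name table and the fill code together, which is exactly the shape of the three hunks above.
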
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index be0dc0093b58..a01a922717d5 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,45 @@ enum bnxt_re_hw_stats {
51 BNXT_RE_TX_PKTS, 51 BNXT_RE_TX_PKTS,
52 BNXT_RE_TX_BYTES, 52 BNXT_RE_TX_BYTES,
53 BNXT_RE_RECOVERABLE_ERRORS, 53 BNXT_RE_RECOVERABLE_ERRORS,
54 BNXT_RE_TO_RETRANSMITS,
55 BNXT_RE_SEQ_ERR_NAKS_RCVD,
56 BNXT_RE_MAX_RETRY_EXCEEDED,
57 BNXT_RE_RNR_NAKS_RCVD,
58 BNXT_RE_MISSING_RESP,
59 BNXT_RE_UNRECOVERABLE_ERR,
60 BNXT_RE_BAD_RESP_ERR,
61 BNXT_RE_LOCAL_QP_OP_ERR,
62 BNXT_RE_LOCAL_PROTECTION_ERR,
63 BNXT_RE_MEM_MGMT_OP_ERR,
64 BNXT_RE_REMOTE_INVALID_REQ_ERR,
65 BNXT_RE_REMOTE_ACCESS_ERR,
66 BNXT_RE_REMOTE_OP_ERR,
67 BNXT_RE_DUP_REQ,
68 BNXT_RE_RES_EXCEED_MAX,
69 BNXT_RE_RES_LENGTH_MISMATCH,
70 BNXT_RE_RES_EXCEEDS_WQE,
71 BNXT_RE_RES_OPCODE_ERR,
72 BNXT_RE_RES_RX_INVALID_RKEY,
73 BNXT_RE_RES_RX_DOMAIN_ERR,
74 BNXT_RE_RES_RX_NO_PERM,
75 BNXT_RE_RES_RX_RANGE_ERR,
76 BNXT_RE_RES_TX_INVALID_RKEY,
77 BNXT_RE_RES_TX_DOMAIN_ERR,
78 BNXT_RE_RES_TX_NO_PERM,
79 BNXT_RE_RES_TX_RANGE_ERR,
80 BNXT_RE_RES_IRRQ_OFLOW,
81 BNXT_RE_RES_UNSUP_OPCODE,
82 BNXT_RE_RES_UNALIGNED_ATOMIC,
83 BNXT_RE_RES_REM_INV_ERR,
84 BNXT_RE_RES_MEM_ERROR,
85 BNXT_RE_RES_SRQ_ERR,
86 BNXT_RE_RES_CMP_ERR,
87 BNXT_RE_RES_INVALID_DUP_RKEY,
88 BNXT_RE_RES_WQE_FORMAT_ERR,
89 BNXT_RE_RES_CQ_LOAD_ERR,
90 BNXT_RE_RES_SRQ_LOAD_ERR,
91 BNXT_RE_RES_TX_PCI_ERR,
92 BNXT_RE_RES_RX_PCI_ERR,
54 BNXT_RE_NUM_COUNTERS 93 BNXT_RE_NUM_COUNTERS
55}; 94};
56 95
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2032db7db766..9b8fa77b8831 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -141,12 +141,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
141 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; 141 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
142 142
143 memset(ib_attr, 0, sizeof(*ib_attr)); 143 memset(ib_attr, 0, sizeof(*ib_attr));
144 144 memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
145 ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver); 145 min(sizeof(dev_attr->fw_ver),
146 sizeof(ib_attr->fw_ver)));
146 bnxt_qplib_get_guid(rdev->netdev->dev_addr, 147 bnxt_qplib_get_guid(rdev->netdev->dev_addr,
147 (u8 *)&ib_attr->sys_image_guid); 148 (u8 *)&ib_attr->sys_image_guid);
148 ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; 149 ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
149 ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K; 150 ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M;
150 151
151 ib_attr->vendor_id = rdev->en_dev->pdev->vendor; 152 ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
152 ib_attr->vendor_part_id = rdev->en_dev->pdev->device; 153 ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
@@ -247,8 +248,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
247 IB_PORT_VENDOR_CLASS_SUP | 248 IB_PORT_VENDOR_CLASS_SUP |
248 IB_PORT_IP_BASED_GIDS; 249 IB_PORT_IP_BASED_GIDS;
249 250
250 /* Max MSG size set to 2G for now */ 251 port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
251 port_attr->max_msg_sz = 0x80000000;
252 port_attr->bad_pkey_cntr = 0; 252 port_attr->bad_pkey_cntr = 0;
253 port_attr->qkey_viol_cntr = 0; 253 port_attr->qkey_viol_cntr = 0;
254 port_attr->pkey_tbl_len = dev_attr->max_pkey; 254 port_attr->pkey_tbl_len = dev_attr->max_pkey;
@@ -281,6 +281,15 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
281 return 0; 281 return 0;
282} 282}
283 283
284void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
285{
286 struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
287
288 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
289 rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1],
290 rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]);
291}
292
284int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, 293int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
285 u16 index, u16 *pkey) 294 u16 index, u16 *pkey)
286{ 295{
@@ -532,7 +541,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
532 mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; 541 mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
533 pbl_tbl = dma_addr; 542 pbl_tbl = dma_addr;
534 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, 543 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
535 BNXT_RE_FENCE_PBL_SIZE, false); 544 BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
536 if (rc) { 545 if (rc) {
537 dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n"); 546 dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
538 goto fail; 547 goto fail;
@@ -1018,6 +1027,7 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
1018 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; 1027 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
1019 struct bnxt_re_qp *qp; 1028 struct bnxt_re_qp *qp;
1020 struct bnxt_re_cq *cq; 1029 struct bnxt_re_cq *cq;
1030 struct bnxt_re_srq *srq;
1021 int rc, entries; 1031 int rc, entries;
1022 1032
1023 if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) || 1033 if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
@@ -1073,9 +1083,15 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
1073 } 1083 }
1074 1084
1075 if (qp_init_attr->srq) { 1085 if (qp_init_attr->srq) {
1076 dev_err(rdev_to_dev(rdev), "SRQ not supported"); 1086 srq = container_of(qp_init_attr->srq, struct bnxt_re_srq,
1077 rc = -ENOTSUPP; 1087 ib_srq);
1078 goto fail; 1088 if (!srq) {
1089 dev_err(rdev_to_dev(rdev), "SRQ not found");
1090 rc = -EINVAL;
1091 goto fail;
1092 }
1093 qp->qplib_qp.srq = &srq->qplib_srq;
1094 qp->qplib_qp.rq.max_wqe = 0;
1079 } else { 1095 } else {
1080 /* Allocate 1 more than what's provided so posting max doesn't 1096 /* Allocate 1 more than what's provided so posting max doesn't
1081 * mean empty 1097 * mean empty
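
The SRQ branch in bnxt_re_create_qp() above recovers the driver-private bnxt_re_srq from the generic ib_srq pointer with container_of(); since container_of() is plain pointer arithmetic on a pointer that was already checked, the following NULL test is effectively belt-and-braces. A standalone sketch of what container_of() does, using hypothetical demo structures rather than the driver's:

#include <stddef.h>
#include <stdio.h>

/* Same idea as the kernel macro: given a pointer to a member, step back
 * by the member's offset to reach the enclosing structure. */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct ib_srq_demo   { int dummy; };
struct bnxt_srq_demo {
    int                id;
    struct ib_srq_demo ib_srq;    /* embedded "base" object handed to the core */
};

int main(void)
{
    struct bnxt_srq_demo srq = { .id = 7 };
    struct ib_srq_demo *ib = &srq.ib_srq;     /* what the core passes back */

    struct bnxt_srq_demo *back = container_of(ib, struct bnxt_srq_demo, ib_srq);
    printf("recovered id = %d (%s)\n", back->id,
           back == &srq ? "same object" : "mismatch");
    return 0;
}
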
@@ -1280,6 +1296,237 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
1280 } 1296 }
1281} 1297}
1282 1298
1299/* Shared Receive Queues */
1300int bnxt_re_destroy_srq(struct ib_srq *ib_srq)
1301{
1302 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
1303 ib_srq);
1304 struct bnxt_re_dev *rdev = srq->rdev;
1305 struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
1306 struct bnxt_qplib_nq *nq = NULL;
1307 int rc;
1308
1309 if (qplib_srq->cq)
1310 nq = qplib_srq->cq->nq;
1311 rc = bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
1312 if (rc) {
1313 dev_err(rdev_to_dev(rdev), "Destroy HW SRQ failed!");
1314 return rc;
1315 }
1316
1317 if (srq->umem && !IS_ERR(srq->umem))
1318 ib_umem_release(srq->umem);
1319 kfree(srq);
1320 atomic_dec(&rdev->srq_count);
1321 if (nq)
1322 nq->budget--;
1323 return 0;
1324}
1325
1326static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
1327 struct bnxt_re_pd *pd,
1328 struct bnxt_re_srq *srq,
1329 struct ib_udata *udata)
1330{
1331 struct bnxt_re_srq_req ureq;
1332 struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
1333 struct ib_umem *umem;
1334 int bytes = 0;
1335 struct ib_ucontext *context = pd->ib_pd.uobject->context;
1336 struct bnxt_re_ucontext *cntx = container_of(context,
1337 struct bnxt_re_ucontext,
1338 ib_uctx);
1339 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1340 return -EFAULT;
1341
1342 bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
1343 bytes = PAGE_ALIGN(bytes);
1344 umem = ib_umem_get(context, ureq.srqva, bytes,
1345 IB_ACCESS_LOCAL_WRITE, 1);
1346 if (IS_ERR(umem))
1347 return PTR_ERR(umem);
1348
1349 srq->umem = umem;
1350 qplib_srq->nmap = umem->nmap;
1351 qplib_srq->sglist = umem->sg_head.sgl;
1352 qplib_srq->srq_handle = ureq.srq_handle;
1353 qplib_srq->dpi = &cntx->dpi;
1354
1355 return 0;
1356}
1357
1358struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
1359 struct ib_srq_init_attr *srq_init_attr,
1360 struct ib_udata *udata)
1361{
1362 struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
1363 struct bnxt_re_dev *rdev = pd->rdev;
1364 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
1365 struct bnxt_re_srq *srq;
1366 struct bnxt_qplib_nq *nq = NULL;
1367 int rc, entries;
1368
1369 if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
1370 dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded");
 1370		dev_err(rdev_to_dev(rdev), "Create SRQ failed - max exceeded");
1371 rc = -EINVAL;
1372 goto exit;
1373 }
1374
1375 if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
1376 rc = -ENOTSUPP;
1377 goto exit;
1378 }
1379
1380 srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1381 if (!srq) {
1382 rc = -ENOMEM;
1383 goto exit;
1384 }
1385 srq->rdev = rdev;
1386 srq->qplib_srq.pd = &pd->qplib_pd;
1387 srq->qplib_srq.dpi = &rdev->dpi_privileged;
1388 /* Allocate 1 more than what's provided so posting max doesn't
1389 * mean empty
1390 */
1391 entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
1392 if (entries > dev_attr->max_srq_wqes + 1)
1393 entries = dev_attr->max_srq_wqes + 1;
1394
1395 srq->qplib_srq.max_wqe = entries;
1396 srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
1397 srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
1398 srq->srq_limit = srq_init_attr->attr.srq_limit;
1399 srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
1400 nq = &rdev->nq[0];
1401
1402 if (udata) {
1403 rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
1404 if (rc)
1405 goto fail;
1406 }
1407
1408 rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq);
1409 if (rc) {
1410 dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!");
1411 goto fail;
1412 }
1413
1414 if (udata) {
1415 struct bnxt_re_srq_resp resp;
1416
1417 resp.srqid = srq->qplib_srq.id;
1418 rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
1419 if (rc) {
1420 dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
1421 bnxt_qplib_destroy_srq(&rdev->qplib_res,
1422 &srq->qplib_srq);
1423 goto exit;
1424 }
1425 }
1426 if (nq)
1427 nq->budget++;
1428 atomic_inc(&rdev->srq_count);
1429
1430 return &srq->ib_srq;
1431
1432fail:
1433 if (udata && srq->umem && !IS_ERR(srq->umem)) {
1434 ib_umem_release(srq->umem);
1435 srq->umem = NULL;
1436 }
1437
1438 kfree(srq);
1439exit:
1440 return ERR_PTR(rc);
1441}
1442
1443int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
1444 enum ib_srq_attr_mask srq_attr_mask,
1445 struct ib_udata *udata)
1446{
1447 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
1448 ib_srq);
1449 struct bnxt_re_dev *rdev = srq->rdev;
1450 int rc;
1451
1452 switch (srq_attr_mask) {
1453 case IB_SRQ_MAX_WR:
1454 /* SRQ resize is not supported */
1455 break;
1456 case IB_SRQ_LIMIT:
1457 /* Change the SRQ threshold */
1458 if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
1459 return -EINVAL;
1460
1461 srq->qplib_srq.threshold = srq_attr->srq_limit;
1462 rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq);
1463 if (rc) {
1464 dev_err(rdev_to_dev(rdev), "Modify HW SRQ failed!");
1465 return rc;
1466 }
1467 /* On success, update the shadow */
1468 srq->srq_limit = srq_attr->srq_limit;
1469 /* No need to Build and send response back to udata */
1470 break;
1471 default:
1472 dev_err(rdev_to_dev(rdev),
1473 "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
1474 return -EINVAL;
1475 }
1476 return 0;
1477}
1478
1479int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
1480{
1481 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
1482 ib_srq);
1483 struct bnxt_re_srq tsrq;
1484 struct bnxt_re_dev *rdev = srq->rdev;
1485 int rc;
1486
1487 /* Get live SRQ attr */
1488 tsrq.qplib_srq.id = srq->qplib_srq.id;
1489 rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq);
1490 if (rc) {
1491 dev_err(rdev_to_dev(rdev), "Query HW SRQ failed!");
1492 return rc;
1493 }
1494 srq_attr->max_wr = srq->qplib_srq.max_wqe;
1495 srq_attr->max_sge = srq->qplib_srq.max_sge;
1496 srq_attr->srq_limit = tsrq.qplib_srq.threshold;
1497
1498 return 0;
1499}
1500
1501int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
1502 struct ib_recv_wr **bad_wr)
1503{
1504 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
1505 ib_srq);
1506 struct bnxt_qplib_swqe wqe;
1507 unsigned long flags;
1508 int rc = 0, payload_sz = 0;
1509
1510 spin_lock_irqsave(&srq->lock, flags);
1511 while (wr) {
1512 /* Transcribe each ib_recv_wr to qplib_swqe */
1513 wqe.num_sge = wr->num_sge;
1514 payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
1515 wr->num_sge);
1516 wqe.wr_id = wr->wr_id;
1517 wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
1518
1519 rc = bnxt_qplib_post_srq_recv(&srq->qplib_srq, &wqe);
1520 if (rc) {
1521 *bad_wr = wr;
1522 break;
1523 }
1524 wr = wr->next;
1525 }
1526 spin_unlock_irqrestore(&srq->lock, flags);
1527
1528 return rc;
1529}
1283static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, 1530static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
1284 struct bnxt_re_qp *qp1_qp, 1531 struct bnxt_re_qp *qp1_qp,
1285 int qp_attr_mask) 1532 int qp_attr_mask)
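
bnxt_re_modify_srq() above accepts IB_SRQ_LIMIT by recording the new threshold and letting the qplib layer decide whether to arm; the qplib side (bnxt_qplib_modify_srq(), further down in the qplib_fp.c hunk) arms immediately only while the ring still holds more entries than the threshold, computing the occupancy from wrapped producer/consumer indices. A standalone sketch of that occupancy check, with a hypothetical ring size and threshold:

#include <stdio.h>
#include <stdint.h>

/* Occupancy of a ring of max_elements slots given wrapped producer and
 * consumer indices, mirroring the arming check in the patch:
 *   count = prod > cons ? prod - cons : max - cons + prod; */
static uint32_t ring_count(uint32_t prod, uint32_t cons, uint32_t max_elements)
{
    return prod > cons ? prod - cons : max_elements - cons + prod;
}

int main(void)
{
    uint32_t max = 8;

    printf("%u\n", ring_count(5, 2, max));   /* 3: no wrap        */
    printf("%u\n", ring_count(1, 6, max));   /* 3: wrapped around */

    /* Arm only while the queue still holds more than the threshold;
     * otherwise the driver defers the arm (srq->arm_req = true). */
    uint32_t threshold = 2;
    printf("arm now? %s\n",
           ring_count(1, 6, max) > threshold ? "yes" : "defer");
    return 0;
}
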
@@ -2295,10 +2542,14 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
2295/* Completion Queues */ 2542/* Completion Queues */
2296int bnxt_re_destroy_cq(struct ib_cq *ib_cq) 2543int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
2297{ 2544{
2298 struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
2299 struct bnxt_re_dev *rdev = cq->rdev;
2300 int rc; 2545 int rc;
2301 struct bnxt_qplib_nq *nq = cq->qplib_cq.nq; 2546 struct bnxt_re_cq *cq;
2547 struct bnxt_qplib_nq *nq;
2548 struct bnxt_re_dev *rdev;
2549
2550 cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
2551 rdev = cq->rdev;
2552 nq = cq->qplib_cq.nq;
2302 2553
2303 rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); 2554 rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
2304 if (rc) { 2555 if (rc) {
@@ -2308,12 +2559,11 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
2308 if (!IS_ERR_OR_NULL(cq->umem)) 2559 if (!IS_ERR_OR_NULL(cq->umem))
2309 ib_umem_release(cq->umem); 2560 ib_umem_release(cq->umem);
2310 2561
2311 if (cq) {
2312 kfree(cq->cql);
2313 kfree(cq);
2314 }
2315 atomic_dec(&rdev->cq_count); 2562 atomic_dec(&rdev->cq_count);
2316 nq->budget--; 2563 nq->budget--;
2564 kfree(cq->cql);
2565 kfree(cq);
2566
2317 return 0; 2567 return 0;
2318} 2568}
2319 2569
@@ -3078,7 +3328,8 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
3078 3328
3079 mr->qplib_mr.hwq.level = PBL_LVL_MAX; 3329 mr->qplib_mr.hwq.level = PBL_LVL_MAX;
3080 mr->qplib_mr.total_size = -1; /* Infinte length */ 3330 mr->qplib_mr.total_size = -1; /* Infinte length */
3081 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false); 3331 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false,
3332 PAGE_SIZE);
3082 if (rc) 3333 if (rc)
3083 goto fail_mr; 3334 goto fail_mr;
3084 3335
@@ -3104,10 +3355,8 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
3104 int rc; 3355 int rc;
3105 3356
3106 rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); 3357 rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
3107 if (rc) { 3358 if (rc)
3108 dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc); 3359 dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
3109 return rc;
3110 }
3111 3360
3112 if (mr->pages) { 3361 if (mr->pages) {
3113 rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res, 3362 rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -3170,7 +3419,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
3170 3419
3171 rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); 3420 rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
3172 if (rc) 3421 if (rc)
3173 goto fail; 3422 goto bail;
3174 3423
3175 mr->ib_mr.lkey = mr->qplib_mr.lkey; 3424 mr->ib_mr.lkey = mr->qplib_mr.lkey;
3176 mr->ib_mr.rkey = mr->ib_mr.lkey; 3425 mr->ib_mr.rkey = mr->ib_mr.lkey;
@@ -3192,9 +3441,10 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
3192 return &mr->ib_mr; 3441 return &mr->ib_mr;
3193 3442
3194fail_mr: 3443fail_mr:
3195 bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
3196fail:
3197 kfree(mr->pages); 3444 kfree(mr->pages);
3445fail:
3446 bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
3447bail:
3198 kfree(mr); 3448 kfree(mr);
3199 return ERR_PTR(rc); 3449 return ERR_PTR(rc);
3200} 3450}
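
The relabelled error path above (fail_mr / fail / bail) restores the usual kernel unwind convention: cleanup labels are stacked so that a goto from any point releases exactly what has been acquired so far, in reverse order of acquisition. A compact standalone illustration of the convention, with hypothetical allocations standing in for the MRW and page-list resources:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical two-step setup; fail_at forces an error at a given step
 * so the unwind order can be observed. */
static int demo_setup(int fail_at)
{
    int rc = -1;
    char *a, *b;

    if (fail_at == 1)
        goto bail;                  /* nothing acquired yet */

    a = malloc(32);
    if (!a)
        goto bail;

    if (fail_at == 2)
        goto free_a;                /* undo step 1 only */

    b = malloc(64);
    if (!b)
        goto free_a;

    if (fail_at == 3)
        goto free_b;                /* undo step 2, then step 1 */

    free(b);
    free(a);
    return 0;

free_b:
    free(b);
free_a:
    free(a);
bail:
    return rc;
}

int main(void)
{
    for (int i = 1; i <= 3; i++)
        printf("fail_at=%d -> %d\n", i, demo_setup(i));
    return 0;
}
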
@@ -3248,6 +3498,46 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
3248 return rc; 3498 return rc;
3249} 3499}
3250 3500
3501static int bnxt_re_page_size_ok(int page_shift)
3502{
3503 switch (page_shift) {
3504 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
3505 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
3506 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
3507 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
3508 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
3509 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
3510 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
3511 case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
3512 return 1;
3513 default:
3514 return 0;
3515 }
3516}
3517
3518static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
3519 int page_shift)
3520{
3521 u64 *pbl_tbl = pbl_tbl_orig;
3522 u64 paddr;
3523 u64 page_mask = (1ULL << page_shift) - 1;
3524 int i, pages;
3525 struct scatterlist *sg;
3526 int entry;
3527
3528 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
3529 pages = sg_dma_len(sg) >> PAGE_SHIFT;
3530 for (i = 0; i < pages; i++) {
3531 paddr = sg_dma_address(sg) + (i << PAGE_SHIFT);
3532 if (pbl_tbl == pbl_tbl_orig)
3533 *pbl_tbl++ = paddr & ~page_mask;
3534 else if ((paddr & page_mask) == 0)
3535 *pbl_tbl++ = paddr;
3536 }
3537 }
3538 return pbl_tbl - pbl_tbl_orig;
3539}
3540
3251/* uverbs */ 3541/* uverbs */
3252struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, 3542struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
3253 u64 virt_addr, int mr_access_flags, 3543 u64 virt_addr, int mr_access_flags,
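
fill_umem_pbl_tbl() above steps through the DMA scatterlist in PAGE_SIZE increments but records only one entry per registration page of size 1 << page_shift: the very first address is rounded down to a page_shift boundary, and after that only addresses already aligned to that boundary are appended. A standalone sketch of the same selection logic over a plain array of addresses (hypothetical input, no scatterlist):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Keep one table entry per registration page of size 1 << page_shift,
 * given physically contiguous 4K chunks, mirroring the first-entry
 * masking / aligned-only selection in the patch. */
static size_t fill_pbl(const uint64_t *paddr, size_t n4k, int page_shift,
                       uint64_t *pbl, size_t pbl_cap)
{
    uint64_t page_mask = (1ULL << page_shift) - 1;
    size_t out = 0;

    for (size_t i = 0; i < n4k && out < pbl_cap; i++) {
        if (out == 0)
            pbl[out++] = paddr[i] & ~page_mask;   /* first entry: round down */
        else if ((paddr[i] & page_mask) == 0)
            pbl[out++] = paddr[i];                /* later entries: aligned only */
    }
    return out;
}

int main(void)
{
    /* Sixteen contiguous 4K chunks starting 8K into a 2M page. */
    uint64_t addr[16], pbl[4];
    for (size_t i = 0; i < 16; i++)
        addr[i] = 0x40002000ULL + i * 4096;

    size_t n = fill_pbl(addr, 16, 21 /* 2M shift */, pbl, 4);
    for (size_t i = 0; i < n; i++)
        printf("pbl[%zu] = 0x%llx\n", i, (unsigned long long)pbl[i]);
    return 0;
}
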
@@ -3257,10 +3547,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
3257 struct bnxt_re_dev *rdev = pd->rdev; 3547 struct bnxt_re_dev *rdev = pd->rdev;
3258 struct bnxt_re_mr *mr; 3548 struct bnxt_re_mr *mr;
3259 struct ib_umem *umem; 3549 struct ib_umem *umem;
3260 u64 *pbl_tbl, *pbl_tbl_orig; 3550 u64 *pbl_tbl = NULL;
3261 int i, umem_pgs, pages, rc; 3551 int umem_pgs, page_shift, rc;
3262 struct scatterlist *sg;
3263 int entry;
3264 3552
3265 if (length > BNXT_RE_MAX_MR_SIZE) { 3553 if (length > BNXT_RE_MAX_MR_SIZE) {
3266 dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n", 3554 dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%ld\n",
@@ -3277,64 +3565,68 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
3277 mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); 3565 mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
3278 mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR; 3566 mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
3279 3567
3568 rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
3569 if (rc) {
3570 dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
3571 goto free_mr;
3572 }
3573 /* The fixed portion of the rkey is the same as the lkey */
3574 mr->ib_mr.rkey = mr->qplib_mr.rkey;
3575
3280 umem = ib_umem_get(ib_pd->uobject->context, start, length, 3576 umem = ib_umem_get(ib_pd->uobject->context, start, length,
3281 mr_access_flags, 0); 3577 mr_access_flags, 0);
3282 if (IS_ERR(umem)) { 3578 if (IS_ERR(umem)) {
3283 dev_err(rdev_to_dev(rdev), "Failed to get umem"); 3579 dev_err(rdev_to_dev(rdev), "Failed to get umem");
3284 rc = -EFAULT; 3580 rc = -EFAULT;
3285 goto free_mr; 3581 goto free_mrw;
3286 } 3582 }
3287 mr->ib_umem = umem; 3583 mr->ib_umem = umem;
3288 3584
3289 rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
3290 if (rc) {
3291 dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
3292 goto release_umem;
3293 }
3294 /* The fixed portion of the rkey is the same as the lkey */
3295 mr->ib_mr.rkey = mr->qplib_mr.rkey;
3296
3297 mr->qplib_mr.va = virt_addr; 3585 mr->qplib_mr.va = virt_addr;
3298 umem_pgs = ib_umem_page_count(umem); 3586 umem_pgs = ib_umem_page_count(umem);
3299 if (!umem_pgs) { 3587 if (!umem_pgs) {
3300 dev_err(rdev_to_dev(rdev), "umem is invalid!"); 3588 dev_err(rdev_to_dev(rdev), "umem is invalid!");
3301 rc = -EINVAL; 3589 rc = -EINVAL;
3302 goto free_mrw; 3590 goto free_umem;
3303 } 3591 }
3304 mr->qplib_mr.total_size = length; 3592 mr->qplib_mr.total_size = length;
3305 3593
3306 pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL); 3594 pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
3307 if (!pbl_tbl) { 3595 if (!pbl_tbl) {
3308 rc = -EINVAL; 3596 rc = -ENOMEM;
3309 goto free_mrw; 3597 goto free_umem;
3310 } 3598 }
3311 pbl_tbl_orig = pbl_tbl;
3312 3599
3313 if (umem->hugetlb) { 3600 page_shift = umem->page_shift;
3314 dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!"); 3601
3602 if (!bnxt_re_page_size_ok(page_shift)) {
3603 dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
3315 rc = -EFAULT; 3604 rc = -EFAULT;
3316 goto fail; 3605 goto fail;
3317 } 3606 }
3318 3607
3319 if (umem->page_shift != PAGE_SHIFT) { 3608 if (!umem->hugetlb && length > BNXT_RE_MAX_MR_SIZE_LOW) {
3320 dev_err(rdev_to_dev(rdev), "umem page shift unsupported!"); 3609 dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu",
3321 rc = -EFAULT; 3610 length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
3611 rc = -EINVAL;
3322 goto fail; 3612 goto fail;
3323 } 3613 }
3324 /* Map umem buf ptrs to the PBL */ 3614 if (umem->hugetlb && length > BNXT_RE_PAGE_SIZE_2M) {
3325 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 3615 page_shift = BNXT_RE_PAGE_SHIFT_2M;
3326 pages = sg_dma_len(sg) >> umem->page_shift; 3616 dev_warn(rdev_to_dev(rdev), "umem hugetlb set page_size %x",
3327 for (i = 0; i < pages; i++, pbl_tbl++) 3617 1 << page_shift);
3328 *pbl_tbl = sg_dma_address(sg) + (i << umem->page_shift);
3329 } 3618 }
3330 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig, 3619
3331 umem_pgs, false); 3620 /* Map umem buf ptrs to the PBL */
3621 umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
3622 rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
3623 umem_pgs, false, 1 << page_shift);
3332 if (rc) { 3624 if (rc) {
3333 dev_err(rdev_to_dev(rdev), "Failed to register user MR"); 3625 dev_err(rdev_to_dev(rdev), "Failed to register user MR");
3334 goto fail; 3626 goto fail;
3335 } 3627 }
3336 3628
3337 kfree(pbl_tbl_orig); 3629 kfree(pbl_tbl);
3338 3630
3339 mr->ib_mr.lkey = mr->qplib_mr.lkey; 3631 mr->ib_mr.lkey = mr->qplib_mr.lkey;
3340 mr->ib_mr.rkey = mr->qplib_mr.lkey; 3632 mr->ib_mr.rkey = mr->qplib_mr.lkey;
@@ -3342,11 +3634,11 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
3342 3634
3343 return &mr->ib_mr; 3635 return &mr->ib_mr;
3344fail: 3636fail:
3345 kfree(pbl_tbl_orig); 3637 kfree(pbl_tbl);
3638free_umem:
3639 ib_umem_release(umem);
3346free_mrw: 3640free_mrw:
3347 bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); 3641 bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
3348release_umem:
3349 ib_umem_release(umem);
3350free_mr: 3642free_mr:
3351 kfree(mr); 3643 kfree(mr);
3352 return ERR_PTR(rc); 3644 return ERR_PTR(rc);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 1df11ed272ea..423ebe012f95 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -68,6 +68,15 @@ struct bnxt_re_ah {
68 struct bnxt_qplib_ah qplib_ah; 68 struct bnxt_qplib_ah qplib_ah;
69}; 69};
70 70
71struct bnxt_re_srq {
72 struct bnxt_re_dev *rdev;
73 u32 srq_limit;
74 struct ib_srq ib_srq;
75 struct bnxt_qplib_srq qplib_srq;
76 struct ib_umem *umem;
77 spinlock_t lock; /* protect srq */
78};
79
71struct bnxt_re_qp { 80struct bnxt_re_qp {
72 struct list_head list; 81 struct list_head list;
73 struct bnxt_re_dev *rdev; 82 struct bnxt_re_dev *rdev;
@@ -143,6 +152,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
143 struct ib_port_attr *port_attr); 152 struct ib_port_attr *port_attr);
144int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, 153int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
145 struct ib_port_immutable *immutable); 154 struct ib_port_immutable *immutable);
155void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
146int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, 156int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
147 u16 index, u16 *pkey); 157 u16 index, u16 *pkey);
148int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, 158int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
@@ -164,6 +174,16 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
164int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 174int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
165int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 175int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
166int bnxt_re_destroy_ah(struct ib_ah *ah); 176int bnxt_re_destroy_ah(struct ib_ah *ah);
177struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd,
178 struct ib_srq_init_attr *srq_init_attr,
179 struct ib_udata *udata);
180int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
181 enum ib_srq_attr_mask srq_attr_mask,
182 struct ib_udata *udata);
183int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
184int bnxt_re_destroy_srq(struct ib_srq *srq);
185int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr,
186 struct ib_recv_wr **bad_recv_wr);
167struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, 187struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
168 struct ib_qp_init_attr *qp_init_attr, 188 struct ib_qp_init_attr *qp_init_attr,
169 struct ib_udata *udata); 189 struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index aafc19aa5de1..508d00a5a106 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -80,6 +80,79 @@ static DEFINE_MUTEX(bnxt_re_dev_lock);
80static struct workqueue_struct *bnxt_re_wq; 80static struct workqueue_struct *bnxt_re_wq;
81static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait); 81static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait);
82 82
83/* SR-IOV helper functions */
84
85static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
86{
87 struct bnxt *bp;
88
89 bp = netdev_priv(rdev->en_dev->net);
90 if (BNXT_VF(bp))
91 rdev->is_virtfn = 1;
92}
93
94/* Set the maximum number of each resource that the driver actually wants
95 * to allocate. This may be up to the maximum number the firmware has
96 * reserved for the function. The driver may choose to allocate fewer
97 * resources than the firmware maximum.
98 */
99static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
100{
101 u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
102 u32 i;
103 u32 vf_pct;
104 u32 num_vfs;
105 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
106
107 rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
108 dev_attr->max_qp);
109
110 rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
111 /* Use max_mr from fw since max_mrw does not get set */
112 rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
113 dev_attr->max_mr);
114 rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
115 dev_attr->max_srq);
116 rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
117 dev_attr->max_cq);
118
119 for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
120 rdev->qplib_ctx.tqm_count[i] =
121 rdev->dev_attr.tqm_alloc_reqs[i];
122
123 if (rdev->num_vfs) {
124 /*
125 * Reserve a set of resources for the PF. Divide the remaining
126 * resources among the VFs
127 */
128 vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
129 num_vfs = 100 * rdev->num_vfs;
130 vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
131 vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
132 vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
133 /*
134 * The driver allows many more MRs than other resources. If the
135 * firmware does also, then reserve a fixed amount for the PF
136 * and divide the rest among VFs. VFs may use many MRs for NFS
137 * mounts, ISER, NVME applications, etc. If the firmware
138 * severely restricts the number of MRs, then let PF have
139 * half and divide the rest among VFs, as for the other
140 * resource types.
141 */
142 if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
143 vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
144 else
145 vf_mrws = (rdev->qplib_ctx.mrw_count -
146 BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
147 vf_gids = BNXT_RE_MAX_GID_PER_VF;
148 }
149 rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
150 rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
151 rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
152 rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
153 rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
154}
155
83/* for handling bnxt_en callbacks later */ 156/* for handling bnxt_en callbacks later */
84static void bnxt_re_stop(void *p) 157static void bnxt_re_stop(void *p)
85{ 158{
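
bnxt_re_set_resource_limits() in the hunk above keeps BNXT_RE_PCT_RSVD_FOR_PF percent of each resource type for the PF and splits the remainder evenly across the VFs, with MRs treated specially when the firmware exposes a large pool. A standalone worked example of that arithmetic, using made-up totals but the same formulas as the hunk:

#include <stdio.h>
#include <stdint.h>

#define PCT_RSVD_FOR_PF     50          /* BNXT_RE_PCT_RSVD_FOR_PF */
#define MAX_MRW_COUNT_64K   (64 * 1024)
#define RESVD_MR_FOR_PF     (32 * 1024) /* BNXT_RE_RESVD_MR_FOR_PF */

int main(void)
{
    uint32_t num_vfs   = 4;
    uint32_t qpc_count = 64 * 1024;
    uint32_t mrw_count = 256 * 1024;

    uint32_t vf_pct = 100 - PCT_RSVD_FOR_PF;
    uint32_t denom  = 100 * num_vfs;

    /* Generic resources: (total * (100 - pct)) / (100 * num_vfs) each. */
    uint32_t vf_qps = (qpc_count * vf_pct) / denom;

    /* MRs: if the firmware is generous, reserve a fixed chunk for the
     * PF and split the rest; otherwise fall back to the percentage. */
    uint32_t vf_mrws;
    if (mrw_count < MAX_MRW_COUNT_64K)
        vf_mrws = mrw_count * vf_pct / denom;
    else
        vf_mrws = (mrw_count - RESVD_MR_FOR_PF) / num_vfs;

    printf("per-VF QPs: %u\n", vf_qps);    /* 8192  */
    printf("per-VF MRs: %u\n", vf_mrws);   /* 57344 */
    return 0;
}
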
@@ -91,6 +164,15 @@ static void bnxt_re_start(void *p)
91 164
92static void bnxt_re_sriov_config(void *p, int num_vfs) 165static void bnxt_re_sriov_config(void *p, int num_vfs)
93{ 166{
167 struct bnxt_re_dev *rdev = p;
168
169 if (!rdev)
170 return;
171
172 rdev->num_vfs = num_vfs;
173 bnxt_re_set_resource_limits(rdev);
174 bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
175 &rdev->qplib_ctx);
94} 176}
95 177
96static void bnxt_re_shutdown(void *p) 178static void bnxt_re_shutdown(void *p)
@@ -417,7 +499,7 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
417 return ERR_PTR(-EINVAL); 499 return ERR_PTR(-EINVAL);
418 500
419 if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) { 501 if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
420 dev_dbg(&pdev->dev, 502 dev_info(&pdev->dev,
421 "%s: probe error: RoCE is not supported on this device", 503 "%s: probe error: RoCE is not supported on this device",
422 ROCE_DRV_MODULE_NAME); 504 ROCE_DRV_MODULE_NAME);
423 return ERR_PTR(-ENODEV); 505 return ERR_PTR(-ENODEV);
@@ -490,6 +572,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
490 572
491 ibdev->query_port = bnxt_re_query_port; 573 ibdev->query_port = bnxt_re_query_port;
492 ibdev->get_port_immutable = bnxt_re_get_port_immutable; 574 ibdev->get_port_immutable = bnxt_re_get_port_immutable;
575 ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
493 ibdev->query_pkey = bnxt_re_query_pkey; 576 ibdev->query_pkey = bnxt_re_query_pkey;
494 ibdev->query_gid = bnxt_re_query_gid; 577 ibdev->query_gid = bnxt_re_query_gid;
495 ibdev->get_netdev = bnxt_re_get_netdev; 578 ibdev->get_netdev = bnxt_re_get_netdev;
@@ -505,6 +588,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
505 ibdev->query_ah = bnxt_re_query_ah; 588 ibdev->query_ah = bnxt_re_query_ah;
506 ibdev->destroy_ah = bnxt_re_destroy_ah; 589 ibdev->destroy_ah = bnxt_re_destroy_ah;
507 590
591 ibdev->create_srq = bnxt_re_create_srq;
592 ibdev->modify_srq = bnxt_re_modify_srq;
593 ibdev->query_srq = bnxt_re_query_srq;
594 ibdev->destroy_srq = bnxt_re_destroy_srq;
595 ibdev->post_srq_recv = bnxt_re_post_srq_recv;
596
508 ibdev->create_qp = bnxt_re_create_qp; 597 ibdev->create_qp = bnxt_re_create_qp;
509 ibdev->modify_qp = bnxt_re_modify_qp; 598 ibdev->modify_qp = bnxt_re_modify_qp;
510 ibdev->query_qp = bnxt_re_query_qp; 599 ibdev->query_qp = bnxt_re_query_qp;
@@ -541,14 +630,6 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
541 return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); 630 return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
542} 631}
543 632
544static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
545 char *buf)
546{
547 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
548
549 return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
550}
551
552static ssize_t show_hca(struct device *device, struct device_attribute *attr, 633static ssize_t show_hca(struct device *device, struct device_attribute *attr,
553 char *buf) 634 char *buf)
554{ 635{
@@ -558,12 +639,10 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
558} 639}
559 640
560static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL); 641static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
561static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
562static DEVICE_ATTR(hca_type, 0444, show_hca, NULL); 642static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
563 643
564static struct device_attribute *bnxt_re_attributes[] = { 644static struct device_attribute *bnxt_re_attributes[] = {
565 &dev_attr_hw_rev, 645 &dev_attr_hw_rev,
566 &dev_attr_fw_rev,
567 &dev_attr_hca_type 646 &dev_attr_hca_type
568}; 647};
569 648
@@ -616,10 +695,10 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
616 return rdev; 695 return rdev;
617} 696}
618 697
619static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw, 698static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
620 struct creq_func_event *aeqe) 699 *unaffi_async)
621{ 700{
622 switch (aeqe->event) { 701 switch (unaffi_async->event) {
623 case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: 702 case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
624 break; 703 break;
625 case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR: 704 case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
@@ -648,6 +727,93 @@ static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
648 return 0; 727 return 0;
649} 728}
650 729
730static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
731 struct bnxt_re_qp *qp)
732{
733 struct ib_event event;
734
735 memset(&event, 0, sizeof(event));
736 if (qp->qplib_qp.srq) {
737 event.device = &qp->rdev->ibdev;
738 event.element.qp = &qp->ib_qp;
739 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
740 }
741
742 if (event.device && qp->ib_qp.event_handler)
743 qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
744
745 return 0;
746}
747
748static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
749 void *obj)
750{
751 int rc = 0;
752 u8 event;
753
754 if (!obj)
755 return rc; /* QP was already dead, still return success */
756
757 event = affi_async->event;
758 if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
759 struct bnxt_qplib_qp *lib_qp = obj;
760 struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
761 qplib_qp);
762 rc = bnxt_re_handle_qp_async_event(affi_async, qp);
763 }
764 return rc;
765}
766
767static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
768 void *aeqe, void *obj)
769{
770 struct creq_qp_event *affi_async;
771 struct creq_func_event *unaffi_async;
772 u8 type;
773 int rc;
774
775 type = ((struct creq_base *)aeqe)->type;
776 if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
777 unaffi_async = aeqe;
778 rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
779 } else {
780 affi_async = aeqe;
781 rc = bnxt_re_handle_affi_async_event(affi_async, obj);
782 }
783
784 return rc;
785}
786
787static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
788 struct bnxt_qplib_srq *handle, u8 event)
789{
790 struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
791 qplib_srq);
792 struct ib_event ib_event;
793 int rc = 0;
794
795 if (!srq) {
796 dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
797 ROCE_DRV_MODULE_NAME);
798 rc = -EINVAL;
799 goto done;
800 }
801 ib_event.device = &srq->rdev->ibdev;
802 ib_event.element.srq = &srq->ib_srq;
803 if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
804 ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
805 else
806 ib_event.event = IB_EVENT_SRQ_ERR;
807
808 if (srq->ib_srq.event_handler) {
809 /* Lock event_handler? */
810 (*srq->ib_srq.event_handler)(&ib_event,
811 srq->ib_srq.srq_context);
812 }
813done:
814 return rc;
815}
816
651static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, 817static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
652 struct bnxt_qplib_cq *handle) 818 struct bnxt_qplib_cq *handle)
653{ 819{
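
The reworked bnxt_re_aeq_handler() above no longer assumes every AEQ entry is a function event: it reads the type field of the common creq_base header first and only then casts to the unaffiliated (function) or affiliated (QP) layout. A standalone sketch of that header-first dispatch, with hypothetical event structures in place of the firmware ones:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical event layouts sharing a leading 'type' byte, in the
 * spirit of creq_base / creq_func_event / creq_qp_event. */
#define DEMO_TYPE_FUNC_EVENT 0x0a
#define DEMO_TYPE_QP_EVENT   0x0b

struct demo_base       { uint8_t type; };
struct demo_func_event { uint8_t type; uint8_t event; };
struct demo_qp_event   { uint8_t type; uint8_t event; uint32_t qp_id; };

static int demo_aeq_handler(void *aeqe)
{
    uint8_t type = ((struct demo_base *)aeqe)->type;

    if (type == DEMO_TYPE_FUNC_EVENT) {
        struct demo_func_event *fe = aeqe;
        printf("unaffiliated event 0x%x\n", fe->event);
    } else {
        struct demo_qp_event *qe = aeqe;
        printf("affiliated event 0x%x on QP %u\n", qe->event, qe->qp_id);
    }
    return 0;
}

int main(void)
{
    struct demo_func_event fe = { DEMO_TYPE_FUNC_EVENT, 0x01 };
    struct demo_qp_event   qe = { DEMO_TYPE_QP_EVENT, 0xc0, 5 };

    demo_aeq_handler(&fe);
    demo_aeq_handler(&qe);
    return 0;
}
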
@@ -690,7 +856,8 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
690 rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], 856 rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
691 i - 1, rdev->msix_entries[i].vector, 857 i - 1, rdev->msix_entries[i].vector,
692 rdev->msix_entries[i].db_offset, 858 rdev->msix_entries[i].db_offset,
693 &bnxt_re_cqn_handler, NULL); 859 &bnxt_re_cqn_handler,
860 &bnxt_re_srqn_handler);
694 861
695 if (rc) { 862 if (rc) {
696 dev_err(rdev_to_dev(rdev), 863 dev_err(rdev_to_dev(rdev),
@@ -734,7 +901,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
734 901
735 /* Configure and allocate resources for qplib */ 902 /* Configure and allocate resources for qplib */
736 rdev->qplib_res.rcfw = &rdev->rcfw; 903 rdev->qplib_res.rcfw = &rdev->rcfw;
737 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); 904 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
905 rdev->is_virtfn);
738 if (rc) 906 if (rc)
739 goto fail; 907 goto fail;
740 908
@@ -1035,19 +1203,6 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
1035 } 1203 }
1036} 1204}
1037 1205
1038static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
1039{
1040 u32 i;
1041
1042 rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
1043 rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
1044 rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
1045 rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
1046 for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
1047 rdev->qplib_ctx.tqm_count[i] =
1048 rdev->dev_attr.tqm_alloc_reqs[i];
1049}
1050
1051/* worker thread for polling periodic events. Now used for QoS programming*/ 1206/* worker thread for polling periodic events. Now used for QoS programming*/
1052static void bnxt_re_worker(struct work_struct *work) 1207static void bnxt_re_worker(struct work_struct *work)
1053{ 1208{
@@ -1070,6 +1225,9 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1070 } 1225 }
1071 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); 1226 set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
1072 1227
1228 /* Check whether VF or PF */
1229 bnxt_re_get_sriov_func_type(rdev);
1230
1073 rc = bnxt_re_request_msix(rdev); 1231 rc = bnxt_re_request_msix(rdev);
1074 if (rc) { 1232 if (rc) {
1075 pr_err("Failed to get MSI-X vectors: %#x\n", rc); 1233 pr_err("Failed to get MSI-X vectors: %#x\n", rc);
@@ -1101,16 +1259,18 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1101 (rdev->en_dev->pdev, &rdev->rcfw, 1259 (rdev->en_dev->pdev, &rdev->rcfw,
1102 rdev->msix_entries[BNXT_RE_AEQ_IDX].vector, 1260 rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
1103 rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset, 1261 rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
1104 0, &bnxt_re_aeq_handler); 1262 rdev->is_virtfn, &bnxt_re_aeq_handler);
1105 if (rc) { 1263 if (rc) {
1106 pr_err("Failed to enable RCFW channel: %#x\n", rc); 1264 pr_err("Failed to enable RCFW channel: %#x\n", rc);
1107 goto free_ring; 1265 goto free_ring;
1108 } 1266 }
1109 1267
1110 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); 1268 rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
1269 rdev->is_virtfn);
1111 if (rc) 1270 if (rc)
1112 goto disable_rcfw; 1271 goto disable_rcfw;
1113 bnxt_re_set_resource_limits(rdev); 1272 if (!rdev->is_virtfn)
1273 bnxt_re_set_resource_limits(rdev);
1114 1274
1115 rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0); 1275 rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
1116 if (rc) { 1276 if (rc) {
@@ -1125,7 +1285,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1125 goto free_ctx; 1285 goto free_ctx;
1126 } 1286 }
1127 1287
1128 rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0); 1288 rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
1289 rdev->is_virtfn);
1129 if (rc) { 1290 if (rc) {
1130 pr_err("Failed to initialize RCFW: %#x\n", rc); 1291 pr_err("Failed to initialize RCFW: %#x\n", rc);
1131 goto free_sctx; 1292 goto free_sctx;
@@ -1144,13 +1305,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1144 goto fail; 1305 goto fail;
1145 } 1306 }
1146 1307
1147 rc = bnxt_re_setup_qos(rdev); 1308 if (!rdev->is_virtfn) {
1148 if (rc) 1309 rc = bnxt_re_setup_qos(rdev);
1149 pr_info("RoCE priority not yet configured\n"); 1310 if (rc)
1311 pr_info("RoCE priority not yet configured\n");
1150 1312
1151 INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); 1313 INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
1152 set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags); 1314 set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
1153 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000)); 1315 schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
1316 }
1154 1317
1155 /* Register ib dev */ 1318 /* Register ib dev */
1156 rc = bnxt_re_register_ib(rdev); 1319 rc = bnxt_re_register_ib(rdev);
@@ -1176,6 +1339,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1176 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); 1339 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
1177 ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, 1340 ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
1178 &rdev->active_width); 1341 &rdev->active_width);
1342 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
1179 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); 1343 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
1180 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE); 1344 bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
1181 1345
@@ -1400,7 +1564,7 @@ err_netdev:
1400 1564
1401static void __exit bnxt_re_mod_exit(void) 1565static void __exit bnxt_re_mod_exit(void)
1402{ 1566{
1403 struct bnxt_re_dev *rdev; 1567 struct bnxt_re_dev *rdev, *next;
1404 LIST_HEAD(to_be_deleted); 1568 LIST_HEAD(to_be_deleted);
1405 1569
1406 mutex_lock(&bnxt_re_dev_lock); 1570 mutex_lock(&bnxt_re_dev_lock);
@@ -1408,8 +1572,11 @@ static void __exit bnxt_re_mod_exit(void)
1408 if (!list_empty(&bnxt_re_dev_list)) 1572 if (!list_empty(&bnxt_re_dev_list))
1409 list_splice_init(&bnxt_re_dev_list, &to_be_deleted); 1573 list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
1410 mutex_unlock(&bnxt_re_dev_lock); 1574 mutex_unlock(&bnxt_re_dev_lock);
1411 1575 /*
1412 list_for_each_entry(rdev, &to_be_deleted, list) { 1576 * Cleanup the devices in reverse order so that the VF device
1577 * cleanup is done before PF cleanup
1578 */
1579 list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
1413 dev_info(rdev_to_dev(rdev), "Unregistering Device"); 1580 dev_info(rdev_to_dev(rdev), "Unregistering Device");
1414 bnxt_re_dev_stop(rdev); 1581 bnxt_re_dev_stop(rdev);
1415 bnxt_re_ib_unreg(rdev, true); 1582 bnxt_re_ib_unreg(rdev, true);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 61764f7aa79b..8b5f11ac0e42 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -52,6 +52,7 @@
52 52
53static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq); 53static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
54static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp); 54static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
55static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type);
55 56
56static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp) 57static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
57{ 58{
@@ -278,6 +279,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
278 struct nq_base *nqe, **nq_ptr; 279 struct nq_base *nqe, **nq_ptr;
279 struct bnxt_qplib_cq *cq; 280 struct bnxt_qplib_cq *cq;
280 int num_cqne_processed = 0; 281 int num_cqne_processed = 0;
282 int num_srqne_processed = 0;
281 u32 sw_cons, raw_cons; 283 u32 sw_cons, raw_cons;
282 u16 type; 284 u16 type;
283 int budget = nq->budget; 285 int budget = nq->budget;
@@ -320,6 +322,26 @@ static void bnxt_qplib_service_nq(unsigned long data)
320 spin_unlock_bh(&cq->compl_lock); 322 spin_unlock_bh(&cq->compl_lock);
321 break; 323 break;
322 } 324 }
325 case NQ_BASE_TYPE_SRQ_EVENT:
326 {
327 struct nq_srq_event *nqsrqe =
328 (struct nq_srq_event *)nqe;
329
330 q_handle = le32_to_cpu(nqsrqe->srq_handle_low);
331 q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
332 << 32;
333 bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
334 DBR_DBR_TYPE_SRQ_ARMENA);
335 if (!nq->srqn_handler(nq,
336 (struct bnxt_qplib_srq *)q_handle,
337 nqsrqe->event))
338 num_srqne_processed++;
339 else
340 dev_warn(&nq->pdev->dev,
341 "QPLIB: SRQ event 0x%x not handled",
342 nqsrqe->event);
343 break;
344 }
323 case NQ_BASE_TYPE_DBQ_EVENT: 345 case NQ_BASE_TYPE_DBQ_EVENT:
324 break; 346 break;
325 default: 347 default:
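
The new SRQ branch in bnxt_qplib_service_nq() above rebuilds the 64-bit srq_handle that was programmed at create time (req.srq_handle = cpu_to_le64(srq)) from the two 32-bit halves carried in the NQ entry. A minimal standalone illustration of splitting and recombining such a handle (host-endian only; the le32 conversions are omitted):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
    /* Pretend this is the pointer value stored in req.srq_handle. */
    uint64_t handle = 0x0000123456789abcULL;

    /* What the hardware would report back in two 32-bit fields. */
    uint32_t lo = (uint32_t)handle;
    uint32_t hi = (uint32_t)(handle >> 32);

    /* Reassembly, mirroring: q_handle = lo | ((u64)hi << 32). */
    uint64_t q_handle = (uint64_t)lo | ((uint64_t)hi << 32);

    printf("original 0x%016" PRIx64 " rebuilt 0x%016" PRIx64 " %s\n",
           handle, q_handle, handle == q_handle ? "(match)" : "(mismatch)");
    return 0;
}
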
@@ -384,17 +406,19 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
384 int (*cqn_handler)(struct bnxt_qplib_nq *nq, 406 int (*cqn_handler)(struct bnxt_qplib_nq *nq,
385 struct bnxt_qplib_cq *), 407 struct bnxt_qplib_cq *),
386 int (*srqn_handler)(struct bnxt_qplib_nq *nq, 408 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
387 void *, u8 event)) 409 struct bnxt_qplib_srq *,
410 u8 event))
388{ 411{
389 resource_size_t nq_base; 412 resource_size_t nq_base;
390 int rc = -1; 413 int rc = -1;
391 414
392 nq->pdev = pdev; 415 nq->pdev = pdev;
393 nq->vector = msix_vector; 416 nq->vector = msix_vector;
417 if (cqn_handler)
418 nq->cqn_handler = cqn_handler;
394 419
395 nq->cqn_handler = cqn_handler; 420 if (srqn_handler)
396 421 nq->srqn_handler = srqn_handler;
397 nq->srqn_handler = srqn_handler;
398 422
399 tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq); 423 tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
400 424
@@ -410,7 +434,6 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
410 if (rc) { 434 if (rc) {
411 dev_err(&nq->pdev->dev, 435 dev_err(&nq->pdev->dev,
412 "Failed to request IRQ for NQ: %#x", rc); 436 "Failed to request IRQ for NQ: %#x", rc);
413 bnxt_qplib_disable_nq(nq);
414 goto fail; 437 goto fail;
415 } 438 }
416 439
@@ -469,6 +492,238 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
469 return 0; 492 return 0;
470} 493}
471 494
495/* SRQ */
496static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
497{
498 struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
499 struct dbr_dbr db_msg = { 0 };
500 void __iomem *db;
501 u32 sw_prod = 0;
502
503 /* Ring DB */
504 sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold :
505 HWQ_CMP(srq_hwq->prod, srq_hwq);
506 db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
507 DBR_DBR_INDEX_MASK);
508 db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) &
509 DBR_DBR_XID_MASK) | arm_type);
510 db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ?
511 srq->dbr_base : srq->dpi->dbr;
512 wmb(); /* barrier before db ring */
513 __iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64));
514}
515
516int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
517 struct bnxt_qplib_srq *srq)
518{
519 struct bnxt_qplib_rcfw *rcfw = res->rcfw;
520 struct cmdq_destroy_srq req;
521 struct creq_destroy_srq_resp resp;
522 u16 cmd_flags = 0;
523 int rc;
524
525 RCFW_CMD_PREP(req, DESTROY_SRQ, cmd_flags);
526
527 /* Configure the request */
528 req.srq_cid = cpu_to_le32(srq->id);
529
530 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
531 (void *)&resp, NULL, 0);
532 if (rc)
533 return rc;
534
535 bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
536 kfree(srq->swq);
537 return 0;
538}
539
540int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
541 struct bnxt_qplib_srq *srq)
542{
543 struct bnxt_qplib_rcfw *rcfw = res->rcfw;
544 struct cmdq_create_srq req;
545 struct creq_create_srq_resp resp;
546 struct bnxt_qplib_pbl *pbl;
547 u16 cmd_flags = 0;
548 int rc, idx;
549
550 srq->hwq.max_elements = srq->max_wqe;
551 rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, srq->sglist,
552 srq->nmap, &srq->hwq.max_elements,
553 BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
554 PAGE_SIZE, HWQ_TYPE_QUEUE);
555 if (rc)
556 goto exit;
557
558 srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
559 GFP_KERNEL);
560 if (!srq->swq)
561 goto fail;
562
563 RCFW_CMD_PREP(req, CREATE_SRQ, cmd_flags);
564
565 /* Configure the request */
566 req.dpi = cpu_to_le32(srq->dpi->dpi);
567 req.srq_handle = cpu_to_le64(srq);
568
569 req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
570 pbl = &srq->hwq.pbl[PBL_LVL_0];
571 req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level &
572 CMDQ_CREATE_SRQ_LVL_MASK) <<
573 CMDQ_CREATE_SRQ_LVL_SFT) |
574 (pbl->pg_size == ROCE_PG_SIZE_4K ?
575 CMDQ_CREATE_SRQ_PG_SIZE_PG_4K :
576 pbl->pg_size == ROCE_PG_SIZE_8K ?
577 CMDQ_CREATE_SRQ_PG_SIZE_PG_8K :
578 pbl->pg_size == ROCE_PG_SIZE_64K ?
579 CMDQ_CREATE_SRQ_PG_SIZE_PG_64K :
580 pbl->pg_size == ROCE_PG_SIZE_2M ?
581 CMDQ_CREATE_SRQ_PG_SIZE_PG_2M :
582 pbl->pg_size == ROCE_PG_SIZE_8M ?
583 CMDQ_CREATE_SRQ_PG_SIZE_PG_8M :
584 pbl->pg_size == ROCE_PG_SIZE_1G ?
585 CMDQ_CREATE_SRQ_PG_SIZE_PG_1G :
586 CMDQ_CREATE_SRQ_PG_SIZE_PG_4K));
587 req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
588 req.pd_id = cpu_to_le32(srq->pd->id);
589 req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id);
590
591 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
592 (void *)&resp, NULL, 0);
593 if (rc)
594 goto fail;
595
596 spin_lock_init(&srq->lock);
597 srq->start_idx = 0;
598 srq->last_idx = srq->hwq.max_elements - 1;
599 for (idx = 0; idx < srq->hwq.max_elements; idx++)
600 srq->swq[idx].next_idx = idx + 1;
601 srq->swq[srq->last_idx].next_idx = -1;
602
603 srq->id = le32_to_cpu(resp.xid);
604 srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
605 if (srq->threshold)
606 bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARMENA);
607 srq->arm_req = false;
608
609 return 0;
610fail:
611 bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
612 kfree(srq->swq);
613exit:
614 return rc;
615}
616
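The swq[] setup at the end of bnxt_qplib_create_srq() threads a free list of receive slots through next_idx: start_idx is the head, last_idx is the tail and -1 terminates the chain. bnxt_qplib_post_srq_recv() pops the head for each posted buffer, and bnxt_qplib_release_srqe(), added further down in this patch, appends consumed slots back at the tail. A stand-alone user-space sketch of that structure, with invented names and a fixed depth:

#include <stdio.h>

#define NSLOTS 8

struct swq_list {
	int next[NSLOTS];
	int head;	/* start_idx: next free slot to hand out   */
	int tail;	/* last_idx: freed slots are appended here */
};

static void swq_init(struct swq_list *l)
{
	int i;

	for (i = 0; i < NSLOTS; i++)
		l->next[i] = i + 1;
	l->next[NSLOTS - 1] = -1;
	l->head = 0;
	l->tail = NSLOTS - 1;
}

/* Take a slot for a new receive; -1 mirrors the "SRQ is full" case,
 * which the driver reports when start_idx catches up with last_idx.
 */
static int swq_get(struct swq_list *l)
{
	int idx;

	if (l->head == l->tail)
		return -1;
	idx = l->head;
	l->head = l->next[idx];
	return idx;
}

/* Return a slot once its completion has been consumed (release_srqe). */
static void swq_put(struct swq_list *l, int idx)
{
	l->next[l->tail] = idx;
	l->tail = idx;
	l->next[idx] = -1;
}

int main(void)
{
	struct swq_list l;
	int a, b;

	swq_init(&l);
	a = swq_get(&l);	/* 0 */
	b = swq_get(&l);	/* 1 */
	swq_put(&l, a);		/* slot 0 goes back to the tail of the list */
	printf("got %d then %d, head now %d\n", a, b, l.head);
	return 0;
}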
617int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
618 struct bnxt_qplib_srq *srq)
619{
620 struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
621 u32 sw_prod, sw_cons, count = 0;
622
623 sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
624 sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
625
626 count = sw_prod > sw_cons ? sw_prod - sw_cons :
627 srq_hwq->max_elements - sw_cons + sw_prod;
628 if (count > srq->threshold) {
629 srq->arm_req = false;
630 bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
631 } else {
632 /* Deferred arming */
633 srq->arm_req = true;
634 }
635
636 return 0;
637}
638
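bnxt_qplib_modify_srq() implements the SRQ-limit path: it derives the number of outstanding entries from the masked producer and consumer indices and rings the ARM doorbell immediately only if the queue already holds more than threshold entries; otherwise arm_req defers the arming to the next post. The occupancy arithmetic on its own, with a worked example:

/* Fill level of a circular queue given masked producer/consumer indices;
 * 'depth' stands in for srq_hwq->max_elements.
 */
static unsigned int ring_fill_level(unsigned int prod, unsigned int cons,
				    unsigned int depth)
{
	return prod > cons ? prod - cons : depth - cons + prod;
}

/* Example: depth = 1024, prod = 10, cons = 1000 -> 1024 - 1000 + 10 = 34 */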
639int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
640 struct bnxt_qplib_srq *srq)
641{
642 struct bnxt_qplib_rcfw *rcfw = res->rcfw;
643 struct cmdq_query_srq req;
644 struct creq_query_srq_resp resp;
645 struct bnxt_qplib_rcfw_sbuf *sbuf;
646 struct creq_query_srq_resp_sb *sb;
647 u16 cmd_flags = 0;
648 int rc = 0;
649
650 RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags);
651 req.srq_cid = cpu_to_le32(srq->id);
652
653 /* Configure the request */
654 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
655 if (!sbuf)
656 return -ENOMEM;
657 sb = sbuf->sb;
658 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
659 (void *)sbuf, 0);
660 srq->threshold = le16_to_cpu(sb->srq_limit);
661 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
662
663 return rc;
664}
665
666int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
667 struct bnxt_qplib_swqe *wqe)
668{
669 struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
670 struct rq_wqe *srqe, **srqe_ptr;
671 struct sq_sge *hw_sge;
672 u32 sw_prod, sw_cons, count = 0;
673 int i, rc = 0, next;
674
675 spin_lock(&srq_hwq->lock);
676 if (srq->start_idx == srq->last_idx) {
677 dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!",
678 srq->id);
679 rc = -EINVAL;
680 spin_unlock(&srq_hwq->lock);
681 goto done;
682 }
683 next = srq->start_idx;
684 srq->start_idx = srq->swq[next].next_idx;
685 spin_unlock(&srq_hwq->lock);
686
687 sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
688 srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr;
689 srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
690 memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
691 /* Calculate wqe_size16 and data_len */
692 for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
693 i < wqe->num_sge; i++, hw_sge++) {
694 hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
695 hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
696 hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
697 }
698 srqe->wqe_type = wqe->type;
699 srqe->flags = wqe->flags;
700 srqe->wqe_size = wqe->num_sge +
701 ((offsetof(typeof(*srqe), data) + 15) >> 4);
702 srqe->wr_id[0] = cpu_to_le32((u32)next);
703 srq->swq[next].wr_id = wqe->wr_id;
704
705 srq_hwq->prod++;
706
707 spin_lock(&srq_hwq->lock);
708 sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
	709	/* Retain srq_hwq->cons for this computation;
	710	 * strictly, the lock is only needed to
	711	 * read srq_hwq->cons.
712 */
713 sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
714 count = sw_prod > sw_cons ? sw_prod - sw_cons :
715 srq_hwq->max_elements - sw_cons + sw_prod;
716 spin_unlock(&srq_hwq->lock);
717 /* Ring DB */
718 bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ);
719 if (srq->arm_req == true && count > srq->threshold) {
720 srq->arm_req = false;
721 bnxt_qplib_arm_srq(srq, DBR_DBR_TYPE_SRQ_ARM);
722 }
723done:
724 return rc;
725}
726
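The wqe_size written by bnxt_qplib_post_srq_recv() is counted in 16-byte units: the fixed RQE header rounded up to a 16-byte boundary, plus one unit per scatter/gather element (each sq_sge filled in the loop above is a 64-bit address and two 32-bit words, i.e. 16 bytes). A small worked example, using a hypothetical 32-byte header:

/* Rounding a byte count up to 16-byte units, as in the wqe_size
 * computation above. The 32-byte header size is illustrative only.
 */
#define SLOTS_16B(bytes)	(((bytes) + 15) >> 4)

/* header of 32 bytes, 3 SGEs -> SLOTS_16B(32) + 3 = 2 + 3 = 5 units */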
472/* QP */ 727/* QP */
473int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) 728int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
474{ 729{
@@ -737,6 +992,12 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
737 pbl->pg_size == ROCE_PG_SIZE_1G ? 992 pbl->pg_size == ROCE_PG_SIZE_1G ?
738 CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G : 993 CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
739 CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K); 994 CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
995 } else {
996 /* SRQ */
997 if (qp->srq) {
998 qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
999 req.srq_cid = cpu_to_le32(qp->srq->id);
1000 }
740 } 1001 }
741 1002
742 if (qp->rcq) 1003 if (qp->rcq)
@@ -2068,6 +2329,16 @@ done:
2068 return rc; 2329 return rc;
2069} 2330}
2070 2331
2332static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
2333{
2334 spin_lock(&srq->hwq.lock);
2335 srq->swq[srq->last_idx].next_idx = (int)tag;
2336 srq->last_idx = (int)tag;
2337 srq->swq[srq->last_idx].next_idx = -1;
2338 srq->hwq.cons++; /* Support for SRQE counter */
2339 spin_unlock(&srq->hwq.lock);
2340}
2341
2071static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq, 2342static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2072 struct cq_res_rc *hwcqe, 2343 struct cq_res_rc *hwcqe,
2073 struct bnxt_qplib_cqe **pcqe, 2344 struct bnxt_qplib_cqe **pcqe,
@@ -2075,6 +2346,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2075{ 2346{
2076 struct bnxt_qplib_qp *qp; 2347 struct bnxt_qplib_qp *qp;
2077 struct bnxt_qplib_q *rq; 2348 struct bnxt_qplib_q *rq;
2349 struct bnxt_qplib_srq *srq;
2078 struct bnxt_qplib_cqe *cqe; 2350 struct bnxt_qplib_cqe *cqe;
2079 u32 wr_id_idx; 2351 u32 wr_id_idx;
2080 int rc = 0; 2352 int rc = 0;
@@ -2102,27 +2374,46 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2102 2374
2103 wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) & 2375 wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) &
2104 CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK; 2376 CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK;
2105 rq = &qp->rq; 2377 if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
2106 if (wr_id_idx > rq->hwq.max_elements) { 2378 srq = qp->srq;
2107 dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC "); 2379 if (!srq)
2108 dev_err(&cq->hwq.pdev->dev, 2380 return -EINVAL;
2109 "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x", 2381 if (wr_id_idx > srq->hwq.max_elements) {
2110 wr_id_idx, rq->hwq.max_elements); 2382 dev_err(&cq->hwq.pdev->dev,
2111 return -EINVAL; 2383 "QPLIB: FP: CQ Process RC ");
2112 } 2384 dev_err(&cq->hwq.pdev->dev,
2113 2385 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2114 cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2386 wr_id_idx, srq->hwq.max_elements);
2115 cqe++; 2387 return -EINVAL;
2116 (*budget)--; 2388 }
2117 rq->hwq.cons++; 2389 cqe->wr_id = srq->swq[wr_id_idx].wr_id;
2118 *pcqe = cqe; 2390 bnxt_qplib_release_srqe(srq, wr_id_idx);
2391 cqe++;
2392 (*budget)--;
2393 *pcqe = cqe;
2394 } else {
2395 rq = &qp->rq;
2396 if (wr_id_idx > rq->hwq.max_elements) {
2397 dev_err(&cq->hwq.pdev->dev,
2398 "QPLIB: FP: CQ Process RC ");
2399 dev_err(&cq->hwq.pdev->dev,
2400 "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
2401 wr_id_idx, rq->hwq.max_elements);
2402 return -EINVAL;
2403 }
2404 cqe->wr_id = rq->swq[wr_id_idx].wr_id;
2405 cqe++;
2406 (*budget)--;
2407 rq->hwq.cons++;
2408 *pcqe = cqe;
2119 2409
2120 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2410 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2121 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2411 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2122 /* Add qp to flush list of the CQ */ 2412 /* Add qp to flush list of the CQ */
2123 bnxt_qplib_lock_buddy_cq(qp, cq); 2413 bnxt_qplib_lock_buddy_cq(qp, cq);
2124 __bnxt_qplib_add_flush_qp(qp); 2414 __bnxt_qplib_add_flush_qp(qp);
2125 bnxt_qplib_unlock_buddy_cq(qp, cq); 2415 bnxt_qplib_unlock_buddy_cq(qp, cq);
2416 }
2126 } 2417 }
2127 2418
2128done: 2419done:
@@ -2136,6 +2427,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2136{ 2427{
2137 struct bnxt_qplib_qp *qp; 2428 struct bnxt_qplib_qp *qp;
2138 struct bnxt_qplib_q *rq; 2429 struct bnxt_qplib_q *rq;
2430 struct bnxt_qplib_srq *srq;
2139 struct bnxt_qplib_cqe *cqe; 2431 struct bnxt_qplib_cqe *cqe;
2140 u32 wr_id_idx; 2432 u32 wr_id_idx;
2141 int rc = 0; 2433 int rc = 0;
@@ -2166,27 +2458,48 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2166 hwcqe->src_qp_high_srq_or_rq_wr_id) & 2458 hwcqe->src_qp_high_srq_or_rq_wr_id) &
2167 CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8); 2459 CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8);
2168 2460
2169 rq = &qp->rq; 2461 if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
2170 if (wr_id_idx > rq->hwq.max_elements) { 2462 srq = qp->srq;
2171 dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD "); 2463 if (!srq)
2172 dev_err(&cq->hwq.pdev->dev, 2464 return -EINVAL;
2173 "QPLIB: wr_id idx %#x exceeded RQ max %#x",
2174 wr_id_idx, rq->hwq.max_elements);
2175 return -EINVAL;
2176 }
2177 2465
2178 cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2466 if (wr_id_idx > srq->hwq.max_elements) {
2179 cqe++; 2467 dev_err(&cq->hwq.pdev->dev,
2180 (*budget)--; 2468 "QPLIB: FP: CQ Process UD ");
2181 rq->hwq.cons++; 2469 dev_err(&cq->hwq.pdev->dev,
2182 *pcqe = cqe; 2470 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2471 wr_id_idx, srq->hwq.max_elements);
2472 return -EINVAL;
2473 }
2474 cqe->wr_id = srq->swq[wr_id_idx].wr_id;
2475 bnxt_qplib_release_srqe(srq, wr_id_idx);
2476 cqe++;
2477 (*budget)--;
2478 *pcqe = cqe;
2479 } else {
2480 rq = &qp->rq;
2481 if (wr_id_idx > rq->hwq.max_elements) {
2482 dev_err(&cq->hwq.pdev->dev,
2483 "QPLIB: FP: CQ Process UD ");
2484 dev_err(&cq->hwq.pdev->dev,
2485 "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
2486 wr_id_idx, rq->hwq.max_elements);
2487 return -EINVAL;
2488 }
2183 2489
2184 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2490 cqe->wr_id = rq->swq[wr_id_idx].wr_id;
2185 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2491 cqe++;
2186 /* Add qp to flush list of the CQ */ 2492 (*budget)--;
2187 bnxt_qplib_lock_buddy_cq(qp, cq); 2493 rq->hwq.cons++;
2188 __bnxt_qplib_add_flush_qp(qp); 2494 *pcqe = cqe;
2189 bnxt_qplib_unlock_buddy_cq(qp, cq); 2495
2496 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2497 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2498 /* Add qp to flush list of the CQ */
2499 bnxt_qplib_lock_buddy_cq(qp, cq);
2500 __bnxt_qplib_add_flush_qp(qp);
2501 bnxt_qplib_unlock_buddy_cq(qp, cq);
2502 }
2190 } 2503 }
2191done: 2504done:
2192 return rc; 2505 return rc;
@@ -2218,6 +2531,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2218{ 2531{
2219 struct bnxt_qplib_qp *qp; 2532 struct bnxt_qplib_qp *qp;
2220 struct bnxt_qplib_q *rq; 2533 struct bnxt_qplib_q *rq;
2534 struct bnxt_qplib_srq *srq;
2221 struct bnxt_qplib_cqe *cqe; 2535 struct bnxt_qplib_cqe *cqe;
2222 u32 wr_id_idx; 2536 u32 wr_id_idx;
2223 int rc = 0; 2537 int rc = 0;
@@ -2256,26 +2570,49 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2256 cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2); 2570 cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
2257 cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata); 2571 cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata);
2258 2572
2259 rq = &qp->rq; 2573 if (cqe->flags & CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ) {
2260 if (wr_id_idx > rq->hwq.max_elements) { 2574 srq = qp->srq;
2261 dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); 2575 if (!srq) {
2262 dev_err(&cq->hwq.pdev->dev, "QPLIB: ix 0x%x exceeded RQ max 0x%x", 2576 dev_err(&cq->hwq.pdev->dev,
2263 wr_id_idx, rq->hwq.max_elements); 2577 "QPLIB: FP: SRQ used but not defined??");
2264 return -EINVAL; 2578 return -EINVAL;
2265 } 2579 }
2266 2580 if (wr_id_idx > srq->hwq.max_elements) {
2267 cqe->wr_id = rq->swq[wr_id_idx].wr_id; 2581 dev_err(&cq->hwq.pdev->dev,
2268 cqe++; 2582 "QPLIB: FP: CQ Process Raw/QP1 ");
2269 (*budget)--; 2583 dev_err(&cq->hwq.pdev->dev,
2270 rq->hwq.cons++; 2584 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2271 *pcqe = cqe; 2585 wr_id_idx, srq->hwq.max_elements);
2586 return -EINVAL;
2587 }
2588 cqe->wr_id = srq->swq[wr_id_idx].wr_id;
2589 bnxt_qplib_release_srqe(srq, wr_id_idx);
2590 cqe++;
2591 (*budget)--;
2592 *pcqe = cqe;
2593 } else {
2594 rq = &qp->rq;
2595 if (wr_id_idx > rq->hwq.max_elements) {
2596 dev_err(&cq->hwq.pdev->dev,
2597 "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
2598 dev_err(&cq->hwq.pdev->dev,
2599 "QPLIB: ix 0x%x exceeded RQ max 0x%x",
2600 wr_id_idx, rq->hwq.max_elements);
2601 return -EINVAL;
2602 }
2603 cqe->wr_id = rq->swq[wr_id_idx].wr_id;
2604 cqe++;
2605 (*budget)--;
2606 rq->hwq.cons++;
2607 *pcqe = cqe;
2272 2608
2273 if (hwcqe->status != CQ_RES_RC_STATUS_OK) { 2609 if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
2274 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; 2610 qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
2275 /* Add qp to flush list of the CQ */ 2611 /* Add qp to flush list of the CQ */
2276 bnxt_qplib_lock_buddy_cq(qp, cq); 2612 bnxt_qplib_lock_buddy_cq(qp, cq);
2277 __bnxt_qplib_add_flush_qp(qp); 2613 __bnxt_qplib_add_flush_qp(qp);
2278 bnxt_qplib_unlock_buddy_cq(qp, cq); 2614 bnxt_qplib_unlock_buddy_cq(qp, cq);
2615 }
2279 } 2616 }
2280 2617
2281done: 2618done:
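The RC, UD and raw-ETH completion handlers above all gained the same SRQ-versus-RQ dispatch: pick the software queue that owns wr_id_idx, bounds-check it, copy the caller's wr_id into the CQE, then either return the SRQ slot via bnxt_qplib_release_srqe() or advance the RQ consumer index. The patch keeps the three copies separate; a hypothetical helper capturing the shared part (error logging omitted, helper name invented) might look like:

static int qplib_resolve_rx_wrid(struct bnxt_qplib_qp *qp, bool is_srq,
				 u32 wr_id_idx, struct bnxt_qplib_cqe *cqe)
{
	if (is_srq) {
		struct bnxt_qplib_srq *srq = qp->srq;

		if (!srq || wr_id_idx > srq->hwq.max_elements)
			return -EINVAL;
		cqe->wr_id = srq->swq[wr_id_idx].wr_id;
		bnxt_qplib_release_srqe(srq, wr_id_idx);
	} else {
		struct bnxt_qplib_q *rq = &qp->rq;

		if (wr_id_idx > rq->hwq.max_elements)
			return -EINVAL;
		cqe->wr_id = rq->swq[wr_id_idx].wr_id;
		rq->hwq.cons++;
	}
	return 0;
}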
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index c582d4ec8173..211b27a8f9e2 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -39,6 +39,27 @@
39#ifndef __BNXT_QPLIB_FP_H__ 39#ifndef __BNXT_QPLIB_FP_H__
40#define __BNXT_QPLIB_FP_H__ 40#define __BNXT_QPLIB_FP_H__
41 41
42struct bnxt_qplib_srq {
43 struct bnxt_qplib_pd *pd;
44 struct bnxt_qplib_dpi *dpi;
45 void __iomem *dbr_base;
46 u64 srq_handle;
47 u32 id;
48 u32 max_wqe;
49 u32 max_sge;
50 u32 threshold;
51 bool arm_req;
52 struct bnxt_qplib_cq *cq;
53 struct bnxt_qplib_hwq hwq;
54 struct bnxt_qplib_swq *swq;
55 struct scatterlist *sglist;
56 int start_idx;
57 int last_idx;
58 u32 nmap;
59 u16 eventq_hw_ring_id;
60 spinlock_t lock; /* protect SRQE link list */
61};
62
42struct bnxt_qplib_sge { 63struct bnxt_qplib_sge {
43 u64 addr; 64 u64 addr;
44 u32 lkey; 65 u32 lkey;
@@ -79,6 +100,7 @@ static inline u32 get_psne_idx(u32 val)
79 100
80struct bnxt_qplib_swq { 101struct bnxt_qplib_swq {
81 u64 wr_id; 102 u64 wr_id;
103 int next_idx;
82 u8 type; 104 u8 type;
83 u8 flags; 105 u8 flags;
84 u32 start_psn; 106 u32 start_psn;
@@ -404,29 +426,27 @@ struct bnxt_qplib_cq {
404 writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db) 426 writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
405 427
406struct bnxt_qplib_nq { 428struct bnxt_qplib_nq {
407 struct pci_dev *pdev; 429 struct pci_dev *pdev;
408 430
409 int vector; 431 int vector;
410 cpumask_t mask; 432 cpumask_t mask;
411 int budget; 433 int budget;
412 bool requested; 434 bool requested;
413 struct tasklet_struct worker; 435 struct tasklet_struct worker;
414 struct bnxt_qplib_hwq hwq; 436 struct bnxt_qplib_hwq hwq;
415 437
416 u16 bar_reg; 438 u16 bar_reg;
417 u16 bar_reg_off; 439 u16 bar_reg_off;
418 u16 ring_id; 440 u16 ring_id;
419 void __iomem *bar_reg_iomem; 441 void __iomem *bar_reg_iomem;
420 442
421 int (*cqn_handler) 443 int (*cqn_handler)(struct bnxt_qplib_nq *nq,
422 (struct bnxt_qplib_nq *nq, 444 struct bnxt_qplib_cq *cq);
423 struct bnxt_qplib_cq *cq); 445 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
424 int (*srqn_handler) 446 struct bnxt_qplib_srq *srq,
425 (struct bnxt_qplib_nq *nq, 447 u8 event);
426 void *srq, 448 struct workqueue_struct *cqn_wq;
427 u8 event); 449 char name[32];
428 struct workqueue_struct *cqn_wq;
429 char name[32];
430}; 450};
431 451
432struct bnxt_qplib_nq_work { 452struct bnxt_qplib_nq_work {
@@ -441,8 +461,18 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
441 int (*cqn_handler)(struct bnxt_qplib_nq *nq, 461 int (*cqn_handler)(struct bnxt_qplib_nq *nq,
442 struct bnxt_qplib_cq *cq), 462 struct bnxt_qplib_cq *cq),
443 int (*srqn_handler)(struct bnxt_qplib_nq *nq, 463 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
444 void *srq, 464 struct bnxt_qplib_srq *srq,
445 u8 event)); 465 u8 event));
466int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
467 struct bnxt_qplib_srq *srq);
468int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
469 struct bnxt_qplib_srq *srq);
470int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
471 struct bnxt_qplib_srq *srq);
472int bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
473 struct bnxt_qplib_srq *srq);
474int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
475 struct bnxt_qplib_swqe *wqe);
446int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); 476int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
447int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); 477int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
448int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp); 478int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
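For orientation, a rough sketch of how a verbs-layer caller (bnxt_re/ib_verbs.c, not shown in this excerpt) would drive the new qplib SRQ entry points; NQ/eventq wiring, user-memory mapping and error unwinding are omitted, and the sizes are purely illustrative:

static int example_srq_lifecycle(struct bnxt_qplib_res *res,
				 struct bnxt_qplib_pd *pd,
				 struct bnxt_qplib_dpi *dpi,
				 struct bnxt_qplib_swqe *rwqe)
{
	struct bnxt_qplib_srq srq = {};
	int rc;

	srq.pd = pd;
	srq.dpi = dpi;
	srq.max_wqe = 1024;
	srq.max_sge = 2;
	srq.threshold = 16;	/* SRQ limit / arm threshold */

	rc = bnxt_qplib_create_srq(res, &srq);
	if (rc)
		return rc;

	rc = bnxt_qplib_post_srq_recv(&srq, rwqe);	/* repeat per buffer   */
	if (!rc)
		rc = bnxt_qplib_query_srq(res, &srq);	/* read back srq_limit */

	bnxt_qplib_destroy_srq(res, &srq);
	return rc;
}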
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index bb5574adf195..8329ec6a7946 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -93,7 +93,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
93 opcode = req->opcode; 93 opcode = req->opcode;
94 if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && 94 if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
95 (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC && 95 (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
96 opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW)) { 96 opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
97 opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
97 dev_err(&rcfw->pdev->dev, 98 dev_err(&rcfw->pdev->dev,
98 "QPLIB: RCFW not initialized, reject opcode 0x%x", 99 "QPLIB: RCFW not initialized, reject opcode 0x%x",
99 opcode); 100 opcode);
@@ -615,7 +616,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
615 int msix_vector, 616 int msix_vector,
616 int cp_bar_reg_off, int virt_fn, 617 int cp_bar_reg_off, int virt_fn,
617 int (*aeq_handler)(struct bnxt_qplib_rcfw *, 618 int (*aeq_handler)(struct bnxt_qplib_rcfw *,
618 struct creq_func_event *)) 619 void *, void *))
619{ 620{
620 resource_size_t res_base; 621 resource_size_t res_base;
621 struct cmdq_init init; 622 struct cmdq_init init;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 2946a7cfae82..6bee6e3636ea 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -167,7 +167,7 @@ struct bnxt_qplib_rcfw {
167#define FIRMWARE_TIMED_OUT 3 167#define FIRMWARE_TIMED_OUT 3
168 wait_queue_head_t waitq; 168 wait_queue_head_t waitq;
169 int (*aeq_handler)(struct bnxt_qplib_rcfw *, 169 int (*aeq_handler)(struct bnxt_qplib_rcfw *,
170 struct creq_func_event *); 170 void *, void *);
171 u32 seq_num; 171 u32 seq_num;
172 172
173 /* Bar region info */ 173 /* Bar region info */
@@ -199,9 +199,8 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
199 struct bnxt_qplib_rcfw *rcfw, 199 struct bnxt_qplib_rcfw *rcfw,
200 int msix_vector, 200 int msix_vector,
201 int cp_bar_reg_off, int virt_fn, 201 int cp_bar_reg_off, int virt_fn,
202 int (*aeq_handler) 202 int (*aeq_handler)(struct bnxt_qplib_rcfw *,
203 (struct bnxt_qplib_rcfw *, 203 void *aeqe, void *obj));
204 struct creq_func_event *));
205 204
206struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( 205struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
207 struct bnxt_qplib_rcfw *rcfw, 206 struct bnxt_qplib_rcfw *rcfw,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 4e101704e801..ad37d54affcc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -104,13 +104,12 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
104 104
105 if (!sghead) { 105 if (!sghead) {
106 for (i = 0; i < pages; i++) { 106 for (i = 0; i < pages; i++) {
107 pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev, 107 pbl->pg_arr[i] = dma_zalloc_coherent(&pdev->dev,
108 pbl->pg_size, 108 pbl->pg_size,
109 &pbl->pg_map_arr[i], 109 &pbl->pg_map_arr[i],
110 GFP_KERNEL); 110 GFP_KERNEL);
111 if (!pbl->pg_arr[i]) 111 if (!pbl->pg_arr[i])
112 goto fail; 112 goto fail;
113 memset(pbl->pg_arr[i], 0, pbl->pg_size);
114 pbl->pg_count++; 113 pbl->pg_count++;
115 } 114 }
116 } else { 115 } else {
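The qplib_res.c hunk above is a straight substitution: dma_zalloc_coherent() returns already-zeroed memory (at this point in the kernel it was a thin wrapper that passed __GFP_ZERO to dma_alloc_coherent()), so the explicit memset() can go. The two forms side by side, as stand-alone helpers:

#include <linux/dma-mapping.h>
#include <linux/string.h>

static void *alloc_page_zeroed_old(struct device *dev, size_t size,
				   dma_addr_t *map)
{
	void *pg = dma_alloc_coherent(dev, size, map, GFP_KERNEL);

	if (pg)
		memset(pg, 0, size);
	return pg;
}

static void *alloc_page_zeroed_new(struct device *dev, size_t size,
				   dma_addr_t *map)
{
	return dma_zalloc_coherent(dev, size, map, GFP_KERNEL);
}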
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 9543ce51a28a..c015c1861351 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -64,8 +64,28 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
64 return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ); 64 return !!(pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
65} 65}
66 66
67static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
68 char *fw_ver)
69{
70 struct cmdq_query_version req;
71 struct creq_query_version_resp resp;
72 u16 cmd_flags = 0;
73 int rc = 0;
74
75 RCFW_CMD_PREP(req, QUERY_VERSION, cmd_flags);
76
77 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
78 (void *)&resp, NULL, 0);
79 if (rc)
80 return;
81 fw_ver[0] = resp.fw_maj;
82 fw_ver[1] = resp.fw_minor;
83 fw_ver[2] = resp.fw_bld;
84 fw_ver[3] = resp.fw_rsvd;
85}
86
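bnxt_qplib_query_version() fills a four-byte major/minor/build/reserved array rather than a string; a sketch of how a consumer might render it (the actual formatting happens at the ib_verbs layer, which is outside this excerpt):

/* Turn the 4-byte version array filled in above into the usual dotted
 * string; the helper name and caller are illustrative.
 */
static void format_fw_ver(const u8 fw_ver[4], char *buf, size_t len)
{
	snprintf(buf, len, "%d.%d.%d.%d",
		 fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3]);
}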
67int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, 87int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
68 struct bnxt_qplib_dev_attr *attr) 88 struct bnxt_qplib_dev_attr *attr, bool vf)
69{ 89{
70 struct cmdq_query_func req; 90 struct cmdq_query_func req;
71 struct creq_query_func_resp resp; 91 struct creq_query_func_resp resp;
@@ -95,7 +115,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
95 /* Extract the context from the side buffer */ 115 /* Extract the context from the side buffer */
96 attr->max_qp = le32_to_cpu(sb->max_qp); 116 attr->max_qp = le32_to_cpu(sb->max_qp);
97 /* max_qp value reported by FW for PF doesn't include the QP1 for PF */ 117 /* max_qp value reported by FW for PF doesn't include the QP1 for PF */
98 attr->max_qp += 1; 118 if (!vf)
119 attr->max_qp += 1;
99 attr->max_qp_rd_atom = 120 attr->max_qp_rd_atom =
100 sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? 121 sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
101 BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom; 122 BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
@@ -133,7 +154,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
133 attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE; 154 attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
134 attr->max_sgid = le32_to_cpu(sb->max_gid); 155 attr->max_sgid = le32_to_cpu(sb->max_gid);
135 156
136 strlcpy(attr->fw_ver, "20.6.28.0", sizeof(attr->fw_ver)); 157 bnxt_qplib_query_version(rcfw, attr->fw_ver);
137 158
138 for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) { 159 for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
139 temp = le32_to_cpu(sb->tqm_alloc_reqs[i]); 160 temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
@@ -150,6 +171,38 @@ bail:
150 return rc; 171 return rc;
151} 172}
152 173
174int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
175 struct bnxt_qplib_rcfw *rcfw,
176 struct bnxt_qplib_ctx *ctx)
177{
178 struct cmdq_set_func_resources req;
179 struct creq_set_func_resources_resp resp;
180 u16 cmd_flags = 0;
181 int rc = 0;
182
183 RCFW_CMD_PREP(req, SET_FUNC_RESOURCES, cmd_flags);
184
185 req.number_of_qp = cpu_to_le32(ctx->qpc_count);
186 req.number_of_mrw = cpu_to_le32(ctx->mrw_count);
187 req.number_of_srq = cpu_to_le32(ctx->srqc_count);
188 req.number_of_cq = cpu_to_le32(ctx->cq_count);
189
190 req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
191 req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
192 req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
193 req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
194 req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
195
196 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
197 (void *)&resp,
198 NULL, 0);
199 if (rc) {
200 dev_err(&res->pdev->dev,
201 "QPLIB: Failed to set function resources");
202 }
203 return rc;
204}
205
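bnxt_qplib_set_func_resources() hands the firmware both the PF's own pool sizes and the per-VF ceilings carried in ctx->vf_res. How those ceilings are chosen is up to the caller; a purely illustrative split (the halving policy and the num_vfs parameter are not taken from this patch) could look like:

static void example_partition_vf_resources(struct bnxt_qplib_ctx *ctx,
					   u32 num_vfs)
{
	if (!num_vfs)
		return;

	/* Reserve half of each pool for the PF, share the rest evenly. */
	ctx->vf_res.max_qp_per_vf  = ctx->qpc_count  / (2 * num_vfs);
	ctx->vf_res.max_mrw_per_vf = ctx->mrw_count  / (2 * num_vfs);
	ctx->vf_res.max_srq_per_vf = ctx->srqc_count / (2 * num_vfs);
	ctx->vf_res.max_cq_per_vf  = ctx->cq_count   / (2 * num_vfs);
	ctx->vf_res.max_gid_per_vf = 2;	/* arbitrary example value */
}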
153/* SGID */ 206/* SGID */
154int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, 207int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
155 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, 208 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
@@ -604,7 +657,7 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
604} 657}
605 658
606int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, 659int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
607 u64 *pbl_tbl, int num_pbls, bool block) 660 u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
608{ 661{
609 struct bnxt_qplib_rcfw *rcfw = res->rcfw; 662 struct bnxt_qplib_rcfw *rcfw = res->rcfw;
610 struct cmdq_register_mr req; 663 struct cmdq_register_mr req;
@@ -615,6 +668,9 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
615 u32 pg_size; 668 u32 pg_size;
616 669
617 if (num_pbls) { 670 if (num_pbls) {
671 /* Allocate memory for the non-leaf pages to store buf ptrs.
 672 * Non-leaf pages always use the system PAGE_SIZE.
673 */
618 pg_ptrs = roundup_pow_of_two(num_pbls); 674 pg_ptrs = roundup_pow_of_two(num_pbls);
619 pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT; 675 pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
620 if (!pages) 676 if (!pages)
@@ -632,6 +688,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
632 bnxt_qplib_free_hwq(res->pdev, &mr->hwq); 688 bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
633 689
634 mr->hwq.max_elements = pages; 690 mr->hwq.max_elements = pages;
691 /* Use system PAGE_SIZE */
635 rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0, 692 rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
636 &mr->hwq.max_elements, 693 &mr->hwq.max_elements,
637 PAGE_SIZE, 0, PAGE_SIZE, 694 PAGE_SIZE, 0, PAGE_SIZE,
@@ -652,18 +709,22 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
652 709
653 /* Configure the request */ 710 /* Configure the request */
654 if (mr->hwq.level == PBL_LVL_MAX) { 711 if (mr->hwq.level == PBL_LVL_MAX) {
712 /* No PBL provided, just use system PAGE_SIZE */
655 level = 0; 713 level = 0;
656 req.pbl = 0; 714 req.pbl = 0;
657 pg_size = PAGE_SIZE; 715 pg_size = PAGE_SIZE;
658 } else { 716 } else {
659 level = mr->hwq.level + 1; 717 level = mr->hwq.level + 1;
660 req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); 718 req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
661 pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
662 } 719 }
720 pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
663 req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) | 721 req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
664 ((ilog2(pg_size) << 722 ((ilog2(pg_size) <<
665 CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) & 723 CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
666 CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK); 724 CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
725 req.log2_pbl_pg_size = cpu_to_le16(((ilog2(PAGE_SIZE) <<
726 CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT) &
727 CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK));
667 req.access = (mr->flags & 0xFFFF); 728 req.access = (mr->flags & 0xFFFF);
668 req.va = cpu_to_le64(mr->va); 729 req.va = cpu_to_le64(mr->va);
669 req.key = cpu_to_le32(mr->lkey); 730 req.key = cpu_to_le32(mr->lkey);
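The register-MR change stops deriving the page size from the PBL and instead encodes the caller-supplied buffer page size as a log2 value shifted past the two level bits. A worked example against the CMDQ_REGISTER_MR_* values added to roce_hsi.h later in this patch:

/* Worked example of the log2_pg_size encoding above, for a 2 MB buffer
 * page size:
 *
 *   ilog2(2 MB) = ilog2(0x200000) = 21 = 0x15
 *   0x15 << CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT (2) = 0x54
 *        = CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_2M, defined as (0x15UL << 2)
 */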
@@ -729,3 +790,73 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
729 0); 790 0);
730 return 0; 791 return 0;
731} 792}
793
794int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
795 struct bnxt_qplib_roce_stats *stats)
796{
797 struct cmdq_query_roce_stats req;
798 struct creq_query_roce_stats_resp resp;
799 struct bnxt_qplib_rcfw_sbuf *sbuf;
800 struct creq_query_roce_stats_resp_sb *sb;
801 u16 cmd_flags = 0;
802 int rc = 0;
803
804 RCFW_CMD_PREP(req, QUERY_ROCE_STATS, cmd_flags);
805
806 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
807 if (!sbuf) {
808 dev_err(&rcfw->pdev->dev,
809 "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed");
810 return -ENOMEM;
811 }
812
813 sb = sbuf->sb;
814 req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
815 rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
816 (void *)sbuf, 0);
817 if (rc)
818 goto bail;
819 /* Extract the context from the side buffer */
820 stats->to_retransmits = le64_to_cpu(sb->to_retransmits);
821 stats->seq_err_naks_rcvd = le64_to_cpu(sb->seq_err_naks_rcvd);
822 stats->max_retry_exceeded = le64_to_cpu(sb->max_retry_exceeded);
823 stats->rnr_naks_rcvd = le64_to_cpu(sb->rnr_naks_rcvd);
824 stats->missing_resp = le64_to_cpu(sb->missing_resp);
825 stats->unrecoverable_err = le64_to_cpu(sb->unrecoverable_err);
826 stats->bad_resp_err = le64_to_cpu(sb->bad_resp_err);
827 stats->local_qp_op_err = le64_to_cpu(sb->local_qp_op_err);
828 stats->local_protection_err = le64_to_cpu(sb->local_protection_err);
829 stats->mem_mgmt_op_err = le64_to_cpu(sb->mem_mgmt_op_err);
830 stats->remote_invalid_req_err = le64_to_cpu(sb->remote_invalid_req_err);
831 stats->remote_access_err = le64_to_cpu(sb->remote_access_err);
832 stats->remote_op_err = le64_to_cpu(sb->remote_op_err);
833 stats->dup_req = le64_to_cpu(sb->dup_req);
834 stats->res_exceed_max = le64_to_cpu(sb->res_exceed_max);
835 stats->res_length_mismatch = le64_to_cpu(sb->res_length_mismatch);
836 stats->res_exceeds_wqe = le64_to_cpu(sb->res_exceeds_wqe);
837 stats->res_opcode_err = le64_to_cpu(sb->res_opcode_err);
838 stats->res_rx_invalid_rkey = le64_to_cpu(sb->res_rx_invalid_rkey);
839 stats->res_rx_domain_err = le64_to_cpu(sb->res_rx_domain_err);
840 stats->res_rx_no_perm = le64_to_cpu(sb->res_rx_no_perm);
841 stats->res_rx_range_err = le64_to_cpu(sb->res_rx_range_err);
842 stats->res_tx_invalid_rkey = le64_to_cpu(sb->res_tx_invalid_rkey);
843 stats->res_tx_domain_err = le64_to_cpu(sb->res_tx_domain_err);
844 stats->res_tx_no_perm = le64_to_cpu(sb->res_tx_no_perm);
845 stats->res_tx_range_err = le64_to_cpu(sb->res_tx_range_err);
846 stats->res_irrq_oflow = le64_to_cpu(sb->res_irrq_oflow);
847 stats->res_unsup_opcode = le64_to_cpu(sb->res_unsup_opcode);
848 stats->res_unaligned_atomic = le64_to_cpu(sb->res_unaligned_atomic);
849 stats->res_rem_inv_err = le64_to_cpu(sb->res_rem_inv_err);
850 stats->res_mem_error = le64_to_cpu(sb->res_mem_error);
851 stats->res_srq_err = le64_to_cpu(sb->res_srq_err);
852 stats->res_cmp_err = le64_to_cpu(sb->res_cmp_err);
853 stats->res_invalid_dup_rkey = le64_to_cpu(sb->res_invalid_dup_rkey);
854 stats->res_wqe_format_err = le64_to_cpu(sb->res_wqe_format_err);
855 stats->res_cq_load_err = le64_to_cpu(sb->res_cq_load_err);
856 stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
857 stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
858 stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
859bail:
860 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
861 return rc;
862}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 11322582f5e4..9d3e8b994945 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -45,7 +45,8 @@
45#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 45#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040
46 46
47struct bnxt_qplib_dev_attr { 47struct bnxt_qplib_dev_attr {
48 char fw_ver[32]; 48#define FW_VER_ARR_LEN 4
49 u8 fw_ver[FW_VER_ARR_LEN];
49 u16 max_sgid; 50 u16 max_sgid;
50 u16 max_mrw; 51 u16 max_mrw;
51 u32 max_qp; 52 u32 max_qp;
@@ -127,6 +128,85 @@ struct bnxt_qplib_frpl {
127#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5) 128#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5)
128#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6) 129#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6)
129 130
131struct bnxt_qplib_roce_stats {
132 u64 to_retransmits;
133 u64 seq_err_naks_rcvd;
134 /* seq_err_naks_rcvd is 64 b */
135 u64 max_retry_exceeded;
136 /* max_retry_exceeded is 64 b */
137 u64 rnr_naks_rcvd;
138 /* rnr_naks_rcvd is 64 b */
139 u64 missing_resp;
140 u64 unrecoverable_err;
141 /* unrecoverable_err is 64 b */
142 u64 bad_resp_err;
143 /* bad_resp_err is 64 b */
144 u64 local_qp_op_err;
145 /* local_qp_op_err is 64 b */
146 u64 local_protection_err;
147 /* local_protection_err is 64 b */
148 u64 mem_mgmt_op_err;
149 /* mem_mgmt_op_err is 64 b */
150 u64 remote_invalid_req_err;
151 /* remote_invalid_req_err is 64 b */
152 u64 remote_access_err;
153 /* remote_access_err is 64 b */
154 u64 remote_op_err;
155 /* remote_op_err is 64 b */
156 u64 dup_req;
157 /* dup_req is 64 b */
158 u64 res_exceed_max;
159 /* res_exceed_max is 64 b */
160 u64 res_length_mismatch;
161 /* res_length_mismatch is 64 b */
162 u64 res_exceeds_wqe;
163 /* res_exceeds_wqe is 64 b */
164 u64 res_opcode_err;
165 /* res_opcode_err is 64 b */
166 u64 res_rx_invalid_rkey;
167 /* res_rx_invalid_rkey is 64 b */
168 u64 res_rx_domain_err;
169 /* res_rx_domain_err is 64 b */
170 u64 res_rx_no_perm;
171 /* res_rx_no_perm is 64 b */
172 u64 res_rx_range_err;
173 /* res_rx_range_err is 64 b */
174 u64 res_tx_invalid_rkey;
175 /* res_tx_invalid_rkey is 64 b */
176 u64 res_tx_domain_err;
177 /* res_tx_domain_err is 64 b */
178 u64 res_tx_no_perm;
179 /* res_tx_no_perm is 64 b */
180 u64 res_tx_range_err;
181 /* res_tx_range_err is 64 b */
182 u64 res_irrq_oflow;
183 /* res_irrq_oflow is 64 b */
184 u64 res_unsup_opcode;
185 /* res_unsup_opcode is 64 b */
186 u64 res_unaligned_atomic;
187 /* res_unaligned_atomic is 64 b */
188 u64 res_rem_inv_err;
189 /* res_rem_inv_err is 64 b */
190 u64 res_mem_error;
191 /* res_mem_error is 64 b */
192 u64 res_srq_err;
193 /* res_srq_err is 64 b */
194 u64 res_cmp_err;
195 /* res_cmp_err is 64 b */
196 u64 res_invalid_dup_rkey;
197 /* res_invalid_dup_rkey is 64 b */
198 u64 res_wqe_format_err;
199 /* res_wqe_format_err is 64 b */
200 u64 res_cq_load_err;
201 /* res_cq_load_err is 64 b */
202 u64 res_srq_load_err;
203 /* res_srq_load_err is 64 b */
204 u64 res_tx_pci_err;
205 /* res_tx_pci_err is 64 b */
206 u64 res_rx_pci_err;
207 /* res_rx_pci_err is 64 b */
208};
209
130int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, 210int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
131 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, 211 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
132 struct bnxt_qplib_gid *gid); 212 struct bnxt_qplib_gid *gid);
@@ -147,7 +227,10 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
147 struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey, 227 struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
148 bool update); 228 bool update);
149int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, 229int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
150 struct bnxt_qplib_dev_attr *attr); 230 struct bnxt_qplib_dev_attr *attr, bool vf);
231int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
232 struct bnxt_qplib_rcfw *rcfw,
233 struct bnxt_qplib_ctx *ctx);
151int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); 234int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
152int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); 235int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
153int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, 236int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
@@ -155,7 +238,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
155int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, 238int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
156 bool block); 239 bool block);
157int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, 240int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
158 u64 *pbl_tbl, int num_pbls, bool block); 241 u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size);
159int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); 242int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
160int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, 243int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
161 struct bnxt_qplib_mrw *mr, int max); 244 struct bnxt_qplib_mrw *mr, int max);
@@ -164,4 +247,6 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
164int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res, 247int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
165 struct bnxt_qplib_frpl *frpl); 248 struct bnxt_qplib_frpl *frpl);
166int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids); 249int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
250int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
251 struct bnxt_qplib_roce_stats *stats);
167#endif /* __BNXT_QPLIB_SP_H__*/ 252#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index c3cba6063a03..2d7ea096a247 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -954,6 +954,7 @@ struct cmdq_base {
954 #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL 954 #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
955 #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL 955 #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
956 #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL 956 #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
957 #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
957 u8 cmd_size; 958 u8 cmd_size;
958 __le16 flags; 959 __le16 flags;
959 __le16 cookie; 960 __le16 cookie;
@@ -1383,8 +1384,20 @@ struct cmdq_register_mr {
1383 #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL 1384 #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
1384 #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL 1385 #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
1385 #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL 1386 #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
1387 #define CMDQ_REGISTER_MR_LVL_LAST CMDQ_REGISTER_MR_LVL_LVL_2
1386 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL 1388 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
1387 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2 1389 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
1390 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4K (0xcUL << 2)
1391 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_8K (0xdUL << 2)
1392 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_64K (0x10UL << 2)
1393 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_256K (0x12UL << 2)
1394 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1M (0x14UL << 2)
1395 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_2M (0x15UL << 2)
1396 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4M (0x16UL << 2)
1397 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G (0x1eUL << 2)
1398 #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_LAST \
1399 CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G
1400 #define CMDQ_REGISTER_MR_UNUSED1 0x80UL
1388 u8 access; 1401 u8 access;
1389 #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL 1402 #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
1390 #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL 1403 #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
@@ -1392,7 +1405,21 @@ struct cmdq_register_mr {
1392 #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL 1405 #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
1393 #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL 1406 #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
1394 #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL 1407 #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
1395 __le16 unused_1; 1408 __le16 log2_pbl_pg_size;
1409 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK 0x1fUL
1410 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT 0
1411 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K 0xcUL
1412 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K 0xdUL
1413 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K 0x10UL
1414 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K 0x12UL
1415 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M 0x14UL
1416 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M 0x15UL
1417 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M 0x16UL
1418 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G 0x1eUL
1419 #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_LAST \
1420 CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G
1421 #define CMDQ_REGISTER_MR_UNUSED11_MASK 0xffe0UL
1422 #define CMDQ_REGISTER_MR_UNUSED11_SFT 5
1396 __le32 key; 1423 __le32 key;
1397 __le64 pbl; 1424 __le64 pbl;
1398 __le64 va; 1425 __le64 va;
@@ -1799,6 +1826,16 @@ struct cmdq_set_func_resources {
1799 u8 resp_size; 1826 u8 resp_size;
1800 u8 reserved8; 1827 u8 reserved8;
1801 __le64 resp_addr; 1828 __le64 resp_addr;
1829 __le32 number_of_qp;
1830 __le32 number_of_mrw;
1831 __le32 number_of_srq;
1832 __le32 number_of_cq;
1833 __le32 max_qp_per_vf;
1834 __le32 max_mrw_per_vf;
1835 __le32 max_srq_per_vf;
1836 __le32 max_cq_per_vf;
1837 __le32 max_gid_per_vf;
1838 __le32 stat_ctx_id;
1802}; 1839};
1803 1840
1804/* Read hardware resource context command (24 bytes) */ 1841/* Read hardware resource context command (24 bytes) */
@@ -2013,6 +2050,20 @@ struct creq_modify_qp_resp {
2013 __le16 reserved48[3]; 2050 __le16 reserved48[3];
2014}; 2051};
2015 2052
2053/* cmdq_query_roce_stats (size:128b/16B) */
2054struct cmdq_query_roce_stats {
2055 u8 opcode;
2056 #define CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS 0x8eUL
2057 #define CMDQ_QUERY_ROCE_STATS_OPCODE_LAST \
2058 CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS
2059 u8 cmd_size;
2060 __le16 flags;
2061 __le16 cookie;
2062 u8 resp_size;
2063 u8 reserved8;
2064 __le64 resp_addr;
2065};
2066
2016/* Query QP command response (16 bytes) */ 2067/* Query QP command response (16 bytes) */
2017struct creq_query_qp_resp { 2068struct creq_query_qp_resp {
2018 u8 type; 2069 u8 type;
@@ -2783,6 +2834,80 @@ struct creq_query_cc_resp_sb {
2783 __le64 reserved64_1; 2834 __le64 reserved64_1;
2784}; 2835};
2785 2836
2837/* creq_query_roce_stats_resp (size:128b/16B) */
2838struct creq_query_roce_stats_resp {
2839 u8 type;
2840 #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_MASK 0x3fUL
2841 #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_SFT 0
2842 #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT 0x38UL
2843 #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_LAST \
2844 CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT
2845 u8 status;
2846 __le16 cookie;
2847 __le32 size;
2848 u8 v;
2849 #define CREQ_QUERY_ROCE_STATS_RESP_V 0x1UL
2850 u8 event;
2851 #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS 0x8eUL
2852 #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_LAST \
2853 CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS
2854 u8 reserved48[6];
2855};
2856
2857/* creq_query_roce_stats_resp_sb (size:2624b/328B) */
2858struct creq_query_roce_stats_resp_sb {
2859 u8 opcode;
2860 #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS 0x8eUL
2861 #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_LAST \
2862 CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS
2863 u8 status;
2864 __le16 cookie;
2865 __le16 flags;
2866 u8 resp_size;
2867 u8 rsvd;
2868 __le32 num_counters;
2869 __le32 rsvd1;
2870 __le64 to_retransmits;
2871 __le64 seq_err_naks_rcvd;
2872 __le64 max_retry_exceeded;
2873 __le64 rnr_naks_rcvd;
2874 __le64 missing_resp;
2875 __le64 unrecoverable_err;
2876 __le64 bad_resp_err;
2877 __le64 local_qp_op_err;
2878 __le64 local_protection_err;
2879 __le64 mem_mgmt_op_err;
2880 __le64 remote_invalid_req_err;
2881 __le64 remote_access_err;
2882 __le64 remote_op_err;
2883 __le64 dup_req;
2884 __le64 res_exceed_max;
2885 __le64 res_length_mismatch;
2886 __le64 res_exceeds_wqe;
2887 __le64 res_opcode_err;
2888 __le64 res_rx_invalid_rkey;
2889 __le64 res_rx_domain_err;
2890 __le64 res_rx_no_perm;
2891 __le64 res_rx_range_err;
2892 __le64 res_tx_invalid_rkey;
2893 __le64 res_tx_domain_err;
2894 __le64 res_tx_no_perm;
2895 __le64 res_tx_range_err;
2896 __le64 res_irrq_oflow;
2897 __le64 res_unsup_opcode;
2898 __le64 res_unaligned_atomic;
2899 __le64 res_rem_inv_err;
2900 __le64 res_mem_error;
2901 __le64 res_srq_err;
2902 __le64 res_cmp_err;
2903 __le64 res_invalid_dup_rkey;
2904 __le64 res_wqe_format_err;
2905 __le64 res_cq_load_err;
2906 __le64 res_srq_load_err;
2907 __le64 res_tx_pci_err;
2908 __le64 res_rx_pci_err;
2909};
2910
2786/* QP error notification event (16 bytes) */ 2911/* QP error notification event (16 bytes) */
2787struct creq_qp_error_notification { 2912struct creq_qp_error_notification {
2788 u8 type; 2913 u8 type;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 21db3b48a617..4cf17c650c36 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -257,8 +257,8 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
257 if (ep->emss < 128) 257 if (ep->emss < 128)
258 ep->emss = 128; 258 ep->emss = 128;
259 if (ep->emss & 7) 259 if (ep->emss & 7)
260 pr_warn("Warning: misaligned mtu idx %u mss %u emss=%u\n", 260 pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
261 TCPOPT_MSS_G(opt), ep->mss, ep->emss); 261 TCPOPT_MSS_G(opt), ep->mss, ep->emss);
262 pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss, 262 pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
263 ep->emss); 263 ep->emss);
264} 264}
@@ -2733,9 +2733,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2733 return 0; 2733 return 0;
2734 2734
2735 if (cxgb_is_neg_adv(req->status)) { 2735 if (cxgb_is_neg_adv(req->status)) {
2736 pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n", 2736 pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2737 __func__, ep->hwtid, req->status, 2737 ep->hwtid, req->status, neg_adv_str(req->status));
2738 neg_adv_str(req->status));
2739 ep->stats.abort_neg_adv++; 2738 ep->stats.abort_neg_adv++;
2740 mutex_lock(&dev->rdev.stats.lock); 2739 mutex_lock(&dev->rdev.stats.lock);
2741 dev->rdev.stats.neg_adv++; 2740 dev->rdev.stats.neg_adv++;
@@ -3567,8 +3566,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3567 case MORIBUND: 3566 case MORIBUND:
3568 case ABORTING: 3567 case ABORTING:
3569 case DEAD: 3568 case DEAD:
3570 pr_info("%s ignoring disconnect ep %p state %u\n", 3569 pr_debug("ignoring disconnect ep %p state %u\n",
3571 __func__, ep, ep->com.state); 3570 ep, ep->com.state);
3572 break; 3571 break;
3573 default: 3572 default:
3574 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); 3573 WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
@@ -4097,9 +4096,15 @@ static void process_work(struct work_struct *work)
4097 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 4096 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4098 opcode = rpl->ot.opcode; 4097 opcode = rpl->ot.opcode;
4099 4098
4100 ret = work_handlers[opcode](dev, skb); 4099 if (opcode >= ARRAY_SIZE(work_handlers) ||
4101 if (!ret) 4100 !work_handlers[opcode]) {
4101 pr_err("No handler for opcode 0x%x.\n", opcode);
4102 kfree_skb(skb); 4102 kfree_skb(skb);
4103 } else {
4104 ret = work_handlers[opcode](dev, skb);
4105 if (!ret)
4106 kfree_skb(skb);
4107 }
4103 process_timedout_eps(); 4108 process_timedout_eps();
4104 } 4109 }
4105} 4110}
@@ -4201,8 +4206,8 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4201 return 0; 4206 return 0;
4202 } 4207 }
4203 if (cxgb_is_neg_adv(req->status)) { 4208 if (cxgb_is_neg_adv(req->status)) {
4204 pr_warn("%s Negative advice on abort- tid %u status %d (%s)\n", 4209 pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
4205 __func__, ep->hwtid, req->status, 4210 ep->hwtid, req->status,
4206 neg_adv_str(req->status)); 4211 neg_adv_str(req->status));
4207 goto out; 4212 goto out;
4208 } 4213 }
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index af77d128d242..7a9d0de89d6a 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(c4iw_wr_log_size_order,
66 66
67static LIST_HEAD(uld_ctx_list); 67static LIST_HEAD(uld_ctx_list);
68static DEFINE_MUTEX(dev_mutex); 68static DEFINE_MUTEX(dev_mutex);
69struct workqueue_struct *reg_workq; 69static struct workqueue_struct *reg_workq;
70 70
71#define DB_FC_RESUME_SIZE 64 71#define DB_FC_RESUME_SIZE 64
72#define DB_FC_RESUME_DELAY 1 72#define DB_FC_RESUME_DELAY 1
@@ -108,19 +108,19 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
108 idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) & 108 idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
109 (wq->rdev->wr_log_size - 1); 109 (wq->rdev->wr_log_size - 1);
110 le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]); 110 le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
111 getnstimeofday(&le.poll_host_ts); 111 le.poll_host_time = ktime_get();
112 le.valid = 1; 112 le.valid = 1;
113 le.cqe_sge_ts = CQE_TS(cqe); 113 le.cqe_sge_ts = CQE_TS(cqe);
114 if (SQ_TYPE(cqe)) { 114 if (SQ_TYPE(cqe)) {
115 le.qid = wq->sq.qid; 115 le.qid = wq->sq.qid;
116 le.opcode = CQE_OPCODE(cqe); 116 le.opcode = CQE_OPCODE(cqe);
117 le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts; 117 le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
118 le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts; 118 le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
119 le.wr_id = CQE_WRID_SQ_IDX(cqe); 119 le.wr_id = CQE_WRID_SQ_IDX(cqe);
120 } else { 120 } else {
121 le.qid = wq->rq.qid; 121 le.qid = wq->rq.qid;
122 le.opcode = FW_RI_RECEIVE; 122 le.opcode = FW_RI_RECEIVE;
123 le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts; 123 le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
124 le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts; 124 le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
125 le.wr_id = CQE_WRID_MSN(cqe); 125 le.wr_id = CQE_WRID_MSN(cqe);
126 } 126 }
@@ -130,9 +130,9 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
130static int wr_log_show(struct seq_file *seq, void *v) 130static int wr_log_show(struct seq_file *seq, void *v)
131{ 131{
132 struct c4iw_dev *dev = seq->private; 132 struct c4iw_dev *dev = seq->private;
133 struct timespec prev_ts = {0, 0}; 133 ktime_t prev_time;
134 struct wr_log_entry *lep; 134 struct wr_log_entry *lep;
135 int prev_ts_set = 0; 135 int prev_time_set = 0;
136 int idx, end; 136 int idx, end;
137 137
138#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000) 138#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
@@ -145,33 +145,29 @@ static int wr_log_show(struct seq_file *seq, void *v)
145 lep = &dev->rdev.wr_log[idx]; 145 lep = &dev->rdev.wr_log[idx];
146 while (idx != end) { 146 while (idx != end) {
147 if (lep->valid) { 147 if (lep->valid) {
148 if (!prev_ts_set) { 148 if (!prev_time_set) {
149 prev_ts_set = 1; 149 prev_time_set = 1;
150 prev_ts = lep->poll_host_ts; 150 prev_time = lep->poll_host_time;
151 } 151 }
152 seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode " 152 seq_printf(seq, "%04u: nsec %llu qid %u opcode "
153 "%u %s 0x%x host_wr_delta sec %lu nsec %lu " 153 "%u %s 0x%x host_wr_delta nsec %llu "
154 "post_sge_ts 0x%llx cqe_sge_ts 0x%llx " 154 "post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
155 "poll_sge_ts 0x%llx post_poll_delta_ns %llu " 155 "poll_sge_ts 0x%llx post_poll_delta_ns %llu "
156 "cqe_poll_delta_ns %llu\n", 156 "cqe_poll_delta_ns %llu\n",
157 idx, 157 idx,
158 timespec_sub(lep->poll_host_ts, 158 ktime_to_ns(ktime_sub(lep->poll_host_time,
159 prev_ts).tv_sec, 159 prev_time)),
160 timespec_sub(lep->poll_host_ts,
161 prev_ts).tv_nsec,
162 lep->qid, lep->opcode, 160 lep->qid, lep->opcode,
163 lep->opcode == FW_RI_RECEIVE ? 161 lep->opcode == FW_RI_RECEIVE ?
164 "msn" : "wrid", 162 "msn" : "wrid",
165 lep->wr_id, 163 lep->wr_id,
166 timespec_sub(lep->poll_host_ts, 164 ktime_to_ns(ktime_sub(lep->poll_host_time,
167 lep->post_host_ts).tv_sec, 165 lep->post_host_time)),
168 timespec_sub(lep->poll_host_ts,
169 lep->post_host_ts).tv_nsec,
170 lep->post_sge_ts, lep->cqe_sge_ts, 166 lep->post_sge_ts, lep->cqe_sge_ts,
171 lep->poll_sge_ts, 167 lep->poll_sge_ts,
172 ts2ns(lep->poll_sge_ts - lep->post_sge_ts), 168 ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
173 ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts)); 169 ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
174 prev_ts = lep->poll_host_ts; 170 prev_time = lep->poll_host_time;
175 } 171 }
176 idx++; 172 idx++;
177 if (idx > (dev->rdev.wr_log_size - 1)) 173 if (idx > (dev->rdev.wr_log_size - 1))
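The cxgb4 wr_log conversion replaces struct timespec pairs, whose differences had to be printed as separate seconds and nanoseconds via timespec_sub(), with ktime_t values whose difference collapses into a single signed nanosecond count. The delta computation in isolation:

#include <linux/ktime.h>

/* New form: one signed nanosecond delta between two captured times. */
static s64 wr_delta_ns(ktime_t later, ktime_t earlier)
{
	return ktime_to_ns(ktime_sub(later, earlier));
}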
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index a252d5c40ae3..3e9d8b277ab9 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -236,7 +236,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
236 if (atomic_dec_and_test(&chp->refcnt)) 236 if (atomic_dec_and_test(&chp->refcnt))
237 wake_up(&chp->wait); 237 wake_up(&chp->wait);
238 } else { 238 } else {
239 pr_warn("%s unknown cqid 0x%x\n", __func__, qid); 239 pr_debug("unknown cqid 0x%x\n", qid);
240 spin_unlock_irqrestore(&dev->lock, flag); 240 spin_unlock_irqrestore(&dev->lock, flag);
241 } 241 }
242 return 0; 242 return 0;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 65dd3726ca02..cc929002c05e 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -153,8 +153,8 @@ struct c4iw_hw_queue {
153}; 153};
154 154
155struct wr_log_entry { 155struct wr_log_entry {
156 struct timespec post_host_ts; 156 ktime_t post_host_time;
157 struct timespec poll_host_ts; 157 ktime_t poll_host_time;
158 u64 post_sge_ts; 158 u64 post_sge_ts;
159 u64 cqe_sge_ts; 159 u64 cqe_sge_ts;
160 u64 poll_sge_ts; 160 u64 poll_sge_ts;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index d5c92fc520d6..de77b6027d69 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1042,7 +1042,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1042 if (c4iw_wr_log) { 1042 if (c4iw_wr_log) {
1043 swsqe->sge_ts = cxgb4_read_sge_timestamp( 1043 swsqe->sge_ts = cxgb4_read_sge_timestamp(
1044 qhp->rhp->rdev.lldi.ports[0]); 1044 qhp->rhp->rdev.lldi.ports[0]);
1045 getnstimeofday(&swsqe->host_ts); 1045 swsqe->host_time = ktime_get();
1046 } 1046 }
1047 1047
1048 init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); 1048 init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1117,8 +1117,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1117 qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts = 1117 qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
1118 cxgb4_read_sge_timestamp( 1118 cxgb4_read_sge_timestamp(
1119 qhp->rhp->rdev.lldi.ports[0]); 1119 qhp->rhp->rdev.lldi.ports[0]);
1120 getnstimeofday( 1120 qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
1121 &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts); 1121 ktime_get();
1122 } 1122 }
1123 1123
1124 wqe->recv.opcode = FW_RI_RECV_WR; 1124 wqe->recv.opcode = FW_RI_RECV_WR;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 79e8ee12c391..8369c7c8de83 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -277,7 +277,7 @@ struct t4_swsqe {
277 int signaled; 277 int signaled;
278 u16 idx; 278 u16 idx;
279 int flushed; 279 int flushed;
280 struct timespec host_ts; 280 ktime_t host_time;
281 u64 sge_ts; 281 u64 sge_ts;
282}; 282};
283 283
@@ -318,7 +318,7 @@ struct t4_sq {
318 318
319struct t4_swrqe { 319struct t4_swrqe {
320 u64 wr_id; 320 u64 wr_id;
321 struct timespec host_ts; 321 ktime_t host_time;
322 u64 sge_ts; 322 u64 sge_ts;
323}; 323};
324 324
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 4f057e8ffe50..6660f920f42e 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -6518,11 +6518,12 @@ static void _dc_start(struct hfi1_devdata *dd)
6518 if (!dd->dc_shutdown) 6518 if (!dd->dc_shutdown)
6519 return; 6519 return;
6520 6520
6521 /* 6521 /* Take the 8051 out of reset */
6522 * Take the 8051 out of reset, wait until 8051 is ready, and set host 6522 write_csr(dd, DC_DC8051_CFG_RST, 0ull);
6523 * version bit. 6523 /* Wait until 8051 is ready */
6524 */ 6524 if (wait_fm_ready(dd, TIMEOUT_8051_START))
6525 release_and_wait_ready_8051_firmware(dd); 6525 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
6526 __func__);
6526 6527
6527 /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ 6528 /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
6528 write_csr(dd, DCC_CFG_RESET, 0x10); 6529 write_csr(dd, DCC_CFG_RESET, 0x10);
@@ -8564,23 +8565,27 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
8564} 8565}
8565 8566
8566/* 8567/*
8567 * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function
8568 * will still continue executing.
8569 *
8570 * Returns: 8568 * Returns:
8571 * < 0 = Linux error, not able to get access 8569 * < 0 = Linux error, not able to get access
8572 * > 0 = 8051 command RETURN_CODE 8570 * > 0 = 8051 command RETURN_CODE
8573 */ 8571 */
8574static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, 8572static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
8575 u64 *out_data) 8573 u64 *out_data)
8576{ 8574{
8577 u64 reg, completed; 8575 u64 reg, completed;
8578 int return_code; 8576 int return_code;
8579 unsigned long timeout; 8577 unsigned long timeout;
8580 8578
8581 lockdep_assert_held(&dd->dc8051_lock);
8582 hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); 8579 hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
8583 8580
8581 mutex_lock(&dd->dc8051_lock);
8582
8583 /* We can't send any commands to the 8051 if it's in reset */
8584 if (dd->dc_shutdown) {
8585 return_code = -ENODEV;
8586 goto fail;
8587 }
8588
8584 /* 8589 /*
8585 * If an 8051 host command timed out previously, then the 8051 is 8590 * If an 8051 host command timed out previously, then the 8051 is
8586 * stuck. 8591 * stuck.
@@ -8681,29 +8686,6 @@ static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
8681 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); 8686 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
8682 8687
8683fail: 8688fail:
8684 return return_code;
8685}
8686
8687/*
8688 * Returns:
8689 * < 0 = Linux error, not able to get access
8690 * > 0 = 8051 command RETURN_CODE
8691 */
8692static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
8693 u64 *out_data)
8694{
8695 int return_code;
8696
8697 mutex_lock(&dd->dc8051_lock);
8698 /* We can't send any commands to the 8051 if it's in reset */
8699 if (dd->dc_shutdown) {
8700 return_code = -ENODEV;
8701 goto fail;
8702 }
8703
8704 return_code = _do_8051_command(dd, type, in_data, out_data);
8705
8706fail:
8707 mutex_unlock(&dd->dc8051_lock); 8689 mutex_unlock(&dd->dc8051_lock);
8708 return return_code; 8690 return return_code;
8709} 8691}
@@ -8713,17 +8695,16 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
8713 return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); 8695 return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
8714} 8696}
8715 8697
8716static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, 8698int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
8717 u8 lane_id, u32 config_data) 8699 u8 lane_id, u32 config_data)
8718{ 8700{
8719 u64 data; 8701 u64 data;
8720 int ret; 8702 int ret;
8721 8703
8722 lockdep_assert_held(&dd->dc8051_lock);
8723 data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT 8704 data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
8724 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT 8705 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
8725 | (u64)config_data << LOAD_DATA_DATA_SHIFT; 8706 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
8726 ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); 8707 ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
8727 if (ret != HCMD_SUCCESS) { 8708 if (ret != HCMD_SUCCESS) {
8728 dd_dev_err(dd, 8709 dd_dev_err(dd,
8729 "load 8051 config: field id %d, lane %d, err %d\n", 8710 "load 8051 config: field id %d, lane %d, err %d\n",
@@ -8732,18 +8713,6 @@ static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id,
8732 return ret; 8713 return ret;
8733} 8714}
8734 8715
8735int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
8736 u8 lane_id, u32 config_data)
8737{
8738 int return_code;
8739
8740 mutex_lock(&dd->dc8051_lock);
8741 return_code = _load_8051_config(dd, field_id, lane_id, config_data);
8742 mutex_unlock(&dd->dc8051_lock);
8743
8744 return return_code;
8745}
8746
8747/* 8716/*
8748 * Read the 8051 firmware "registers". Use the RAM directly. Always 8717 * Read the 8051 firmware "registers". Use the RAM directly. Always
8749 * set the result, even on error. 8718 * set the result, even on error.
@@ -8859,14 +8828,13 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
8859 u32 frame; 8828 u32 frame;
8860 u32 mask; 8829 u32 mask;
8861 8830
8862 lockdep_assert_held(&dd->dc8051_lock);
8863 mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); 8831 mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
8864 read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); 8832 read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
8865 /* Clear, then set field */ 8833 /* Clear, then set field */
8866 frame &= ~mask; 8834 frame &= ~mask;
8867 frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); 8835 frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
8868 return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, 8836 return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
8869 frame); 8837 frame);
8870} 8838}
8871 8839
8872void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, 8840void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
@@ -9270,6 +9238,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
9270 if (ret != HCMD_SUCCESS) 9238 if (ret != HCMD_SUCCESS)
9271 goto set_local_link_attributes_fail; 9239 goto set_local_link_attributes_fail;
9272 9240
9241 ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
9242 if (ret != HCMD_SUCCESS) {
9243 dd_dev_err(dd,
9244 "Failed to set host interface version, return 0x%x\n",
9245 ret);
9246 goto set_local_link_attributes_fail;
9247 }
9248
9273 /* 9249 /*
9274 * DC supports continuous updates. 9250 * DC supports continuous updates.
9275 */ 9251 */
@@ -14944,9 +14920,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
14944 14920
14945 if (num_vls < HFI1_MIN_VLS_SUPPORTED || 14921 if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
14946 num_vls > HFI1_MAX_VLS_SUPPORTED) { 14922 num_vls > HFI1_MAX_VLS_SUPPORTED) {
14947 hfi1_early_err(&pdev->dev, 14923 dd_dev_err(dd, "Invalid num_vls %u, using %u VLs\n",
14948 "Invalid num_vls %u, using %u VLs\n", 14924 num_vls, HFI1_MAX_VLS_SUPPORTED);
14949 num_vls, HFI1_MAX_VLS_SUPPORTED);
14950 num_vls = HFI1_MAX_VLS_SUPPORTED; 14925 num_vls = HFI1_MAX_VLS_SUPPORTED;
14951 } 14926 }
14952 ppd->vls_supported = num_vls; 14927 ppd->vls_supported = num_vls;
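Editor's note on the chip.c hunks above: the unlocked _do_8051_command()/_load_8051_config() workers and their mutex-taking wrappers are collapsed into single functions that take dd->dc8051_lock themselves and perform the "8051 is in reset" check under that lock. A minimal sketch of the resulting shape, with hypothetical names only:

/* Hypothetical sketch: one entry point owns the lock and the
 * device-in-reset check, so callers cannot get the layering wrong.
 * The mutex is assumed to be set up with mutex_init() at device
 * allocation time. */
#include <linux/mutex.h>
#include <linux/errno.h>
#include <linux/types.h>

struct fw_ctrl {
	struct mutex lock;
	bool in_reset;
};

static int fw_command(struct fw_ctrl *fc, u32 type, u64 in_data, u64 *out_data)
{
	int ret;

	mutex_lock(&fc->lock);
	if (fc->in_reset) {
		/* firmware cannot accept commands while held in reset */
		ret = -ENODEV;
		goto out;
	}

	/* ... write the command CSRs, poll for completion ... */
	ret = 0;

out:
	mutex_unlock(&fc->lock);
	return ret;
}

With a single locked entry point, the lockdep_assert_held() annotations on the old internal helpers become unnecessary, which is exactly what the hunks remove.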
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 133e313feca4..21fca8ec5076 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -508,6 +508,7 @@
508#define DOWN_REMOTE_REASON_SHIFT 16 508#define DOWN_REMOTE_REASON_SHIFT 16
509#define DOWN_REMOTE_REASON_MASK 0xff 509#define DOWN_REMOTE_REASON_MASK 0xff
510 510
511#define HOST_INTERFACE_VERSION 1
511#define HOST_INTERFACE_VERSION_SHIFT 16 512#define HOST_INTERFACE_VERSION_SHIFT 16
512#define HOST_INTERFACE_VERSION_MASK 0xff 513#define HOST_INTERFACE_VERSION_MASK 0xff
513 514
@@ -713,7 +714,6 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
713 u8 *ver_patch); 714 u8 *ver_patch);
714int write_host_interface_version(struct hfi1_devdata *dd, u8 version); 715int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
715void read_guid(struct hfi1_devdata *dd); 716void read_guid(struct hfi1_devdata *dd);
716int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd);
717int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); 717int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
718void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, 718void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
719 u8 neigh_reason, u8 rem_reason); 719 u8 neigh_reason, u8 rem_reason);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4f65ac671044..067b29f35f21 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -159,22 +159,6 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
159 return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask); 159 return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
160} 160}
161 161
162const char *get_unit_name(int unit)
163{
164 static char iname[16];
165
166 snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
167 return iname;
168}
169
170const char *get_card_name(struct rvt_dev_info *rdi)
171{
172 struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
173 struct hfi1_devdata *dd = container_of(ibdev,
174 struct hfi1_devdata, verbs_dev);
175 return get_unit_name(dd->unit);
176}
177
178struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi) 162struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
179{ 163{
180 struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi); 164 struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 98868df78a7e..2b57ba70ddd6 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -68,7 +68,6 @@
68#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw" 68#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
69#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw" 69#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
70#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" 70#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
71#define HOST_INTERFACE_VERSION 1
72 71
73MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC); 72MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC);
74MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME); 73MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME);
@@ -976,46 +975,6 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout)
976} 975}
977 976
978/* 977/*
979 * Clear all reset bits, releasing the 8051.
980 * Wait for firmware to be ready to accept host requests.
981 * Then, set host version bit.
982 *
983 * This function executes even if the 8051 is in reset mode when
984 * dd->dc_shutdown == 1.
985 *
986 * Expects dd->dc8051_lock to be held.
987 */
988int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd)
989{
990 int ret;
991
992 lockdep_assert_held(&dd->dc8051_lock);
993 /* clear all reset bits, releasing the 8051 */
994 write_csr(dd, DC_DC8051_CFG_RST, 0ull);
995
996 /*
997 * Wait for firmware to be ready to accept host
998 * requests.
999 */
1000 ret = wait_fm_ready(dd, TIMEOUT_8051_START);
1001 if (ret) {
1002 dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n",
1003 get_firmware_state(dd));
1004 return ret;
1005 }
1006
1007 ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
1008 if (ret != HCMD_SUCCESS) {
1009 dd_dev_err(dd,
1010 "Failed to set host interface version, return 0x%x\n",
1011 ret);
1012 return -EIO;
1013 }
1014
1015 return 0;
1016}
1017
1018/*
1019 * Load the 8051 firmware. 978 * Load the 8051 firmware.
1020 */ 979 */
1021static int load_8051_firmware(struct hfi1_devdata *dd, 980static int load_8051_firmware(struct hfi1_devdata *dd,
@@ -1080,22 +1039,31 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
1080 if (ret) 1039 if (ret)
1081 return ret; 1040 return ret;
1082 1041
1042 /* clear all reset bits, releasing the 8051 */
1043 write_csr(dd, DC_DC8051_CFG_RST, 0ull);
1044
1083 /* 1045 /*
1084 * Clear all reset bits, releasing the 8051.
1085 * DC reset step 5. Wait for firmware to be ready to accept host 1046 * DC reset step 5. Wait for firmware to be ready to accept host
1086 * requests. 1047 * requests.
1087 * Then, set host version bit.
1088 */ 1048 */
1089 mutex_lock(&dd->dc8051_lock); 1049 ret = wait_fm_ready(dd, TIMEOUT_8051_START);
1090 ret = release_and_wait_ready_8051_firmware(dd); 1050 if (ret) { /* timed out */
1091 mutex_unlock(&dd->dc8051_lock); 1051 dd_dev_err(dd, "8051 start timeout, current state 0x%x\n",
1092 if (ret) 1052 get_firmware_state(dd));
1093 return ret; 1053 return -ETIMEDOUT;
1054 }
1094 1055
1095 read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); 1056 read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
1096 dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", 1057 dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
1097 (int)ver_major, (int)ver_minor, (int)ver_patch); 1058 (int)ver_major, (int)ver_minor, (int)ver_patch);
1098 dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); 1059 dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
1060 ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
1061 if (ret != HCMD_SUCCESS) {
1062 dd_dev_err(dd,
1063 "Failed to set host interface version, return 0x%x\n",
1064 ret);
1065 return -EIO;
1066 }
1099 1067
1100 return 0; 1068 return 0;
1101} 1069}
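Editor's note on the firmware.c hunk above: load_8051_firmware() now releases the 8051 from reset and waits for it inline, and the host interface version is written from load_8051_firmware() and again from set_local_link_attributes() (see the chip.c hunk earlier) rather than from a shared release_and_wait_ready_8051_firmware() helper. The wait itself is the usual poll-until-ready-or-timeout loop; a minimal, hypothetical sketch (the real wait_fm_ready() reads a firmware state register rather than a callback):

/* Hypothetical sketch of a wait_fm_ready()-style poll loop. */
#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

static int wait_device_ready(bool (*ready)(void *), void *ctx, u32 ms_timeout)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(ms_timeout);

	while (!ready(ctx)) {
		if (time_after(jiffies, timeout))
			return -ETIMEDOUT;
		usleep_range(1000, 2000);	/* poll roughly every 1-2 ms */
	}
	return 0;
}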
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 8ce9118d4a7f..b42c22292597 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1623,7 +1623,7 @@ static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey)
1623 * the 'error info' for this failure. 1623 * the 'error info' for this failure.
1624 */ 1624 */
1625static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey, 1625static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
1626 u16 slid) 1626 u32 slid)
1627{ 1627{
1628 struct hfi1_devdata *dd = ppd->dd; 1628 struct hfi1_devdata *dd = ppd->dd;
1629 1629
@@ -1971,8 +1971,6 @@ int get_platform_config_field(struct hfi1_devdata *dd,
1971 table_type, int table_index, int field_index, 1971 table_type, int table_index, int field_index,
1972 u32 *data, u32 len); 1972 u32 *data, u32 len);
1973 1973
1974const char *get_unit_name(int unit);
1975const char *get_card_name(struct rvt_dev_info *rdi);
1976struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi); 1974struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
1977 1975
1978/* 1976/*
@@ -2122,39 +2120,42 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
2122 2120
2123#define dd_dev_emerg(dd, fmt, ...) \ 2121#define dd_dev_emerg(dd, fmt, ...) \
2124 dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ 2122 dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
2125 get_unit_name((dd)->unit), ##__VA_ARGS__) 2123 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
2126 2124
2127#define dd_dev_err(dd, fmt, ...) \ 2125#define dd_dev_err(dd, fmt, ...) \
2128 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ 2126 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
2129 get_unit_name((dd)->unit), ##__VA_ARGS__) 2127 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
2130 2128
2131#define dd_dev_err_ratelimited(dd, fmt, ...) \ 2129#define dd_dev_err_ratelimited(dd, fmt, ...) \
2132 dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ 2130 dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
2133 get_unit_name((dd)->unit), ##__VA_ARGS__) 2131 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
2132 ##__VA_ARGS__)
2134 2133
2135#define dd_dev_warn(dd, fmt, ...) \ 2134#define dd_dev_warn(dd, fmt, ...) \
2136 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ 2135 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
2137 get_unit_name((dd)->unit), ##__VA_ARGS__) 2136 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
2138 2137
2139#define dd_dev_warn_ratelimited(dd, fmt, ...) \ 2138#define dd_dev_warn_ratelimited(dd, fmt, ...) \
2140 dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ 2139 dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
2141 get_unit_name((dd)->unit), ##__VA_ARGS__) 2140 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
2141 ##__VA_ARGS__)
2142 2142
2143#define dd_dev_info(dd, fmt, ...) \ 2143#define dd_dev_info(dd, fmt, ...) \
2144 dev_info(&(dd)->pcidev->dev, "%s: " fmt, \ 2144 dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
2145 get_unit_name((dd)->unit), ##__VA_ARGS__) 2145 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
2146 2146
2147#define dd_dev_info_ratelimited(dd, fmt, ...) \ 2147#define dd_dev_info_ratelimited(dd, fmt, ...) \
2148 dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ 2148 dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
2149 get_unit_name((dd)->unit), ##__VA_ARGS__) 2149 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
2150 ##__VA_ARGS__)
2150 2151
2151#define dd_dev_dbg(dd, fmt, ...) \ 2152#define dd_dev_dbg(dd, fmt, ...) \
2152 dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \ 2153 dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
2153 get_unit_name((dd)->unit), ##__VA_ARGS__) 2154 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
2154 2155
2155#define hfi1_dev_porterr(dd, port, fmt, ...) \ 2156#define hfi1_dev_porterr(dd, port, fmt, ...) \
2156 dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ 2157 dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
2157 get_unit_name((dd)->unit), (port), ##__VA_ARGS__) 2158 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
2158 2159
2159/* 2160/*
2160 * this is used for formatting hw error messages... 2161 * this is used for formatting hw error messages...
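Editor's note on the driver.c and hfi.h hunks above: get_unit_name(), which formatted into a single shared static buffer, is removed, and the dd_dev_*() macros print the registered rdmavt device name via rvt_get_ibdev_name() instead; that name is set once at allocation time (see the init.c hunk that follows). A minimal sketch contrasting the two approaches, with hypothetical my_unit names:

/* Hypothetical sketch: the message prefix comes from per-device state
 * that is written once, instead of from a helper that snprintf()s into
 * one static char[] shared by every unit (not safe once more than one
 * device, or more than one CPU, logs at the same time). */
#include <linux/device.h>

struct my_unit {
	struct device *dev;
	char name[64];		/* e.g. "hfi1_0", set at allocation time */
};

#define my_unit_err(u, fmt, ...) \
	dev_err((u)->dev, "%s: " fmt, (u)->name, ##__VA_ARGS__)

#define my_unit_warn(u, fmt, ...) \
	dev_warn((u)->dev, "%s: " fmt, (u)->name, ##__VA_ARGS__)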
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 8e3b3e7d829a..9b128268fb28 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1272,6 +1272,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
1272 "Could not allocate unit ID: error %d\n", -ret); 1272 "Could not allocate unit ID: error %d\n", -ret);
1273 goto bail; 1273 goto bail;
1274 } 1274 }
1275 rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
1276
1275 /* 1277 /*
1276 * Initialize all locks for the device. This needs to be as early as 1278 * Initialize all locks for the device. This needs to be as early as
1277 * possible so locks are usable. 1279 * possible so locks are usable.
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index cf8dba34fe30..34547a48a445 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -4348,11 +4348,7 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
4348 */ 4348 */
4349 if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) 4349 if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
4350 return 0; 4350 return 0;
4351 /* 4351 ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
4352 * On OPA devices it is okay to lose the upper 16 bits of LID as this
4353 * information is obtained elsewhere. Mask off the upper 16 bits.
4354 */
4355 ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid));
4356 return 1; 4352 return 1;
4357} 4353}
4358 4354
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4b01ccd895b4..5507910e8b8a 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -556,6 +556,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
556 struct sdma_engine *sde; 556 struct sdma_engine *sde;
557 struct send_context *send_context; 557 struct send_context *send_context;
558 struct rvt_ack_entry *e = NULL; 558 struct rvt_ack_entry *e = NULL;
559 struct rvt_srq *srq = qp->ibqp.srq ?
560 ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;
559 561
560 sde = qp_to_sdma_engine(qp, priv->s_sc); 562 sde = qp_to_sdma_engine(qp, priv->s_sc);
561 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 563 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
@@ -563,7 +565,7 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
563 if (qp->s_ack_queue) 565 if (qp->s_ack_queue)
564 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 566 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
565 seq_printf(s, 567 seq_printf(s,
566 "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x\n", 568 "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
567 iter->n, 569 iter->n,
568 qp_idle(qp) ? "I" : "B", 570 qp_idle(qp) ? "I" : "B",
569 qp->ibqp.qp_num, 571 qp->ibqp.qp_num,
@@ -610,7 +612,11 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
610 /* ack queue information */ 612 /* ack queue information */
611 e ? e->opcode : 0, 613 e ? e->opcode : 0,
612 e ? e->psn : 0, 614 e ? e->psn : 0,
613 e ? e->lpsn : 0); 615 e ? e->lpsn : 0,
616 qp->r_min_rnr_timer,
617 srq ? "SRQ" : "RQ",
618 srq ? srq->rq.size : qp->r_rq.size
619 );
614} 620}
615 621
616void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp) 622void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
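Editor's note on the qp.c hunk above: qp_iter_print() additionally reports the minimum RNR timer and whether the receive side is an SRQ or a per-QP RQ, printing the size of whichever queue is actually in use. A tiny hypothetical seq_file sketch of the same conditional:

/* Hypothetical sketch of the conditional receive-queue reporting. */
#include <linux/seq_file.h>

struct rq_info {
	unsigned int size;
};

static void print_rq_info(struct seq_file *s, const struct rq_info *srq,
			  const struct rq_info *rq)
{
	/* Prefer the SRQ when the QP is attached to one. */
	seq_printf(s, "%s %u\n", srq ? "SRQ" : "RQ",
		   srq ? srq->size : rq->size);
}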
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 7eb5d50578ba..14cc212a21c7 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -841,11 +841,11 @@ static inline void hfi1_make_rc_ack_16B(struct rvt_qp *qp,
841 /* Convert dwords to flits */ 841 /* Convert dwords to flits */
842 len = (*hwords + *nwords) >> 1; 842 len = (*hwords + *nwords) >> 1;
843 843
844 hfi1_make_16b_hdr(hdr, 844 hfi1_make_16b_hdr(hdr, ppd->lid |
845 ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr), 845 (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
846 ((1 << ppd->lmc) - 1)),
846 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 847 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
847 16B), 848 16B), len, pkey, becn, 0, l4, sc5);
848 len, pkey, becn, 0, l4, sc5);
849 849
850 bth0 = pkey | (OP(ACKNOWLEDGE) << 24); 850 bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
851 bth0 |= extra_bytes << 20; 851 bth0 |= extra_bytes << 20;
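Editor's note on the rc.c hunk above: the 16B source LID is now built as the port LID OR'd with only the low LMC bits of the path bits, i.e. the path bits are masked with ((1 << lmc) - 1) so they cannot spill outside the range of LIDs the port owns under LID mask control. A one-function hypothetical helper showing just the arithmetic:

/* Hypothetical sketch: fold the path bits into the base LID without
 * letting them exceed the 2^lmc LIDs assigned to the port. */
#include <linux/types.h>

static inline u32 slid_with_path_bits(u32 base_lid, u32 path_bits, u8 lmc)
{
	return base_lid | (path_bits & ((1U << lmc) - 1));
}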
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a38785e224cc..b8776a362a91 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1486,7 +1486,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
1486 props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ? 1486 props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
1487 4096 : hfi1_max_mtu), IB_MTU_4096); 1487 4096 : hfi1_max_mtu), IB_MTU_4096);
1488 props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : 1488 props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
1489 mtu_to_enum(ppd->ibmtu, IB_MTU_2048); 1489 mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
1490 1490
1491 /* 1491 /*
1492 * sm_lid of 0xFFFF needs special handling so that it can 1492 * sm_lid of 0xFFFF needs special handling so that it can
@@ -1844,7 +1844,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1844 struct hfi1_ibport *ibp = &ppd->ibport_data; 1844 struct hfi1_ibport *ibp = &ppd->ibport_data;
1845 unsigned i; 1845 unsigned i;
1846 int ret; 1846 int ret;
1847 size_t lcpysz = IB_DEVICE_NAME_MAX;
1848 1847
1849 for (i = 0; i < dd->num_pports; i++) 1848 for (i = 0; i < dd->num_pports; i++)
1850 init_ibport(ppd + i); 1849 init_ibport(ppd + i);
@@ -1872,8 +1871,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1872 */ 1871 */
1873 if (!ib_hfi1_sys_image_guid) 1872 if (!ib_hfi1_sys_image_guid)
1874 ib_hfi1_sys_image_guid = ibdev->node_guid; 1873 ib_hfi1_sys_image_guid = ibdev->node_guid;
1875 lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
1876 strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
1877 ibdev->owner = THIS_MODULE; 1874 ibdev->owner = THIS_MODULE;
1878 ibdev->phys_port_cnt = dd->num_pports; 1875 ibdev->phys_port_cnt = dd->num_pports;
1879 ibdev->dev.parent = &dd->pcidev->dev; 1876 ibdev->dev.parent = &dd->pcidev->dev;
@@ -1893,7 +1890,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1893 * Fill in rvt info object. 1890 * Fill in rvt info object.
1894 */ 1891 */
1895 dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; 1892 dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
1896 dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
1897 dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; 1893 dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
1898 dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; 1894 dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
1899 dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; 1895 dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index ff426a625e13..97bf2cd1cacb 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -5,7 +5,7 @@
5ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 5ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
6 6
7obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o 7obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
8hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \ 8hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
9 hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ 9 hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
10 hns_roce_cq.o hns_roce_alloc.o 10 hns_roce_cq.o hns_roce_alloc.o
11obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o 11obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 1085cb249bc1..9ebe839d8b24 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -103,6 +103,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
103 context->out_param = out_param; 103 context->out_param = out_param;
104 complete(&context->done); 104 complete(&context->done);
105} 105}
106EXPORT_SYMBOL_GPL(hns_roce_cmd_event);
106 107
107/* this should be called with "use_events" */ 108/* this should be called with "use_events" */
108static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, 109static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index b1c94223c28b..9549ae51a0dd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -88,6 +88,16 @@ enum {
88 HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38, 88 HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38,
89 HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39, 89 HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39,
90 HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a, 90 HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a,
91
92 /* EQC commands */
93 HNS_ROCE_CMD_CREATE_AEQC = 0x80,
94 HNS_ROCE_CMD_MODIFY_AEQC = 0x81,
95 HNS_ROCE_CMD_QUERY_AEQC = 0x82,
96 HNS_ROCE_CMD_DESTROY_AEQC = 0x83,
97 HNS_ROCE_CMD_CREATE_CEQC = 0x90,
98 HNS_ROCE_CMD_MODIFY_CEQC = 0x91,
99 HNS_ROCE_CMD_QUERY_CEQC = 0x92,
100 HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
91}; 101};
92 102
93enum { 103enum {
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 7ecb7a4147a8..dd67fafd0c40 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -376,6 +376,12 @@
376#define ROCEE_RX_CMQ_TAIL_REG 0x07024 376#define ROCEE_RX_CMQ_TAIL_REG 0x07024
377#define ROCEE_RX_CMQ_HEAD_REG 0x07028 377#define ROCEE_RX_CMQ_HEAD_REG 0x07028
378 378
379#define ROCEE_VF_MB_CFG0_REG 0x40
380#define ROCEE_VF_MB_STATUS_REG 0x58
381
382#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
383#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
384
379#define ROCEE_VF_SMAC_CFG0_REG 0x12000 385#define ROCEE_VF_SMAC_CFG0_REG 0x12000
380#define ROCEE_VF_SMAC_CFG1_REG 0x12004 386#define ROCEE_VF_SMAC_CFG1_REG 0x12004
381 387
@@ -385,4 +391,9 @@
385#define ROCEE_VF_SGID_CFG3_REG 0x1000c 391#define ROCEE_VF_SGID_CFG3_REG 0x1000c
386#define ROCEE_VF_SGID_CFG4_REG 0x10010 392#define ROCEE_VF_SGID_CFG4_REG 0x10010
387 393
394#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
395#define ROCEE_VF_ABN_INT_ST_REG 0x13004
396#define ROCEE_VF_ABN_INT_EN_REG 0x13008
397#define ROCEE_VF_EVENT_INT_EN_REG 0x1300c
398
388#endif /* _HNS_ROCE_COMMON_H */ 399#endif /* _HNS_ROCE_COMMON_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 2111b57a3489..bccc9b54c9ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -196,15 +196,14 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
196 if (ret) 196 if (ret)
197 dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, 197 dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
198 hr_cq->cqn); 198 hr_cq->cqn);
199 if (hr_dev->eq_table.eq) { 199
200 /* Waiting interrupt process procedure carried out */ 200 /* Waiting interrupt process procedure carried out */
201 synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); 201 synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
202 202
203 /* wait for all interrupt processed */ 203 /* wait for all interrupt processed */
204 if (atomic_dec_and_test(&hr_cq->refcount)) 204 if (atomic_dec_and_test(&hr_cq->refcount))
205 complete(&hr_cq->free); 205 complete(&hr_cq->free);
206 wait_for_completion(&hr_cq->free); 206 wait_for_completion(&hr_cq->free);
207 }
208 207
209 spin_lock_irq(&cq_table->lock); 208 spin_lock_irq(&cq_table->lock);
210 radix_tree_delete(&cq_table->tree, hr_cq->cqn); 209 radix_tree_delete(&cq_table->tree, hr_cq->cqn);
@@ -460,6 +459,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
460 ++cq->arm_sn; 459 ++cq->arm_sn;
461 cq->comp(cq); 460 cq->comp(cq);
462} 461}
462EXPORT_SYMBOL_GPL(hns_roce_cq_completion);
463 463
464void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) 464void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
465{ 465{
@@ -482,6 +482,7 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
482 if (atomic_dec_and_test(&cq->refcount)) 482 if (atomic_dec_and_test(&cq->refcount))
483 complete(&cq->free); 483 complete(&cq->free);
484} 484}
485EXPORT_SYMBOL_GPL(hns_roce_cq_event);
485 486
486int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) 487int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
487{ 488{
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b154ce40cded..42c3b5a2d441 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -62,12 +62,16 @@
62#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 62#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
63#define HNS_ROCE_MIN_CQE_CNT 16 63#define HNS_ROCE_MIN_CQE_CNT 16
64 64
65#define HNS_ROCE_MAX_IRQ_NUM 34 65#define HNS_ROCE_MAX_IRQ_NUM 128
66 66
67#define HNS_ROCE_COMP_VEC_NUM 32 67#define EQ_ENABLE 1
68#define EQ_DISABLE 0
68 69
69#define HNS_ROCE_AEQE_VEC_NUM 1 70#define HNS_ROCE_CEQ 0
70#define HNS_ROCE_AEQE_OF_VEC_NUM 1 71#define HNS_ROCE_AEQ 1
72
73#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
74#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
71 75
72/* 4G/4K = 1M */ 76/* 4G/4K = 1M */
73#define HNS_ROCE_SL_SHIFT 28 77#define HNS_ROCE_SL_SHIFT 28
@@ -130,6 +134,7 @@ enum hns_roce_event {
130 HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, 134 HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
131 HNS_ROCE_EVENT_TYPE_MB = 0x13, 135 HNS_ROCE_EVENT_TYPE_MB = 0x13,
132 HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, 136 HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14,
137 HNS_ROCE_EVENT_TYPE_FLR = 0x15,
133}; 138};
134 139
135/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ 140/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
@@ -173,6 +178,7 @@ enum {
173enum { 178enum {
174 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), 179 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
175 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), 180 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
181 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2)
176}; 182};
177 183
178enum hns_roce_mtt_type { 184enum hns_roce_mtt_type {
@@ -441,6 +447,21 @@ struct hns_roce_cmd_mailbox {
441 447
442struct hns_roce_dev; 448struct hns_roce_dev;
443 449
450struct hns_roce_rinl_sge {
451 void *addr;
452 u32 len;
453};
454
455struct hns_roce_rinl_wqe {
456 struct hns_roce_rinl_sge *sg_list;
457 u32 sge_cnt;
458};
459
460struct hns_roce_rinl_buf {
461 struct hns_roce_rinl_wqe *wqe_list;
462 u32 wqe_cnt;
463};
464
444struct hns_roce_qp { 465struct hns_roce_qp {
445 struct ib_qp ibqp; 466 struct ib_qp ibqp;
446 struct hns_roce_buf hr_buf; 467 struct hns_roce_buf hr_buf;
@@ -462,7 +483,9 @@ struct hns_roce_qp {
462 u8 resp_depth; 483 u8 resp_depth;
463 u8 state; 484 u8 state;
464 u32 access_flags; 485 u32 access_flags;
486 u32 atomic_rd_en;
465 u32 pkey_index; 487 u32 pkey_index;
488 u32 qkey;
466 void (*event)(struct hns_roce_qp *, 489 void (*event)(struct hns_roce_qp *,
467 enum hns_roce_event); 490 enum hns_roce_event);
468 unsigned long qpn; 491 unsigned long qpn;
@@ -472,6 +495,8 @@ struct hns_roce_qp {
472 495
473 struct hns_roce_sge sge; 496 struct hns_roce_sge sge;
474 u32 next_sge; 497 u32 next_sge;
498
499 struct hns_roce_rinl_buf rq_inl_buf;
475}; 500};
476 501
477struct hns_roce_sqp { 502struct hns_roce_sqp {
@@ -485,6 +510,45 @@ struct hns_roce_ib_iboe {
485 u8 phy_port[HNS_ROCE_MAX_PORTS]; 510 u8 phy_port[HNS_ROCE_MAX_PORTS];
486}; 511};
487 512
513enum {
514 HNS_ROCE_EQ_STAT_INVALID = 0,
515 HNS_ROCE_EQ_STAT_VALID = 2,
516};
517
518struct hns_roce_ceqe {
519 u32 comp;
520};
521
522struct hns_roce_aeqe {
523 u32 asyn;
524 union {
525 struct {
526 u32 qp;
527 u32 rsv0;
528 u32 rsv1;
529 } qp_event;
530
531 struct {
532 u32 cq;
533 u32 rsv0;
534 u32 rsv1;
535 } cq_event;
536
537 struct {
538 u32 ceqe;
539 u32 rsv0;
540 u32 rsv1;
541 } ce_event;
542
543 struct {
544 __le64 out_param;
545 __le16 token;
546 u8 status;
547 u8 rsv0;
548 } __packed cmd;
549 } event;
550};
551
488struct hns_roce_eq { 552struct hns_roce_eq {
489 struct hns_roce_dev *hr_dev; 553 struct hns_roce_dev *hr_dev;
490 void __iomem *doorbell; 554 void __iomem *doorbell;
@@ -498,11 +562,31 @@ struct hns_roce_eq {
498 int log_page_size; 562 int log_page_size;
499 int cons_index; 563 int cons_index;
500 struct hns_roce_buf_list *buf_list; 564 struct hns_roce_buf_list *buf_list;
565 int over_ignore;
566 int coalesce;
567 int arm_st;
568 u64 eqe_ba;
569 int eqe_ba_pg_sz;
570 int eqe_buf_pg_sz;
571 int hop_num;
572 u64 *bt_l0; /* Base address table for L0 */
573 u64 **bt_l1; /* Base address table for L1 */
574 u64 **buf;
575 dma_addr_t l0_dma;
576 dma_addr_t *l1_dma;
577 dma_addr_t *buf_dma;
578 u32 l0_last_num; /* L0 last chunk num */
579 u32 l1_last_num; /* L1 last chunk num */
580 int eq_max_cnt;
581 int eq_period;
582 int shift;
583 dma_addr_t cur_eqe_ba;
584 dma_addr_t nxt_eqe_ba;
501}; 585};
502 586
503struct hns_roce_eq_table { 587struct hns_roce_eq_table {
504 struct hns_roce_eq *eq; 588 struct hns_roce_eq *eq;
505 void __iomem **eqc_base; 589 void __iomem **eqc_base; /* only for hw v1 */
506}; 590};
507 591
508struct hns_roce_caps { 592struct hns_roce_caps {
@@ -528,7 +612,7 @@ struct hns_roce_caps {
528 u32 min_wqes; 612 u32 min_wqes;
529 int reserved_cqs; 613 int reserved_cqs;
530 int num_aeq_vectors; /* 1 */ 614 int num_aeq_vectors; /* 1 */
531 int num_comp_vectors; /* 32 ceq */ 615 int num_comp_vectors;
532 int num_other_vectors; 616 int num_other_vectors;
533 int num_mtpts; 617 int num_mtpts;
534 u32 num_mtt_segs; 618 u32 num_mtt_segs;
@@ -550,7 +634,7 @@ struct hns_roce_caps {
550 u32 pbl_buf_pg_sz; 634 u32 pbl_buf_pg_sz;
551 u32 pbl_hop_num; 635 u32 pbl_hop_num;
552 int aeqe_depth; 636 int aeqe_depth;
553 int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; 637 int ceqe_depth;
554 enum ib_mtu max_mtu; 638 enum ib_mtu max_mtu;
555 u32 qpc_bt_num; 639 u32 qpc_bt_num;
556 u32 srqc_bt_num; 640 u32 srqc_bt_num;
@@ -574,6 +658,9 @@ struct hns_roce_caps {
574 u32 cqe_ba_pg_sz; 658 u32 cqe_ba_pg_sz;
575 u32 cqe_buf_pg_sz; 659 u32 cqe_buf_pg_sz;
576 u32 cqe_hop_num; 660 u32 cqe_hop_num;
661 u32 eqe_ba_pg_sz;
662 u32 eqe_buf_pg_sz;
663 u32 eqe_hop_num;
577 u32 chunk_sz; /* chunk size in non multihop mode*/ 664 u32 chunk_sz; /* chunk size in non multihop mode*/
578 u64 flags; 665 u64 flags;
579}; 666};
@@ -623,6 +710,8 @@ struct hns_roce_hw {
623 int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); 710 int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
624 int (*destroy_cq)(struct ib_cq *ibcq); 711 int (*destroy_cq)(struct ib_cq *ibcq);
625 int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); 712 int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
713 int (*init_eq)(struct hns_roce_dev *hr_dev);
714 void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
626}; 715};
627 716
628struct hns_roce_dev { 717struct hns_roce_dev {
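Editor's note on the hns_roce_device.h hunk above: init_eq()/cleanup_eq() are added to struct hns_roce_hw, which is what allows the shared hns_roce_eq.c below to be deleted; each hardware generation (hip06, hip08) now provides its own EQ implementation behind the ops table. A minimal sketch of dispatching through such a table, with hypothetical roce_* names:

/* Hypothetical sketch of per-generation EQ ops behind a function table. */
#include <linux/errno.h>

struct roce_dev;

struct roce_hw_ops {
	int (*init_eq)(struct roce_dev *dev);
	void (*cleanup_eq)(struct roce_dev *dev);
};

struct roce_dev {
	const struct roce_hw_ops *hw;	/* chosen at probe time */
};

static int roce_setup_eq(struct roce_dev *dev)
{
	if (!dev->hw->init_eq)
		return -EOPNOTSUPP;	/* generation without EQ support */
	return dev->hw->init_eq(dev);	/* hip06 or hip08 implementation */
}

Keeping the generation-specific register layout behind the ops table means the core code never needs to know which EQ format the hardware uses.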
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
deleted file mode 100644
index d184431e2bf5..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ /dev/null
@@ -1,759 +0,0 @@
1/*
2 * Copyright (c) 2016 Hisilicon Limited.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/platform_device.h>
34#include <linux/interrupt.h>
35#include "hns_roce_common.h"
36#include "hns_roce_device.h"
37#include "hns_roce_eq.h"
38
39static void eq_set_cons_index(struct hns_roce_eq *eq, int req_not)
40{
41 roce_raw_write((eq->cons_index & CONS_INDEX_MASK) |
42 (req_not << eq->log_entries), eq->doorbell);
43 /* Memory barrier */
44 mb();
45}
46
47static struct hns_roce_aeqe *get_aeqe(struct hns_roce_eq *eq, u32 entry)
48{
49 unsigned long off = (entry & (eq->entries - 1)) *
50 HNS_ROCE_AEQ_ENTRY_SIZE;
51
52 return (struct hns_roce_aeqe *)((u8 *)
53 (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
54 off % HNS_ROCE_BA_SIZE);
55}
56
57static struct hns_roce_aeqe *next_aeqe_sw(struct hns_roce_eq *eq)
58{
59 struct hns_roce_aeqe *aeqe = get_aeqe(eq, eq->cons_index);
60
61 return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
62 !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
63}
64
65static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
66 struct hns_roce_aeqe *aeqe, int qpn)
67{
68 struct device *dev = &hr_dev->pdev->dev;
69
70 dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
71 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
72 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
73 case HNS_ROCE_LWQCE_QPC_ERROR:
74 dev_warn(dev, "QP %d, QPC error.\n", qpn);
75 break;
76 case HNS_ROCE_LWQCE_MTU_ERROR:
77 dev_warn(dev, "QP %d, MTU error.\n", qpn);
78 break;
79 case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
80 dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
81 break;
82 case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
83 dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
84 break;
85 case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
86 dev_warn(dev, "QP %d, WQE shift error\n", qpn);
87 break;
88 case HNS_ROCE_LWQCE_SL_ERROR:
89 dev_warn(dev, "QP %d, SL error.\n", qpn);
90 break;
91 case HNS_ROCE_LWQCE_PORT_ERROR:
92 dev_warn(dev, "QP %d, port error.\n", qpn);
93 break;
94 default:
95 break;
96 }
97}
98
99static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
100 struct hns_roce_aeqe *aeqe,
101 int qpn)
102{
103 struct device *dev = &hr_dev->pdev->dev;
104
105 dev_warn(dev, "Local Access Violation Work Queue Error.\n");
106 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
107 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
108 case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
109 dev_warn(dev, "QP %d, R_key violation.\n", qpn);
110 break;
111 case HNS_ROCE_LAVWQE_LENGTH_ERROR:
112 dev_warn(dev, "QP %d, length error.\n", qpn);
113 break;
114 case HNS_ROCE_LAVWQE_VA_ERROR:
115 dev_warn(dev, "QP %d, VA error.\n", qpn);
116 break;
117 case HNS_ROCE_LAVWQE_PD_ERROR:
118 dev_err(dev, "QP %d, PD error.\n", qpn);
119 break;
120 case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
121 dev_warn(dev, "QP %d, rw acc error.\n", qpn);
122 break;
123 case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
124 dev_warn(dev, "QP %d, key state error.\n", qpn);
125 break;
126 case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
127 dev_warn(dev, "QP %d, MR operation error.\n", qpn);
128 break;
129 default:
130 break;
131 }
132}
133
134static void hns_roce_qp_err_handle(struct hns_roce_dev *hr_dev,
135 struct hns_roce_aeqe *aeqe,
136 int event_type)
137{
138 struct device *dev = &hr_dev->pdev->dev;
139 int phy_port;
140 int qpn;
141
142 qpn = roce_get_field(aeqe->event.qp_event.qp,
143 HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
144 HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
145 phy_port = roce_get_field(aeqe->event.qp_event.qp,
146 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
147 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
148 if (qpn <= 1)
149 qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
150
151 switch (event_type) {
152 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
153 dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
154 "QP %d, phy_port %d.\n", qpn, phy_port);
155 break;
156 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
157 hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn);
158 break;
159 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
160 hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn);
161 break;
162 default:
163 break;
164 }
165
166 hns_roce_qp_event(hr_dev, qpn, event_type);
167}
168
169static void hns_roce_cq_err_handle(struct hns_roce_dev *hr_dev,
170 struct hns_roce_aeqe *aeqe,
171 int event_type)
172{
173 struct device *dev = &hr_dev->pdev->dev;
174 u32 cqn;
175
176 cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
177 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
178 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
179
180 switch (event_type) {
181 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
182 dev_warn(dev, "CQ 0x%x access err.\n", cqn);
183 break;
184 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
185 dev_warn(dev, "CQ 0x%x overflow\n", cqn);
186 break;
187 case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
188 dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
189 break;
190 default:
191 break;
192 }
193
194 hns_roce_cq_event(hr_dev, cqn, event_type);
195}
196
197static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev,
198 struct hns_roce_aeqe *aeqe)
199{
200 struct device *dev = &hr_dev->pdev->dev;
201
202 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
203 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
204 case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
205 dev_warn(dev, "SDB overflow.\n");
206 break;
207 case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
208 dev_warn(dev, "SDB almost overflow.\n");
209 break;
210 case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
211 dev_warn(dev, "SDB almost empty.\n");
212 break;
213 case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
214 dev_warn(dev, "ODB overflow.\n");
215 break;
216 case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
217 dev_warn(dev, "ODB almost overflow.\n");
218 break;
219 case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
220 dev_warn(dev, "SDB almost empty.\n");
221 break;
222 default:
223 break;
224 }
225}
226
227static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
228{
229 struct device *dev = &hr_dev->pdev->dev;
230 struct hns_roce_aeqe *aeqe;
231 int aeqes_found = 0;
232 int event_type;
233
234 while ((aeqe = next_aeqe_sw(eq))) {
235 dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
236 roce_get_field(aeqe->asyn,
237 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
238 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
239 /* Memory barrier */
240 rmb();
241
242 event_type = roce_get_field(aeqe->asyn,
243 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
244 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
245 switch (event_type) {
246 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
247 dev_warn(dev, "PATH MIG not supported\n");
248 break;
249 case HNS_ROCE_EVENT_TYPE_COMM_EST:
250 dev_warn(dev, "COMMUNICATION established\n");
251 break;
252 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
253 dev_warn(dev, "SQ DRAINED not supported\n");
254 break;
255 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
256 dev_warn(dev, "PATH MIG failed\n");
257 break;
258 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
259 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
260 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
261 hns_roce_qp_err_handle(hr_dev, aeqe, event_type);
262 break;
263 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
264 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
265 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
266 dev_warn(dev, "SRQ not support!\n");
267 break;
268 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
269 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
270 case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
271 hns_roce_cq_err_handle(hr_dev, aeqe, event_type);
272 break;
273 case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
274 dev_warn(dev, "port change.\n");
275 break;
276 case HNS_ROCE_EVENT_TYPE_MB:
277 hns_roce_cmd_event(hr_dev,
278 le16_to_cpu(aeqe->event.cmd.token),
279 aeqe->event.cmd.status,
280 le64_to_cpu(aeqe->event.cmd.out_param
281 ));
282 break;
283 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
284 hns_roce_db_overflow_handle(hr_dev, aeqe);
285 break;
286 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
287 dev_warn(dev, "CEQ 0x%lx overflow.\n",
288 roce_get_field(aeqe->event.ce_event.ceqe,
289 HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
290 HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
291 break;
292 default:
293 dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n",
294 event_type, eq->eqn, eq->cons_index);
295 break;
296 }
297
298 eq->cons_index++;
299 aeqes_found = 1;
300
301 if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
302 dev_warn(dev, "cons_index overflow, set back to zero\n"
303 );
304 eq->cons_index = 0;
305 }
306 }
307
308 eq_set_cons_index(eq, 0);
309
310 return aeqes_found;
311}
312
313static struct hns_roce_ceqe *get_ceqe(struct hns_roce_eq *eq, u32 entry)
314{
315 unsigned long off = (entry & (eq->entries - 1)) *
316 HNS_ROCE_CEQ_ENTRY_SIZE;
317
318 return (struct hns_roce_ceqe *)((u8 *)
319 (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
320 off % HNS_ROCE_BA_SIZE);
321}
322
323static struct hns_roce_ceqe *next_ceqe_sw(struct hns_roce_eq *eq)
324{
325 struct hns_roce_ceqe *ceqe = get_ceqe(eq, eq->cons_index);
326
327 return (!!(roce_get_bit(ceqe->ceqe.comp,
328 HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
329 (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
330}
331
332static int hns_roce_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
333{
334 struct hns_roce_ceqe *ceqe;
335 int ceqes_found = 0;
336 u32 cqn;
337
338 while ((ceqe = next_ceqe_sw(eq))) {
339 /* Memory barrier */
340 rmb();
341 cqn = roce_get_field(ceqe->ceqe.comp,
342 HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
343 HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
344 hns_roce_cq_completion(hr_dev, cqn);
345
346 ++eq->cons_index;
347 ceqes_found = 1;
348
349 if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth[eq->eqn] - 1) {
350 dev_warn(&eq->hr_dev->pdev->dev,
351 "cons_index overflow, set back to zero\n");
352 eq->cons_index = 0;
353 }
354 }
355
356 eq_set_cons_index(eq, 0);
357
358 return ceqes_found;
359}
360
361static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
362 struct hns_roce_eq *eq)
363{
364 struct device *dev = &eq->hr_dev->pdev->dev;
365 int eqovf_found = 0;
366 u32 caepaemask_val;
367 u32 cealmovf_val;
368 u32 caepaest_val;
369 u32 aeshift_val;
370 u32 ceshift_val;
371 u32 cemask_val;
372 int i = 0;
373
374 /**
375 * AEQ overflow ECC mult bit err CEQ overflow alarm
376 * must clear interrupt, mask irq, clear irq, cancel mask operation
377 */
378 aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
379
380 if (roce_get_bit(aeshift_val,
381 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
382 dev_warn(dev, "AEQ overflow!\n");
383
384 /* Set mask */
385 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
386 roce_set_bit(caepaemask_val,
387 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
388 HNS_ROCE_INT_MASK_ENABLE);
389 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
390
391 /* Clear int state(INT_WC : write 1 clear) */
392 caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
393 roce_set_bit(caepaest_val,
394 ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
395 roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
396
397 /* Clear mask */
398 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
399 roce_set_bit(caepaemask_val,
400 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
401 HNS_ROCE_INT_MASK_DISABLE);
402 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
403 }
404
405 /* CEQ almost overflow */
406 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
407 ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
408 i * CEQ_REG_OFFSET);
409
410 if (roce_get_bit(ceshift_val,
411 ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
412 dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
413 eqovf_found++;
414
415 /* Set mask */
416 cemask_val = roce_read(hr_dev,
417 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
418 i * CEQ_REG_OFFSET);
419 roce_set_bit(cemask_val,
420 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
421 HNS_ROCE_INT_MASK_ENABLE);
422 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
423 i * CEQ_REG_OFFSET, cemask_val);
424
425 /* Clear int state(INT_WC : write 1 clear) */
426 cealmovf_val = roce_read(hr_dev,
427 ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
428 i * CEQ_REG_OFFSET);
429 roce_set_bit(cealmovf_val,
430 ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
431 1);
432 roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
433 i * CEQ_REG_OFFSET, cealmovf_val);
434
435 /* Clear mask */
436 cemask_val = roce_read(hr_dev,
437 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
438 i * CEQ_REG_OFFSET);
439 roce_set_bit(cemask_val,
440 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
441 HNS_ROCE_INT_MASK_DISABLE);
442 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
443 i * CEQ_REG_OFFSET, cemask_val);
444 }
445 }
446
447 /* ECC multi-bit error alarm */
448 dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
449 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
450 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
451 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
452
453 dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
454 roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
455 roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
456 roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
457
458 return eqovf_found;
459}
460
461static int hns_roce_eq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
462{
463 int eqes_found = 0;
464
465 if (likely(eq->type_flag == HNS_ROCE_CEQ))
466 /* CEQ irq routine, CEQ is pulse irq, not clear */
467 eqes_found = hns_roce_ceq_int(hr_dev, eq);
468 else if (likely(eq->type_flag == HNS_ROCE_AEQ))
469 /* AEQ irq routine, AEQ is pulse irq, not clear */
470 eqes_found = hns_roce_aeq_int(hr_dev, eq);
471 else
472 /* AEQ queue overflow irq */
473 eqes_found = hns_roce_aeq_ovf_int(hr_dev, eq);
474
475 return eqes_found;
476}
477
478static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr)
479{
480 int int_work = 0;
481 struct hns_roce_eq *eq = eq_ptr;
482 struct hns_roce_dev *hr_dev = eq->hr_dev;
483
484 int_work = hns_roce_eq_int(hr_dev, eq);
485
486 return IRQ_RETVAL(int_work);
487}
488
489static void hns_roce_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
490 int enable_flag)
491{
492 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
493 u32 val;
494
495 val = readl(eqc);
496
497 if (enable_flag)
498 roce_set_field(val,
499 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
500 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
501 HNS_ROCE_EQ_STAT_VALID);
502 else
503 roce_set_field(val,
504 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
505 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
506 HNS_ROCE_EQ_STAT_INVALID);
507 writel(val, eqc);
508}
509
510static int hns_roce_create_eq(struct hns_roce_dev *hr_dev,
511 struct hns_roce_eq *eq)
512{
513 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
514 struct device *dev = &hr_dev->pdev->dev;
515 dma_addr_t tmp_dma_addr;
516 u32 eqconsindx_val = 0;
517 u32 eqcuridx_val = 0;
518 u32 eqshift_val = 0;
519 int num_bas = 0;
520 int ret;
521 int i;
522
523 num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
524 HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
525
526 if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
527 dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
528 (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
529 num_bas);
530 return -EINVAL;
531 }
532
533 eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
534 if (!eq->buf_list)
535 return -ENOMEM;
536
537 for (i = 0; i < num_bas; ++i) {
538 eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
539 &tmp_dma_addr,
540 GFP_KERNEL);
541 if (!eq->buf_list[i].buf) {
542 ret = -ENOMEM;
543 goto err_out_free_pages;
544 }
545
546 eq->buf_list[i].map = tmp_dma_addr;
547 memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
548 }
549 eq->cons_index = 0;
550 roce_set_field(eqshift_val,
551 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
552 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
553 HNS_ROCE_EQ_STAT_INVALID);
554 roce_set_field(eqshift_val,
555 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
556 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
557 eq->log_entries);
558 writel(eqshift_val, eqc);
559
560 /* Configure eq extended address 12~44bit */
561 writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
562
563 /*
564 * Configure eq extended address 45~49 bit.
565 * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
566 * using 4K page, and shift more 32 because of
567 * caculating the high 32 bit value evaluated to hardware.
568 */
569 roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
570 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
571 eq->buf_list[0].map >> 44);
572 roce_set_field(eqcuridx_val,
573 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
574 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
575 writel(eqcuridx_val, eqc + 8);
576
577 /* Configure eq consumer index */
578 roce_set_field(eqconsindx_val,
579 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
580 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
581 writel(eqconsindx_val, eqc + 0xc);
582
583 return 0;
584
585err_out_free_pages:
586 for (i = i - 1; i >= 0; i--)
587 dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
588 eq->buf_list[i].map);
589
590 kfree(eq->buf_list);
591 return ret;
592}
593
594static void hns_roce_free_eq(struct hns_roce_dev *hr_dev,
595 struct hns_roce_eq *eq)
596{
597 int i = 0;
598 int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
599 HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
600
601 if (!eq->buf_list)
602 return;
603
604 for (i = 0; i < npages; ++i)
605 dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
606 eq->buf_list[i].buf, eq->buf_list[i].map);
607
608 kfree(eq->buf_list);
609}
610
611static void hns_roce_int_mask_en(struct hns_roce_dev *hr_dev)
612{
613 int i = 0;
614 u32 aemask_val;
615 int masken = 0;
616
617 /* AEQ INT */
618 aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
619 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
620 masken);
621 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
622 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
623
624 /* CEQ INT */
625 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
626 /* IRQ mask */
627 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
628 i * CEQ_REG_OFFSET, masken);
629 }
630}
631
632static void hns_roce_ce_int_default_cfg(struct hns_roce_dev *hr_dev)
633{
634 /* Configure ce int interval */
635 roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
636 HNS_ROCE_CEQ_DEFAULT_INTERVAL);
637
638 /* Configure ce int burst num */
639 roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
640 HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
641}
642
643int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev)
644{
645 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
646 struct device *dev = &hr_dev->pdev->dev;
647 struct hns_roce_eq *eq = NULL;
648 int eq_num = 0;
649 int ret = 0;
650 int i = 0;
651 int j = 0;
652
653 eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
654 eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
655 if (!eq_table->eq)
656 return -ENOMEM;
657
658 eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
659 GFP_KERNEL);
660 if (!eq_table->eqc_base) {
661 ret = -ENOMEM;
662 goto err_eqc_base_alloc_fail;
663 }
664
665 for (i = 0; i < eq_num; i++) {
666 eq = &eq_table->eq[i];
667 eq->hr_dev = hr_dev;
668 eq->eqn = i;
669 eq->irq = hr_dev->irq[i];
670 eq->log_page_size = PAGE_SHIFT;
671
672 if (i < hr_dev->caps.num_comp_vectors) {
673 /* CEQ */
674 eq_table->eqc_base[i] = hr_dev->reg_base +
675 ROCEE_CAEP_CEQC_SHIFT_0_REG +
676 HNS_ROCE_CEQC_REG_OFFSET * i;
677 eq->type_flag = HNS_ROCE_CEQ;
678 eq->doorbell = hr_dev->reg_base +
679 ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
680 HNS_ROCE_CEQC_REG_OFFSET * i;
681 eq->entries = hr_dev->caps.ceqe_depth[i];
682 eq->log_entries = ilog2(eq->entries);
683 eq->eqe_size = sizeof(struct hns_roce_ceqe);
684 } else {
685 /* AEQ */
686 eq_table->eqc_base[i] = hr_dev->reg_base +
687 ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
688 eq->type_flag = HNS_ROCE_AEQ;
689 eq->doorbell = hr_dev->reg_base +
690 ROCEE_CAEP_AEQE_CONS_IDX_REG;
691 eq->entries = hr_dev->caps.aeqe_depth;
692 eq->log_entries = ilog2(eq->entries);
693 eq->eqe_size = sizeof(struct hns_roce_aeqe);
694 }
695 }
696
697 /* Disable irq */
698 hns_roce_int_mask_en(hr_dev);
699
700 /* Configure CE irq interval and burst num */
701 hns_roce_ce_int_default_cfg(hr_dev);
702
703 for (i = 0; i < eq_num; i++) {
704 ret = hns_roce_create_eq(hr_dev, &eq_table->eq[i]);
705 if (ret) {
706 dev_err(dev, "eq create failed\n");
707 goto err_create_eq_fail;
708 }
709 }
710
711 for (j = 0; j < eq_num; j++) {
712 ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt,
713 0, hr_dev->irq_names[j], eq_table->eq + j);
714 if (ret) {
715 dev_err(dev, "request irq error!\n");
716 goto err_request_irq_fail;
717 }
718 }
719
720 for (i = 0; i < eq_num; i++)
721 hns_roce_enable_eq(hr_dev, i, EQ_ENABLE);
722
723 return 0;
724
725err_request_irq_fail:
726 for (j = j - 1; j >= 0; j--)
727 free_irq(eq_table->eq[j].irq, eq_table->eq + j);
728
729err_create_eq_fail:
730 for (i = i - 1; i >= 0; i--)
731 hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
732
733 kfree(eq_table->eqc_base);
734
735err_eqc_base_alloc_fail:
736 kfree(eq_table->eq);
737
738 return ret;
739}
740
741void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev)
742{
743 int i;
744 int eq_num;
745 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
746
747 eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
748 for (i = 0; i < eq_num; i++) {
749 /* Disable EQ */
750 hns_roce_enable_eq(hr_dev, i, EQ_DISABLE);
751
752 free_irq(eq_table->eq[i].irq, eq_table->eq + i);
753
754 hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
755 }
756
757 kfree(eq_table->eqc_base);
758 kfree(eq_table->eq);
759}
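
The create-EQ path above sizes the queue buffer by rounding the EQE array up to a page, counting how many hardware block-address (BA) units it spans, and refusing anything larger than a single BA. A minimal userspace sketch of that arithmetic follows; PAGE_SIZE and the BA size are assumed here rather than taken from the driver headers.

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define BA_SIZE		(1UL << 15)		/* stand-in for HNS_ROCE_BA_SIZE */

int main(void)
{
	unsigned long entries = 0x400;		/* e.g. the async EQE depth */
	unsigned long eqe_size = 16;		/* AEQ entry size in bytes */
	unsigned long buf = entries * eqe_size;
	unsigned long num_bas = (PAGE_ALIGN(buf) + BA_SIZE - 1) / BA_SIZE;

	if (buf > BA_SIZE)
		printf("buf %lu exceeds one BA (%lu), would need %lu BAs\n",
		       buf, BA_SIZE, num_bas);
	else
		printf("buf %lu fits in one BA, num_bas = %lu\n", buf, num_bas);
	return 0;
}
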
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.h b/drivers/infiniband/hw/hns/hns_roce_eq.h
deleted file mode 100644
index c6d212d12e03..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * Copyright (c) 2016 Hisilicon Limited.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef _HNS_ROCE_EQ_H
34#define _HNS_ROCE_EQ_H
35
36#define HNS_ROCE_CEQ 1
37#define HNS_ROCE_AEQ 2
38
39#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
40#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
41#define HNS_ROCE_CEQC_REG_OFFSET 0x18
42
43#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
44#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
45
46#define HNS_ROCE_INT_MASK_DISABLE 0
47#define HNS_ROCE_INT_MASK_ENABLE 1
48
49#define EQ_ENABLE 1
50#define EQ_DISABLE 0
51#define CONS_INDEX_MASK 0xffff
52
53#define CEQ_REG_OFFSET 0x18
54
55enum {
56 HNS_ROCE_EQ_STAT_INVALID = 0,
57 HNS_ROCE_EQ_STAT_VALID = 2,
58};
59
60struct hns_roce_aeqe {
61 u32 asyn;
62 union {
63 struct {
64 u32 qp;
65 u32 rsv0;
66 u32 rsv1;
67 } qp_event;
68
69 struct {
70 u32 cq;
71 u32 rsv0;
72 u32 rsv1;
73 } cq_event;
74
75 struct {
76 u32 port;
77 u32 rsv0;
78 u32 rsv1;
79 } port_event;
80
81 struct {
82 u32 ceqe;
83 u32 rsv0;
84 u32 rsv1;
85 } ce_event;
86
87 struct {
88 __le64 out_param;
89 __le16 token;
90 u8 status;
91 u8 rsv0;
92 } __packed cmd;
93 } event;
94};
95
96#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
97#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M \
98 (((1UL << 8) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)
99
100#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
101#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M \
102 (((1UL << 7) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)
103
104#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
105
106#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
107#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M \
108 (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S)
109
110#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
111#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M \
112 (((1UL << 3) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S)
113
114#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
115#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M \
116 (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)
117
118#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
119#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M \
120 (((1UL << 5) - 1) << HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)
121
122struct hns_roce_ceqe {
123 union {
124 int comp;
125 } ceqe;
126};
127
128#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
129
130#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
131#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M \
132 (((1UL << 16) - 1) << HNS_ROCE_CEQE_CEQE_COMP_CQN_S)
133
134#endif /* _HNS_ROCE_EQ_H */
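
The *_S/*_M pairs in this header follow one convention: *_S is the bit offset of a field and *_M is its mask already shifted into position, so a read is (val & M) >> S and a write is the reverse. Below is a small sketch of that pattern, with a local helper standing in for the driver's roce_get_field().

#include <stdio.h>
#include <stdint.h>

#define EVENT_TYPE_S	16
#define EVENT_TYPE_M	(((1UL << 8) - 1) << EVENT_TYPE_S)

/* read a field the same way roce_get_field() does: mask, then shift down */
static unsigned long get_field(uint32_t val, unsigned long mask, int shift)
{
	return (val & mask) >> shift;
}

int main(void)
{
	uint32_t asyn = 0x00930001;	/* event type 0x93 sits in bits 23:16 */

	printf("event type = 0x%lx\n",
	       get_field(asyn, EVENT_TYPE_M, EVENT_TYPE_S));
	return 0;
}
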
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index af27168faf0f..21ca9fa7c9d1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -33,6 +33,7 @@
33#include <linux/platform_device.h> 33#include <linux/platform_device.h>
34#include <linux/acpi.h> 34#include <linux/acpi.h>
35#include <linux/etherdevice.h> 35#include <linux/etherdevice.h>
36#include <linux/interrupt.h>
36#include <linux/of.h> 37#include <linux/of.h>
37#include <linux/of_platform.h> 38#include <linux/of_platform.h>
38#include <rdma/ib_umem.h> 39#include <rdma/ib_umem.h>
@@ -774,7 +775,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
774 goto create_lp_qp_failed; 775 goto create_lp_qp_failed;
775 } 776 }
776 777
777 ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask, 778 ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, IB_QP_DEST_QPN,
778 IB_QPS_INIT, IB_QPS_RTR); 779 IB_QPS_INIT, IB_QPS_RTR);
779 if (ret) { 780 if (ret) {
780 dev_err(dev, "modify qp failed(%d)!\n", ret); 781 dev_err(dev, "modify qp failed(%d)!\n", ret);
@@ -1492,9 +1493,9 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
1492 caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE; 1493 caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE;
1493 caps->num_uars = HNS_ROCE_V1_UAR_NUM; 1494 caps->num_uars = HNS_ROCE_V1_UAR_NUM;
1494 caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM; 1495 caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM;
1495 caps->num_aeq_vectors = HNS_ROCE_AEQE_VEC_NUM; 1496 caps->num_aeq_vectors = HNS_ROCE_V1_AEQE_VEC_NUM;
1496 caps->num_comp_vectors = HNS_ROCE_COMP_VEC_NUM; 1497 caps->num_comp_vectors = HNS_ROCE_V1_COMP_VEC_NUM;
1497 caps->num_other_vectors = HNS_ROCE_AEQE_OF_VEC_NUM; 1498 caps->num_other_vectors = HNS_ROCE_V1_ABNORMAL_VEC_NUM;
1498 caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM; 1499 caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM;
1499 caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS; 1500 caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS;
1500 caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM; 1501 caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM;
@@ -1529,10 +1530,8 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
1529 caps->num_ports + 1; 1530 caps->num_ports + 1;
1530 } 1531 }
1531 1532
1532 for (i = 0; i < caps->num_comp_vectors; i++) 1533 caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
1533 caps->ceqe_depth[i] = HNS_ROCE_V1_NUM_COMP_EQE; 1534 caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
1534
1535 caps->aeqe_depth = HNS_ROCE_V1_NUM_ASYNC_EQE;
1536 caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, 1535 caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev,
1537 ROCEE_ACK_DELAY_REG)); 1536 ROCEE_ACK_DELAY_REG));
1538 caps->max_mtu = IB_MTU_2048; 1537 caps->max_mtu = IB_MTU_2048;
@@ -2312,15 +2311,16 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
2312 case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE: 2311 case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
2313 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 2312 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2314 wc->wc_flags = IB_WC_WITH_IMM; 2313 wc->wc_flags = IB_WC_WITH_IMM;
2315 wc->ex.imm_data = le32_to_cpu(cqe->immediate_data); 2314 wc->ex.imm_data =
2315 cpu_to_be32(le32_to_cpu(cqe->immediate_data));
2316 break; 2316 break;
2317 case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE: 2317 case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
2318 if (roce_get_bit(cqe->cqe_byte_4, 2318 if (roce_get_bit(cqe->cqe_byte_4,
2319 CQE_BYTE_4_IMM_INDICATOR_S)) { 2319 CQE_BYTE_4_IMM_INDICATOR_S)) {
2320 wc->opcode = IB_WC_RECV; 2320 wc->opcode = IB_WC_RECV;
2321 wc->wc_flags = IB_WC_WITH_IMM; 2321 wc->wc_flags = IB_WC_WITH_IMM;
2322 wc->ex.imm_data = le32_to_cpu( 2322 wc->ex.imm_data = cpu_to_be32(
2323 cqe->immediate_data); 2323 le32_to_cpu(cqe->immediate_data));
2324 } else { 2324 } else {
2325 wc->opcode = IB_WC_RECV; 2325 wc->opcode = IB_WC_RECV;
2326 wc->wc_flags = 0; 2326 wc->wc_flags = 0;
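
The two hunks above exist because ib_wc.ex.imm_data is declared as a big-endian value while the hardware writes the immediate data little-endian, so the raw CQE word has to be brought into CPU order and then stored back in big-endian form rather than handed over directly. A userspace sketch of the same double conversion, with the <endian.h> helpers standing in for le32_to_cpu()/cpu_to_be32():

#include <stdio.h>
#include <stdint.h>
#include <endian.h>

int main(void)
{
	uint32_t cqe_imm = htole32(0x11223344);	/* as the hardware wrote it */
	uint32_t cpu_val = le32toh(cqe_imm);	/* bring into CPU byte order */
	uint32_t be_val  = htobe32(cpu_val);	/* what the big-endian field stores */

	printf("cpu order 0x%08x, stored big-endian as 0x%08x\n",
	       cpu_val, be_val);
	return 0;
}
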
@@ -3960,6 +3960,732 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
3960 return ret; 3960 return ret;
3961} 3961}
3962 3962
3963static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
3964{
3965 roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) |
3966 (req_not << eq->log_entries), eq->doorbell);
3967}
3968
3969static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
3970 struct hns_roce_aeqe *aeqe, int qpn)
3971{
3972 struct device *dev = &hr_dev->pdev->dev;
3973
3974 dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
3975 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
3976 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
3977 case HNS_ROCE_LWQCE_QPC_ERROR:
3978 dev_warn(dev, "QP %d, QPC error.\n", qpn);
3979 break;
3980 case HNS_ROCE_LWQCE_MTU_ERROR:
3981 dev_warn(dev, "QP %d, MTU error.\n", qpn);
3982 break;
3983 case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
3984 dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
3985 break;
3986 case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
3987 dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
3988 break;
3989 case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
3990 dev_warn(dev, "QP %d, WQE shift error\n", qpn);
3991 break;
3992 case HNS_ROCE_LWQCE_SL_ERROR:
3993 dev_warn(dev, "QP %d, SL error.\n", qpn);
3994 break;
3995 case HNS_ROCE_LWQCE_PORT_ERROR:
3996 dev_warn(dev, "QP %d, port error.\n", qpn);
3997 break;
3998 default:
3999 break;
4000 }
4001}
4002
4003static void hns_roce_v1_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
4004 struct hns_roce_aeqe *aeqe,
4005 int qpn)
4006{
4007 struct device *dev = &hr_dev->pdev->dev;
4008
4009 dev_warn(dev, "Local Access Violation Work Queue Error.\n");
4010 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
4011 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
4012 case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
4013 dev_warn(dev, "QP %d, R_key violation.\n", qpn);
4014 break;
4015 case HNS_ROCE_LAVWQE_LENGTH_ERROR:
4016 dev_warn(dev, "QP %d, length error.\n", qpn);
4017 break;
4018 case HNS_ROCE_LAVWQE_VA_ERROR:
4019 dev_warn(dev, "QP %d, VA error.\n", qpn);
4020 break;
4021 case HNS_ROCE_LAVWQE_PD_ERROR:
4022 dev_err(dev, "QP %d, PD error.\n", qpn);
4023 break;
4024 case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
4025 dev_warn(dev, "QP %d, rw acc error.\n", qpn);
4026 break;
4027 case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
4028 dev_warn(dev, "QP %d, key state error.\n", qpn);
4029 break;
4030 case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
4031 dev_warn(dev, "QP %d, MR operation error.\n", qpn);
4032 break;
4033 default:
4034 break;
4035 }
4036}
4037
4038static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev,
4039 struct hns_roce_aeqe *aeqe,
4040 int event_type)
4041{
4042 struct device *dev = &hr_dev->pdev->dev;
4043 int phy_port;
4044 int qpn;
4045
4046 qpn = roce_get_field(aeqe->event.qp_event.qp,
4047 HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
4048 HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
4049 phy_port = roce_get_field(aeqe->event.qp_event.qp,
4050 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
4051 HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
4052 if (qpn <= 1)
4053 qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
4054
4055 switch (event_type) {
4056 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4057 dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
4058 "QP %d, phy_port %d.\n", qpn, phy_port);
4059 break;
4060 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4061 hns_roce_v1_wq_catas_err_handle(hr_dev, aeqe, qpn);
4062 break;
4063 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4064 hns_roce_v1_local_wq_access_err_handle(hr_dev, aeqe, qpn);
4065 break;
4066 default:
4067 break;
4068 }
4069
4070 hns_roce_qp_event(hr_dev, qpn, event_type);
4071}
4072
4073static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
4074 struct hns_roce_aeqe *aeqe,
4075 int event_type)
4076{
4077 struct device *dev = &hr_dev->pdev->dev;
4078 u32 cqn;
4079
4080 cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
4081 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
4082 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
4083
4084 switch (event_type) {
4085 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4086 dev_warn(dev, "CQ 0x%x access err.\n", cqn);
4087 break;
4088 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4089 dev_warn(dev, "CQ 0x%x overflow\n", cqn);
4090 break;
4091 case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
4092 dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
4093 break;
4094 default:
4095 break;
4096 }
4097
4098 hns_roce_cq_event(hr_dev, cqn, event_type);
4099}
4100
4101static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
4102 struct hns_roce_aeqe *aeqe)
4103{
4104 struct device *dev = &hr_dev->pdev->dev;
4105
4106 switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
4107 HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
4108 case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
4109 dev_warn(dev, "SDB overflow.\n");
4110 break;
4111 case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
4112 dev_warn(dev, "SDB almost overflow.\n");
4113 break;
4114 case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
4115 dev_warn(dev, "SDB almost empty.\n");
4116 break;
4117 case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
4118 dev_warn(dev, "ODB overflow.\n");
4119 break;
4120 case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
4121 dev_warn(dev, "ODB almost overflow.\n");
4122 break;
4123 case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
4124 dev_warn(dev, "SDB almost empty.\n");
4125 break;
4126 default:
4127 break;
4128 }
4129}
4130
4131static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
4132{
4133 unsigned long off = (entry & (eq->entries - 1)) *
4134 HNS_ROCE_AEQ_ENTRY_SIZE;
4135
4136 return (struct hns_roce_aeqe *)((u8 *)
4137 (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
4138 off % HNS_ROCE_BA_SIZE);
4139}
4140
4141static struct hns_roce_aeqe *next_aeqe_sw_v1(struct hns_roce_eq *eq)
4142{
4143 struct hns_roce_aeqe *aeqe = get_aeqe_v1(eq, eq->cons_index);
4144
4145 return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
4146 !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
4147}
4148
4149static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
4150 struct hns_roce_eq *eq)
4151{
4152 struct device *dev = &hr_dev->pdev->dev;
4153 struct hns_roce_aeqe *aeqe;
4154 int aeqes_found = 0;
4155 int event_type;
4156
4157 while ((aeqe = next_aeqe_sw_v1(eq))) {
4158
4159 /* Make sure we read the AEQ entry after we have checked the
4160 * ownership bit
4161 */
4162 dma_rmb();
4163
4164 dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
4165 roce_get_field(aeqe->asyn,
4166 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
4167 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
4168 event_type = roce_get_field(aeqe->asyn,
4169 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
4170 HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
4171 switch (event_type) {
4172 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
4173 dev_warn(dev, "PATH MIG not supported\n");
4174 break;
4175 case HNS_ROCE_EVENT_TYPE_COMM_EST:
4176 dev_warn(dev, "COMMUNICATION established\n");
4177 break;
4178 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4179 dev_warn(dev, "SQ DRAINED not supported\n");
4180 break;
4181 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
4182 dev_warn(dev, "PATH MIG failed\n");
4183 break;
4184 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4185 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4186 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4187 hns_roce_v1_qp_err_handle(hr_dev, aeqe, event_type);
4188 break;
4189 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
4190 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
4191 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
4192 dev_warn(dev, "SRQ not support!\n");
4193 break;
4194 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4195 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4196 case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
4197 hns_roce_v1_cq_err_handle(hr_dev, aeqe, event_type);
4198 break;
4199 case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
4200 dev_warn(dev, "port change.\n");
4201 break;
4202 case HNS_ROCE_EVENT_TYPE_MB:
4203 hns_roce_cmd_event(hr_dev,
4204 le16_to_cpu(aeqe->event.cmd.token),
4205 aeqe->event.cmd.status,
4206 le64_to_cpu(aeqe->event.cmd.out_param
4207 ));
4208 break;
4209 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
4210 hns_roce_v1_db_overflow_handle(hr_dev, aeqe);
4211 break;
4212 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
4213 dev_warn(dev, "CEQ 0x%lx overflow.\n",
4214 roce_get_field(aeqe->event.ce_event.ceqe,
4215 HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
4216 HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
4217 break;
4218 default:
4219 dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n",
4220 event_type, eq->eqn, eq->cons_index);
4221 break;
4222 }
4223
4224 eq->cons_index++;
4225 aeqes_found = 1;
4226
4227 if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
4228 dev_warn(dev, "cons_index overflow, set back to 0.\n");
4229 eq->cons_index = 0;
4230 }
4231 }
4232
4233 set_eq_cons_index_v1(eq, 0);
4234
4235 return aeqes_found;
4236}
4237
4238static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
4239{
4240 unsigned long off = (entry & (eq->entries - 1)) *
4241 HNS_ROCE_CEQ_ENTRY_SIZE;
4242
4243 return (struct hns_roce_ceqe *)((u8 *)
4244 (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
4245 off % HNS_ROCE_BA_SIZE);
4246}
4247
4248static struct hns_roce_ceqe *next_ceqe_sw_v1(struct hns_roce_eq *eq)
4249{
4250 struct hns_roce_ceqe *ceqe = get_ceqe_v1(eq, eq->cons_index);
4251
4252 return (!!(roce_get_bit(ceqe->comp,
4253 HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
4254 (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
4255}
4256
4257static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
4258 struct hns_roce_eq *eq)
4259{
4260 struct hns_roce_ceqe *ceqe;
4261 int ceqes_found = 0;
4262 u32 cqn;
4263
4264 while ((ceqe = next_ceqe_sw_v1(eq))) {
4265
4266 /* Make sure we read CEQ entry after we have checked the
4267 * ownership bit
4268 */
4269 dma_rmb();
4270
4271 cqn = roce_get_field(ceqe->comp,
4272 HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
4273 HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
4274 hns_roce_cq_completion(hr_dev, cqn);
4275
4276 ++eq->cons_index;
4277 ceqes_found = 1;
4278
4279 if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) {
4280 dev_warn(&eq->hr_dev->pdev->dev,
4281 "cons_index overflow, set back to 0.\n");
4282 eq->cons_index = 0;
4283 }
4284 }
4285
4286 set_eq_cons_index_v1(eq, 0);
4287
4288 return ceqes_found;
4289}
4290
4291static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
4292{
4293 struct hns_roce_eq *eq = eq_ptr;
4294 struct hns_roce_dev *hr_dev = eq->hr_dev;
4295 int int_work = 0;
4296
4297 if (eq->type_flag == HNS_ROCE_CEQ)
4298 /* CEQ irq routine, CEQ is pulse irq, not clear */
4299 int_work = hns_roce_v1_ceq_int(hr_dev, eq);
4300 else
4301 /* AEQ irq routine, AEQ is pulse irq, not clear */
4302 int_work = hns_roce_v1_aeq_int(hr_dev, eq);
4303
4304 return IRQ_RETVAL(int_work);
4305}
4306
4307static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4308{
4309 struct hns_roce_dev *hr_dev = dev_id;
4310 struct device *dev = &hr_dev->pdev->dev;
4311 int int_work = 0;
4312 u32 caepaemask_val;
4313 u32 cealmovf_val;
4314 u32 caepaest_val;
4315 u32 aeshift_val;
4316 u32 ceshift_val;
4317 u32 cemask_val;
4318 int i;
4319
4320 /*
4321 * Abnormal interrupt:
4322 * AEQ overflow, ECC multi-bit err, CEQ overflow must clear
4323 * interrupt, mask irq, clear irq, cancel mask operation
4324 */
4325 aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
4326
4327 /* AEQE overflow */
4328 if (roce_get_bit(aeshift_val,
4329 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
4330 dev_warn(dev, "AEQ overflow!\n");
4331
4332 /* Set mask */
4333 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4334 roce_set_bit(caepaemask_val,
4335 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4336 HNS_ROCE_INT_MASK_ENABLE);
4337 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
4338
4339 /* Clear int state(INT_WC : write 1 clear) */
4340 caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
4341 roce_set_bit(caepaest_val,
4342 ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
4343 roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
4344
4345 /* Clear mask */
4346 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4347 roce_set_bit(caepaemask_val,
4348 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4349 HNS_ROCE_INT_MASK_DISABLE);
4350 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
4351 }
4352
4353 /* CEQ almost overflow */
4354 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
4355 ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
4356 i * CEQ_REG_OFFSET);
4357
4358 if (roce_get_bit(ceshift_val,
4359 ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
4360 dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
4361 int_work++;
4362
4363 /* Set mask */
4364 cemask_val = roce_read(hr_dev,
4365 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4366 i * CEQ_REG_OFFSET);
4367 roce_set_bit(cemask_val,
4368 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
4369 HNS_ROCE_INT_MASK_ENABLE);
4370 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4371 i * CEQ_REG_OFFSET, cemask_val);
4372
4373 /* Clear int state(INT_WC : write 1 clear) */
4374 cealmovf_val = roce_read(hr_dev,
4375 ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
4376 i * CEQ_REG_OFFSET);
4377 roce_set_bit(cealmovf_val,
4378 ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
4379 1);
4380 roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
4381 i * CEQ_REG_OFFSET, cealmovf_val);
4382
4383 /* Clear mask */
4384 cemask_val = roce_read(hr_dev,
4385 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4386 i * CEQ_REG_OFFSET);
4387 roce_set_bit(cemask_val,
4388 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
4389 HNS_ROCE_INT_MASK_DISABLE);
4390 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4391 i * CEQ_REG_OFFSET, cemask_val);
4392 }
4393 }
4394
4395 /* ECC multi-bit error alarm */
4396 dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
4397 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
4398 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
4399 roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
4400
4401 dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
4402 roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
4403 roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
4404 roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
4405
4406 return IRQ_RETVAL(int_work);
4407}
4408
4409static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
4410{
4411 u32 aemask_val;
4412 int masken = 0;
4413 int i;
4414
4415 /* AEQ INT */
4416 aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4417 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4418 masken);
4419 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
4420 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
4421
4422 /* CEQ INT */
4423 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
4424 /* IRQ mask */
4425 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4426 i * CEQ_REG_OFFSET, masken);
4427 }
4428}
4429
4430static void hns_roce_v1_free_eq(struct hns_roce_dev *hr_dev,
4431 struct hns_roce_eq *eq)
4432{
4433 int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
4434 HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
4435 int i;
4436
4437 if (!eq->buf_list)
4438 return;
4439
4440 for (i = 0; i < npages; ++i)
4441 dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
4442 eq->buf_list[i].buf, eq->buf_list[i].map);
4443
4444 kfree(eq->buf_list);
4445}
4446
4447static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
4448 int enable_flag)
4449{
4450 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
4451 u32 val;
4452
4453 val = readl(eqc);
4454
4455 if (enable_flag)
4456 roce_set_field(val,
4457 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4458 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4459 HNS_ROCE_EQ_STAT_VALID);
4460 else
4461 roce_set_field(val,
4462 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4463 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4464 HNS_ROCE_EQ_STAT_INVALID);
4465 writel(val, eqc);
4466}
4467
4468static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
4469 struct hns_roce_eq *eq)
4470{
4471 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
4472 struct device *dev = &hr_dev->pdev->dev;
4473 dma_addr_t tmp_dma_addr;
4474 u32 eqconsindx_val = 0;
4475 u32 eqcuridx_val = 0;
4476 u32 eqshift_val = 0;
4477 int num_bas;
4478 int ret;
4479 int i;
4480
4481 num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
4482 HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
4483
4484 if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
4485		dev_err(dev, "eq buf size %d is larger than ba size %d, need %d bas\n",
4486 (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
4487 num_bas);
4488 return -EINVAL;
4489 }
4490
4491 eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
4492 if (!eq->buf_list)
4493 return -ENOMEM;
4494
4495 for (i = 0; i < num_bas; ++i) {
4496 eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
4497 &tmp_dma_addr,
4498 GFP_KERNEL);
4499 if (!eq->buf_list[i].buf) {
4500 ret = -ENOMEM;
4501 goto err_out_free_pages;
4502 }
4503
4504 eq->buf_list[i].map = tmp_dma_addr;
4505 memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
4506 }
4507 eq->cons_index = 0;
4508 roce_set_field(eqshift_val,
4509 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4510 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4511 HNS_ROCE_EQ_STAT_INVALID);
4512 roce_set_field(eqshift_val,
4513 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
4514 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
4515 eq->log_entries);
4516 writel(eqshift_val, eqc);
4517
4518 /* Configure eq extended address 12~44bit */
4519 writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
4520
4521 /*
4522	 * Configure eq extended address bits 45~49.
4523	 * 44 = 32 + 12: the address is shifted right by 12 because 4K pages
4524	 * are used, and by a further 32 when calculating the high 32-bit
4525	 * value written to hardware.
4526 */
4527 roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
4528 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
4529 eq->buf_list[0].map >> 44);
4530 roce_set_field(eqcuridx_val,
4531 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
4532 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
4533 writel(eqcuridx_val, eqc + 8);
4534
4535 /* Configure eq consumer index */
4536 roce_set_field(eqconsindx_val,
4537 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
4538 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
4539 writel(eqconsindx_val, eqc + 0xc);
4540
4541 return 0;
4542
4543err_out_free_pages:
4544 for (i -= 1; i >= 0; i--)
4545 dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
4546 eq->buf_list[i].map);
4547
4548 kfree(eq->buf_list);
4549 return ret;
4550}
4551
4552static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
4553{
4554 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
4555 struct device *dev = &hr_dev->pdev->dev;
4556 struct hns_roce_eq *eq;
4557 int irq_num;
4558 int eq_num;
4559 int ret;
4560 int i, j;
4561
4562 eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
4563 irq_num = eq_num + hr_dev->caps.num_other_vectors;
4564
4565 eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
4566 if (!eq_table->eq)
4567 return -ENOMEM;
4568
4569 eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
4570 GFP_KERNEL);
4571 if (!eq_table->eqc_base) {
4572 ret = -ENOMEM;
4573 goto err_eqc_base_alloc_fail;
4574 }
4575
4576 for (i = 0; i < eq_num; i++) {
4577 eq = &eq_table->eq[i];
4578 eq->hr_dev = hr_dev;
4579 eq->eqn = i;
4580 eq->irq = hr_dev->irq[i];
4581 eq->log_page_size = PAGE_SHIFT;
4582
4583 if (i < hr_dev->caps.num_comp_vectors) {
4584 /* CEQ */
4585 eq_table->eqc_base[i] = hr_dev->reg_base +
4586 ROCEE_CAEP_CEQC_SHIFT_0_REG +
4587 CEQ_REG_OFFSET * i;
4588 eq->type_flag = HNS_ROCE_CEQ;
4589 eq->doorbell = hr_dev->reg_base +
4590 ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
4591 CEQ_REG_OFFSET * i;
4592 eq->entries = hr_dev->caps.ceqe_depth;
4593 eq->log_entries = ilog2(eq->entries);
4594 eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
4595 } else {
4596 /* AEQ */
4597 eq_table->eqc_base[i] = hr_dev->reg_base +
4598 ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
4599 eq->type_flag = HNS_ROCE_AEQ;
4600 eq->doorbell = hr_dev->reg_base +
4601 ROCEE_CAEP_AEQE_CONS_IDX_REG;
4602 eq->entries = hr_dev->caps.aeqe_depth;
4603 eq->log_entries = ilog2(eq->entries);
4604 eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
4605 }
4606 }
4607
4608 /* Disable irq */
4609 hns_roce_v1_int_mask_enable(hr_dev);
4610
4611 /* Configure ce int interval */
4612 roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
4613 HNS_ROCE_CEQ_DEFAULT_INTERVAL);
4614
4615 /* Configure ce int burst num */
4616 roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
4617 HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
4618
4619 for (i = 0; i < eq_num; i++) {
4620 ret = hns_roce_v1_create_eq(hr_dev, &eq_table->eq[i]);
4621 if (ret) {
4622 dev_err(dev, "eq create failed\n");
4623 goto err_create_eq_fail;
4624 }
4625 }
4626
4627 for (j = 0; j < irq_num; j++) {
4628 if (j < eq_num)
4629 ret = request_irq(hr_dev->irq[j],
4630 hns_roce_v1_msix_interrupt_eq, 0,
4631 hr_dev->irq_names[j],
4632 &eq_table->eq[j]);
4633 else
4634 ret = request_irq(hr_dev->irq[j],
4635 hns_roce_v1_msix_interrupt_abn, 0,
4636 hr_dev->irq_names[j], hr_dev);
4637
4638 if (ret) {
4639 dev_err(dev, "request irq error!\n");
4640 goto err_request_irq_fail;
4641 }
4642 }
4643
4644 for (i = 0; i < eq_num; i++)
4645 hns_roce_v1_enable_eq(hr_dev, i, EQ_ENABLE);
4646
4647 return 0;
4648
4649err_request_irq_fail:
4650 for (j -= 1; j >= 0; j--)
4651 free_irq(hr_dev->irq[j], &eq_table->eq[j]);
4652
4653err_create_eq_fail:
4654 for (i -= 1; i >= 0; i--)
4655 hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
4656
4657 kfree(eq_table->eqc_base);
4658
4659err_eqc_base_alloc_fail:
4660 kfree(eq_table->eq);
4661
4662 return ret;
4663}
4664
4665static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
4666{
4667 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
4668 int irq_num;
4669 int eq_num;
4670 int i;
4671
4672 eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
4673 irq_num = eq_num + hr_dev->caps.num_other_vectors;
4674 for (i = 0; i < eq_num; i++) {
4675 /* Disable EQ */
4676 hns_roce_v1_enable_eq(hr_dev, i, EQ_DISABLE);
4677
4678 free_irq(hr_dev->irq[i], &eq_table->eq[i]);
4679
4680 hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
4681 }
4682 for (i = eq_num; i < irq_num; i++)
4683 free_irq(hr_dev->irq[i], hr_dev);
4684
4685 kfree(eq_table->eqc_base);
4686 kfree(eq_table->eq);
4687}
4688
3963static const struct hns_roce_hw hns_roce_hw_v1 = { 4689static const struct hns_roce_hw hns_roce_hw_v1 = {
3964 .reset = hns_roce_v1_reset, 4690 .reset = hns_roce_v1_reset,
3965 .hw_profile = hns_roce_v1_profile, 4691 .hw_profile = hns_roce_v1_profile,
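
next_aeqe_sw_v1() and next_ceqe_sw_v1() in the hunk above use the usual ownership trick: the consumer index counts over twice the queue depth, so (cons_index & entries) flips each time the ring wraps, and an entry is treated as new only while its owner bit differs from that parity. A compact sketch of the test, with the depth chosen arbitrarily:

#include <stdio.h>

#define ENTRIES	8	/* ring depth; a power of two, as in the driver */

/* same XOR as the driver: an entry is new while owner bit and wrap parity differ */
static int entry_is_new(int owner_bit, unsigned int cons_index)
{
	return (!!owner_bit) ^ (!!(cons_index & ENTRIES));
}

int main(void)
{
	/* first lap: parity 0, so owner bit 1 marks a fresh entry */
	printf("lap 1, index 3, owner 1 -> new = %d\n", entry_is_new(1, 3));
	/* second lap: parity 1, a stale owner bit 1 no longer matches */
	printf("lap 2, index 11, owner 1 -> new = %d\n", entry_is_new(1, 11));
	return 0;
}
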
@@ -3983,6 +4709,8 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
3983 .poll_cq = hns_roce_v1_poll_cq, 4709 .poll_cq = hns_roce_v1_poll_cq,
3984 .dereg_mr = hns_roce_v1_dereg_mr, 4710 .dereg_mr = hns_roce_v1_dereg_mr,
3985 .destroy_cq = hns_roce_v1_destroy_cq, 4711 .destroy_cq = hns_roce_v1_destroy_cq,
4712 .init_eq = hns_roce_v1_init_eq_table,
4713 .cleanup_eq = hns_roce_v1_cleanup_eq_table,
3986}; 4714};
3987 4715
3988static const struct of_device_id hns_roce_of_match[] = { 4716static const struct of_device_id hns_roce_of_match[] = {
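
The .init_eq and .cleanup_eq entries added above follow the same ops-table pattern as the rest of struct hns_roce_hw: the common code calls through function pointers so the v1 and v2 backends can each supply their own EQ handling. A stripped-down stand-in of that pattern, with purely illustrative names:

#include <stdio.h>

struct fake_dev { const char *name; };

struct fake_hw_ops {
	int  (*init_eq)(struct fake_dev *dev);
	void (*cleanup_eq)(struct fake_dev *dev);
};

static int v1_init_eq(struct fake_dev *dev)
{
	printf("%s: init EQs\n", dev->name);
	return 0;
}

static void v1_cleanup_eq(struct fake_dev *dev)
{
	printf("%s: cleanup EQs\n", dev->name);
}

static const struct fake_hw_ops v1_ops = {
	.init_eq	= v1_init_eq,
	.cleanup_eq	= v1_cleanup_eq,
};

int main(void)
{
	struct fake_dev dev = { .name = "hns-v1" };

	if (!v1_ops.init_eq(&dev))
		v1_ops.cleanup_eq(&dev);
	return 0;
}
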
@@ -4060,10 +4788,6 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
4060 4788
4061 /* get the mapped register base address */ 4789 /* get the mapped register base address */
4062 res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); 4790 res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
4063 if (!res) {
4064 dev_err(dev, "memory resource not found!\n");
4065 return -EINVAL;
4066 }
4067 hr_dev->reg_base = devm_ioremap_resource(dev, res); 4791 hr_dev->reg_base = devm_ioremap_resource(dev, res);
4068 if (IS_ERR(hr_dev->reg_base)) 4792 if (IS_ERR(hr_dev->reg_base))
4069 return PTR_ERR(hr_dev->reg_base); 4793 return PTR_ERR(hr_dev->reg_base);
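
The removed NULL check is redundant: devm_ioremap_resource() validates the resource itself and reports failure through an error pointer, so the IS_ERR()/PTR_ERR() test that remains is the only check the caller needs. A userspace sketch of that error-pointer convention, reimplemented locally for illustration rather than taken from the kernel:

#include <stdio.h>
#include <errno.h>
#include <stdint.h>

#define MAX_ERRNO	4095
#define ERR_PTR(e)	((void *)(intptr_t)(e))
#define PTR_ERR(p)	((long)(intptr_t)(p))
#define IS_ERR(p)	((uintptr_t)(p) >= (uintptr_t)-MAX_ERRNO)

/* pretend mapping helper that fails with an encoded errno */
static void *fake_ioremap(int fail)
{
	static int reg;

	return fail ? ERR_PTR(-ENOMEM) : &reg;
}

int main(void)
{
	void *base = fake_ioremap(1);

	if (IS_ERR(base))
		printf("ioremap failed: %ld\n", PTR_ERR(base));
	return 0;
}
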
@@ -4132,14 +4856,14 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
4132 /* read the interrupt names from the DT or ACPI */ 4856 /* read the interrupt names from the DT or ACPI */
4133 ret = device_property_read_string_array(dev, "interrupt-names", 4857 ret = device_property_read_string_array(dev, "interrupt-names",
4134 hr_dev->irq_names, 4858 hr_dev->irq_names,
4135 HNS_ROCE_MAX_IRQ_NUM); 4859 HNS_ROCE_V1_MAX_IRQ_NUM);
4136 if (ret < 0) { 4860 if (ret < 0) {
4137 dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); 4861 dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
4138 return ret; 4862 return ret;
4139 } 4863 }
4140 4864
4141 /* fetch the interrupt numbers */ 4865 /* fetch the interrupt numbers */
4142 for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { 4866 for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
4143 hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); 4867 hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
4144 if (hr_dev->irq[i] <= 0) { 4868 if (hr_dev->irq[i] <= 0) {
4145 dev_err(dev, "platform get of irq[=%d] failed!\n", i); 4869 dev_err(dev, "platform get of irq[=%d] failed!\n", i);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 21a07ef0afc9..b44ddd239060 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -60,8 +60,13 @@
60#define HNS_ROCE_V1_GID_NUM 16 60#define HNS_ROCE_V1_GID_NUM 16
61#define HNS_ROCE_V1_RESV_QP 8 61#define HNS_ROCE_V1_RESV_QP 8
62 62
63#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000 63#define HNS_ROCE_V1_MAX_IRQ_NUM 34
64#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400 64#define HNS_ROCE_V1_COMP_VEC_NUM 32
65#define HNS_ROCE_V1_AEQE_VEC_NUM 1
66#define HNS_ROCE_V1_ABNORMAL_VEC_NUM 1
67
68#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
69#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
65 70
66#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 71#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
67#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 72#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
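
The new vector counts above are self-consistent: 32 completion vectors plus one asynchronous vector plus one abnormal vector account for the 34 interrupts that HNS_ROCE_V1_MAX_IRQ_NUM now advertises. A one-line check of that arithmetic:

#include <assert.h>
#include <stdio.h>

#define COMP_VEC_NUM		32
#define AEQE_VEC_NUM		1
#define ABNORMAL_VEC_NUM	1
#define MAX_IRQ_NUM		34

int main(void)
{
	assert(COMP_VEC_NUM + AEQE_VEC_NUM + ABNORMAL_VEC_NUM == MAX_IRQ_NUM);
	printf("irq budget consistent: %d\n", MAX_IRQ_NUM);
	return 0;
}
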
@@ -159,6 +164,41 @@
159#define SDB_INV_CNT_OFFSET 8 164#define SDB_INV_CNT_OFFSET 8
160#define SDB_ST_CMP_VAL 8 165#define SDB_ST_CMP_VAL 8
161 166
167#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
168#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
169
170#define HNS_ROCE_INT_MASK_DISABLE 0
171#define HNS_ROCE_INT_MASK_ENABLE 1
172
173#define CEQ_REG_OFFSET 0x18
174
175#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
176
177#define HNS_ROCE_V1_CONS_IDX_M GENMASK(15, 0)
178
179#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
180#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M GENMASK(31, 16)
181
182#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
183#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M GENMASK(23, 16)
184
185#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
186#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M GENMASK(30, 24)
187
188#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
189
190#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
191#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M GENMASK(23, 0)
192
193#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
194#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M GENMASK(27, 25)
195
196#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
197#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M GENMASK(15, 0)
198
199#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
200#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
201
162struct hns_roce_cq_context { 202struct hns_roce_cq_context {
163 u32 cqc_byte_4; 203 u32 cqc_byte_4;
164 u32 cq_bt_l; 204 u32 cq_bt_l;
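
The masks in this header are now written with GENMASK(high, low) instead of the hand-rolled ((1UL << n) - 1) << shift form the deleted hns_roce_eq.h used; the two spell the same value. A quick check for the event-type field, with GENMASK redefined locally so the snippet builds outside the kernel:

#include <stdio.h>
#include <assert.h>

#define GENMASK(h, l) \
	(((~0UL) << (l)) & (~0UL >> (8 * sizeof(unsigned long) - 1 - (h))))

int main(void)
{
	unsigned long old_style = ((1UL << 8) - 1) << 16;	/* bits 23:16 */
	unsigned long new_style = GENMASK(23, 16);

	assert(old_style == new_style);
	printf("event type mask = 0x%lx\n", new_style);
	return 0;
}
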
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 8e18445714a9..256fe110107a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -34,6 +34,7 @@
34#include <linux/etherdevice.h> 34#include <linux/etherdevice.h>
35#include <linux/interrupt.h> 35#include <linux/interrupt.h>
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <net/addrconf.h>
37#include <rdma/ib_umem.h> 38#include <rdma/ib_umem.h>
38 39
39#include "hnae3.h" 40#include "hnae3.h"
@@ -51,32 +52,106 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
51 dseg->len = cpu_to_le32(sg->length); 52 dseg->len = cpu_to_le32(sg->length);
52} 53}
53 54
55static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
56 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
57 void *wqe, unsigned int *sge_ind,
58 struct ib_send_wr **bad_wr)
59{
60 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
61 struct hns_roce_v2_wqe_data_seg *dseg = wqe;
62 struct hns_roce_qp *qp = to_hr_qp(ibqp);
63 int i;
64
65 if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
66 if (rc_sq_wqe->msg_len > hr_dev->caps.max_sq_inline) {
67 *bad_wr = wr;
68 dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
69 rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
70 return -EINVAL;
71 }
72
73 for (i = 0; i < wr->num_sge; i++) {
74 memcpy(wqe, ((void *)wr->sg_list[i].addr),
75 wr->sg_list[i].length);
76 wqe += wr->sg_list[i].length;
77 }
78
79 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
80 1);
81 } else {
82 if (wr->num_sge <= 2) {
83 for (i = 0; i < wr->num_sge; i++) {
84 if (likely(wr->sg_list[i].length)) {
85 set_data_seg_v2(dseg, wr->sg_list + i);
86 dseg++;
87 }
88 }
89 } else {
90 roce_set_field(rc_sq_wqe->byte_20,
91 V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
92 V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
93 (*sge_ind) & (qp->sge.sge_cnt - 1));
94
95 for (i = 0; i < 2; i++) {
96 if (likely(wr->sg_list[i].length)) {
97 set_data_seg_v2(dseg, wr->sg_list + i);
98 dseg++;
99 }
100 }
101
102 dseg = get_send_extend_sge(qp,
103 (*sge_ind) & (qp->sge.sge_cnt - 1));
104
105 for (i = 0; i < wr->num_sge - 2; i++) {
106 if (likely(wr->sg_list[i + 2].length)) {
107 set_data_seg_v2(dseg,
108 wr->sg_list + 2 + i);
109 dseg++;
110 (*sge_ind)++;
111 }
112 }
113 }
114
115 roce_set_field(rc_sq_wqe->byte_16,
116 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
117 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
118 }
119
120 return 0;
121}
122
54static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 123static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
55 struct ib_send_wr **bad_wr) 124 struct ib_send_wr **bad_wr)
56{ 125{
57 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 126 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
127 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
128 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
58 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; 129 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
59 struct hns_roce_qp *qp = to_hr_qp(ibqp); 130 struct hns_roce_qp *qp = to_hr_qp(ibqp);
60 struct hns_roce_v2_wqe_data_seg *dseg; 131 struct hns_roce_v2_wqe_data_seg *dseg;
61 struct device *dev = hr_dev->dev; 132 struct device *dev = hr_dev->dev;
62 struct hns_roce_v2_db sq_db; 133 struct hns_roce_v2_db sq_db;
63 unsigned int sge_ind = 0; 134 unsigned int sge_ind = 0;
64 unsigned int wqe_sz = 0;
65 unsigned int owner_bit; 135 unsigned int owner_bit;
66 unsigned long flags; 136 unsigned long flags;
67 unsigned int ind; 137 unsigned int ind;
68 void *wqe = NULL; 138 void *wqe = NULL;
139 bool loopback;
69 int ret = 0; 140 int ret = 0;
141 u8 *smac;
70 int nreq; 142 int nreq;
71 int i; 143 int i;
72 144
73 if (unlikely(ibqp->qp_type != IB_QPT_RC)) { 145 if (unlikely(ibqp->qp_type != IB_QPT_RC &&
146 ibqp->qp_type != IB_QPT_GSI &&
147 ibqp->qp_type != IB_QPT_UD)) {
74 dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type); 148 dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
75 *bad_wr = NULL; 149 *bad_wr = NULL;
76 return -EOPNOTSUPP; 150 return -EOPNOTSUPP;
77 } 151 }
78 152
79 if (unlikely(qp->state != IB_QPS_RTS && qp->state != IB_QPS_SQD)) { 153 if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT ||
154 qp->state == IB_QPS_RTR)) {
80 dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state); 155 dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state);
81 *bad_wr = wr; 156 *bad_wr = wr;
82 return -EINVAL; 157 return -EINVAL;
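
The reworked check above switches from a whitelist (only RTS and SQD may post) to a blacklist (RESET, INIT and RTR may not), which additionally admits the SQE and ERR states, presumably so that posted WQEs can still be flushed with error completions. A compact comparison of the two predicates:

#include <stdio.h>

enum qps { RESET, INIT, RTR, RTS, SQD, SQE, ERR };

static int old_allows(enum qps s) { return s == RTS || s == SQD; }
static int new_allows(enum qps s) { return !(s == RESET || s == INIT || s == RTR); }

int main(void)
{
	const char *name[] = { "RESET", "INIT", "RTR", "RTS", "SQD", "SQE", "ERR" };

	for (enum qps s = RESET; s <= ERR; s++)
		printf("%-5s old:%d new:%d\n", name[s], old_allows(s), new_allows(s));
	return 0;
}
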
@@ -106,161 +181,255 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
106 wr->wr_id; 181 wr->wr_id;
107 182
108 owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1; 183 owner_bit = ~(qp->sq.head >> ilog2(qp->sq.wqe_cnt)) & 0x1;
109 rc_sq_wqe = wqe;
110 memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
111 for (i = 0; i < wr->num_sge; i++)
112 rc_sq_wqe->msg_len += wr->sg_list[i].length;
113 184
114 rc_sq_wqe->inv_key_immtdata = send_ieth(wr); 185 /* Corresponding to the QP type, wqe process separately */
186 if (ibqp->qp_type == IB_QPT_GSI) {
187 ud_sq_wqe = wqe;
188 memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
189
190 roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
191 V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
192 roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
193 V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
194 roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
195 V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
196 roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
197 V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
198 roce_set_field(ud_sq_wqe->byte_48,
199 V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
200 V2_UD_SEND_WQE_BYTE_48_DMAC_4_S,
201 ah->av.mac[4]);
202 roce_set_field(ud_sq_wqe->byte_48,
203 V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
204 V2_UD_SEND_WQE_BYTE_48_DMAC_5_S,
205 ah->av.mac[5]);
206
207 /* MAC loopback */
208 smac = (u8 *)hr_dev->dev_addr[qp->port];
209 loopback = ether_addr_equal_unaligned(ah->av.mac,
210 smac) ? 1 : 0;
211
212 roce_set_bit(ud_sq_wqe->byte_40,
213 V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
214
215 roce_set_field(ud_sq_wqe->byte_4,
216 V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
217 V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
218 HNS_ROCE_V2_WQE_OP_SEND);
115 219
116 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, 220 for (i = 0; i < wr->num_sge; i++)
117 (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); 221 ud_sq_wqe->msg_len += wr->sg_list[i].length;
118 222
119 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S, 223 ud_sq_wqe->immtdata = send_ieth(wr);
120 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
121 224
122 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S, 225 /* Set sig attr */
123 (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); 226 roce_set_bit(ud_sq_wqe->byte_4,
227 V2_UD_SEND_WQE_BYTE_4_CQE_S,
228 (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
124 229
125 roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, 230 /* Set se attr */
126 owner_bit); 231 roce_set_bit(ud_sq_wqe->byte_4,
232 V2_UD_SEND_WQE_BYTE_4_SE_S,
233 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
127 234
128 switch (wr->opcode) { 235 roce_set_bit(ud_sq_wqe->byte_4,
129 case IB_WR_RDMA_READ: 236 V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
130 roce_set_field(rc_sq_wqe->byte_4, 237
131 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 238 roce_set_field(ud_sq_wqe->byte_16,
132 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 239 V2_UD_SEND_WQE_BYTE_16_PD_M,
133 HNS_ROCE_V2_WQE_OP_RDMA_READ); 240 V2_UD_SEND_WQE_BYTE_16_PD_S,
134 rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); 241 to_hr_pd(ibqp->pd)->pdn);
135 rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); 242
136 break; 243 roce_set_field(ud_sq_wqe->byte_16,
137 case IB_WR_RDMA_WRITE: 244 V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
138 roce_set_field(rc_sq_wqe->byte_4, 245 V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
139 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 246 wr->num_sge);
140 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 247
141 HNS_ROCE_V2_WQE_OP_RDMA_WRITE); 248 roce_set_field(ud_sq_wqe->byte_20,
142 rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); 249 V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
143 rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); 250 V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
144 break; 251 sge_ind & (qp->sge.sge_cnt - 1));
145 case IB_WR_RDMA_WRITE_WITH_IMM: 252
146 roce_set_field(rc_sq_wqe->byte_4, 253 roce_set_field(ud_sq_wqe->byte_24,
254 V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
255 V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
256 ud_sq_wqe->qkey =
257 cpu_to_be32(ud_wr(wr)->remote_qkey & 0x80000000) ?
258 qp->qkey : ud_wr(wr)->remote_qkey;
259 roce_set_field(ud_sq_wqe->byte_32,
260 V2_UD_SEND_WQE_BYTE_32_DQPN_M,
261 V2_UD_SEND_WQE_BYTE_32_DQPN_S,
262 ud_wr(wr)->remote_qpn);
263
264 roce_set_field(ud_sq_wqe->byte_36,
265 V2_UD_SEND_WQE_BYTE_36_VLAN_M,
266 V2_UD_SEND_WQE_BYTE_36_VLAN_S,
267 ah->av.vlan);
268 roce_set_field(ud_sq_wqe->byte_36,
269 V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
270 V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
271 ah->av.hop_limit);
272 roce_set_field(ud_sq_wqe->byte_36,
273 V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
274 V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
275 0);
276 roce_set_field(ud_sq_wqe->byte_36,
277 V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
278 V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
279 0);
280 roce_set_field(ud_sq_wqe->byte_40,
281 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
282 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0);
283 roce_set_field(ud_sq_wqe->byte_40,
284 V2_UD_SEND_WQE_BYTE_40_SL_M,
285 V2_UD_SEND_WQE_BYTE_40_SL_S,
286 ah->av.sl_tclass_flowlabel >>
287 HNS_ROCE_SL_SHIFT);
288 roce_set_field(ud_sq_wqe->byte_40,
289 V2_UD_SEND_WQE_BYTE_40_PORTN_M,
290 V2_UD_SEND_WQE_BYTE_40_PORTN_S,
291 qp->port);
292
293 roce_set_field(ud_sq_wqe->byte_48,
294 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
295 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
296 hns_get_gid_index(hr_dev, qp->phy_port,
297 ah->av.gid_index));
298
299 memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
300 GID_LEN_V2);
301
302 dseg = get_send_extend_sge(qp,
303 sge_ind & (qp->sge.sge_cnt - 1));
304 for (i = 0; i < wr->num_sge; i++) {
305 set_data_seg_v2(dseg + i, wr->sg_list + i);
306 sge_ind++;
307 }
308
309 ind++;
310 } else if (ibqp->qp_type == IB_QPT_RC) {
311 rc_sq_wqe = wqe;
312 memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
313 for (i = 0; i < wr->num_sge; i++)
314 rc_sq_wqe->msg_len += wr->sg_list[i].length;
315
316 rc_sq_wqe->inv_key_immtdata = send_ieth(wr);
317
318 roce_set_bit(rc_sq_wqe->byte_4,
319 V2_RC_SEND_WQE_BYTE_4_FENCE_S,
320 (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
321
322 roce_set_bit(rc_sq_wqe->byte_4,
323 V2_RC_SEND_WQE_BYTE_4_SE_S,
324 (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
325
326 roce_set_bit(rc_sq_wqe->byte_4,
327 V2_RC_SEND_WQE_BYTE_4_CQE_S,
328 (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
329
330 roce_set_bit(rc_sq_wqe->byte_4,
331 V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
332
333 switch (wr->opcode) {
334 case IB_WR_RDMA_READ:
335 roce_set_field(rc_sq_wqe->byte_4,
336 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
337 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
338 HNS_ROCE_V2_WQE_OP_RDMA_READ);
339 rc_sq_wqe->rkey =
340 cpu_to_le32(rdma_wr(wr)->rkey);
341 rc_sq_wqe->va =
342 cpu_to_le64(rdma_wr(wr)->remote_addr);
343 break;
344 case IB_WR_RDMA_WRITE:
345 roce_set_field(rc_sq_wqe->byte_4,
346 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
347 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
348 HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
349 rc_sq_wqe->rkey =
350 cpu_to_le32(rdma_wr(wr)->rkey);
351 rc_sq_wqe->va =
352 cpu_to_le64(rdma_wr(wr)->remote_addr);
353 break;
354 case IB_WR_RDMA_WRITE_WITH_IMM:
355 roce_set_field(rc_sq_wqe->byte_4,
147 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 356 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
148 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 357 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
149 HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM); 358 HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
150 rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); 359 rc_sq_wqe->rkey =
151 rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); 360 cpu_to_le32(rdma_wr(wr)->rkey);
152 break; 361 rc_sq_wqe->va =
153 case IB_WR_SEND: 362 cpu_to_le64(rdma_wr(wr)->remote_addr);
154 roce_set_field(rc_sq_wqe->byte_4, 363 break;
155 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 364 case IB_WR_SEND:
156 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 365 roce_set_field(rc_sq_wqe->byte_4,
157 HNS_ROCE_V2_WQE_OP_SEND); 366 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
158 break; 367 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
159 case IB_WR_SEND_WITH_INV: 368 HNS_ROCE_V2_WQE_OP_SEND);
160 roce_set_field(rc_sq_wqe->byte_4, 369 break;
370 case IB_WR_SEND_WITH_INV:
371 roce_set_field(rc_sq_wqe->byte_4,
161 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 372 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
162 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 373 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
163 HNS_ROCE_V2_WQE_OP_SEND_WITH_INV); 374 HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
164 break; 375 break;
165 case IB_WR_SEND_WITH_IMM: 376 case IB_WR_SEND_WITH_IMM:
166 roce_set_field(rc_sq_wqe->byte_4, 377 roce_set_field(rc_sq_wqe->byte_4,
167 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 378 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
168 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 379 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
169 HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM); 380 HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
170 break; 381 break;
171 case IB_WR_LOCAL_INV: 382 case IB_WR_LOCAL_INV:
172 roce_set_field(rc_sq_wqe->byte_4, 383 roce_set_field(rc_sq_wqe->byte_4,
173 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 384 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
174 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 385 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
175 HNS_ROCE_V2_WQE_OP_LOCAL_INV); 386 HNS_ROCE_V2_WQE_OP_LOCAL_INV);
176 break; 387 break;
177 case IB_WR_ATOMIC_CMP_AND_SWP: 388 case IB_WR_ATOMIC_CMP_AND_SWP:
178 roce_set_field(rc_sq_wqe->byte_4, 389 roce_set_field(rc_sq_wqe->byte_4,
179 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 390 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
180 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 391 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
181 HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP); 392 HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP);
182 break; 393 break;
183 case IB_WR_ATOMIC_FETCH_AND_ADD: 394 case IB_WR_ATOMIC_FETCH_AND_ADD:
184 roce_set_field(rc_sq_wqe->byte_4, 395 roce_set_field(rc_sq_wqe->byte_4,
185 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 396 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
186 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 397 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
187 HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD); 398 HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD);
188 break; 399 break;
189 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 400 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
190 roce_set_field(rc_sq_wqe->byte_4, 401 roce_set_field(rc_sq_wqe->byte_4,
191 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 402 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
192 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 403 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
193 HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP); 404 HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
194 break; 405 break;
195 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: 406 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
196 roce_set_field(rc_sq_wqe->byte_4, 407 roce_set_field(rc_sq_wqe->byte_4,
197 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 408 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
198 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 409 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
199 HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD); 410 HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
200 break; 411 break;
201 default: 412 default:
202 roce_set_field(rc_sq_wqe->byte_4, 413 roce_set_field(rc_sq_wqe->byte_4,
203 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 414 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
204 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 415 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
205 HNS_ROCE_V2_WQE_OP_MASK); 416 HNS_ROCE_V2_WQE_OP_MASK);
206 break; 417 break;
207 }
208
209 wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
210 dseg = wqe;
211 if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
212 if (rc_sq_wqe->msg_len >
213 hr_dev->caps.max_sq_inline) {
214 ret = -EINVAL;
215 *bad_wr = wr;
216 dev_err(dev, "inline len(1-%d)=%d, illegal",
217 rc_sq_wqe->msg_len,
218 hr_dev->caps.max_sq_inline);
219 goto out;
220 } 418 }
221 419
222 for (i = 0; i < wr->num_sge; i++) { 420 wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
223 memcpy(wqe, ((void *)wr->sg_list[i].addr), 421 dseg = wqe;
224 wr->sg_list[i].length);
225 wqe += wr->sg_list[i].length;
226 wqe_sz += wr->sg_list[i].length;
227 }
228 422
229 roce_set_bit(rc_sq_wqe->byte_4, 423 ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
230 V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); 424 &sge_ind, bad_wr);
425 if (ret)
426 goto out;
427 ind++;
231 } else { 428 } else {
232 if (wr->num_sge <= 2) { 429 dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
233 for (i = 0; i < wr->num_sge; i++) 430 spin_unlock_irqrestore(&qp->sq.lock, flags);
234 set_data_seg_v2(dseg + i, 431 return -EOPNOTSUPP;
235 wr->sg_list + i);
236 } else {
237 roce_set_field(rc_sq_wqe->byte_20,
238 V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
239 V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
240 sge_ind & (qp->sge.sge_cnt - 1));
241
242 for (i = 0; i < 2; i++)
243 set_data_seg_v2(dseg + i,
244 wr->sg_list + i);
245
246 dseg = get_send_extend_sge(qp,
247 sge_ind & (qp->sge.sge_cnt - 1));
248
249 for (i = 0; i < wr->num_sge - 2; i++) {
250 set_data_seg_v2(dseg + i,
251 wr->sg_list + 2 + i);
252 sge_ind++;
253 }
254 }
255
256 roce_set_field(rc_sq_wqe->byte_16,
257 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
258 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
259 wr->num_sge);
260 wqe_sz += wr->num_sge *
261 sizeof(struct hns_roce_v2_wqe_data_seg);
262 } 432 }
263 ind++;
264 } 433 }
265 434
266out: 435out:
@@ -299,6 +468,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
299 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 468 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
300 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 469 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
301 struct hns_roce_v2_wqe_data_seg *dseg; 470 struct hns_roce_v2_wqe_data_seg *dseg;
471 struct hns_roce_rinl_sge *sge_list;
302 struct device *dev = hr_dev->dev; 472 struct device *dev = hr_dev->dev;
303 struct hns_roce_v2_db rq_db; 473 struct hns_roce_v2_db rq_db;
304 unsigned long flags; 474 unsigned long flags;
@@ -347,6 +517,14 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
347 dseg[i].addr = 0; 517 dseg[i].addr = 0;
348 } 518 }
349 519
520 /* rq support inline data */
521 sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
522 hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = (u32)wr->num_sge;
523 for (i = 0; i < wr->num_sge; i++) {
524 sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
525 sge_list[i].len = wr->sg_list[i].length;
526 }
527
350 hr_qp->rq.wrid[ind] = wr->wr_id; 528 hr_qp->rq.wrid[ind] = wr->wr_id;
351 529
352 ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); 530 ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
@@ -908,9 +1086,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
908 caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; 1086 caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
909 caps->num_uars = HNS_ROCE_V2_UAR_NUM; 1087 caps->num_uars = HNS_ROCE_V2_UAR_NUM;
910 caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; 1088 caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
911 caps->num_aeq_vectors = 1; 1089 caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
912 caps->num_comp_vectors = 63; 1090 caps->num_comp_vectors = HNS_ROCE_V2_COMP_VEC_NUM;
913 caps->num_other_vectors = 0; 1091 caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
914 caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; 1092 caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
915 caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; 1093 caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
916 caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; 1094 caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
@@ -955,12 +1133,18 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
955 caps->cqe_ba_pg_sz = 0; 1133 caps->cqe_ba_pg_sz = 0;
956 caps->cqe_buf_pg_sz = 0; 1134 caps->cqe_buf_pg_sz = 0;
957 caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; 1135 caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
1136 caps->eqe_ba_pg_sz = 0;
1137 caps->eqe_buf_pg_sz = 0;
1138 caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
958 caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; 1139 caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
959 1140
960 caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | 1141 caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
961 HNS_ROCE_CAP_FLAG_ROCE_V1_V2; 1142 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
1143 HNS_ROCE_CAP_FLAG_RQ_INLINE;
962 caps->pkey_table_len[0] = 1; 1144 caps->pkey_table_len[0] = 1;
963 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; 1145 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
1146 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
1147 caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
964 caps->local_ca_ack_delay = 0; 1148 caps->local_ca_ack_delay = 0;
965 caps->max_mtu = IB_MTU_4096; 1149 caps->max_mtu = IB_MTU_4096;
966 1150
@@ -1382,6 +1566,8 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
1382 1566
1383 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M, 1567 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M,
1384 V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID); 1568 V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID);
1569 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
1570 V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
1385 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, 1571 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
1386 V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); 1572 V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
1387 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, 1573 roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
@@ -1422,6 +1608,15 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
1422 1608
1423 roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M, 1609 roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
1424 V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3))); 1610 V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
1611
1612 roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
1613 V2_CQC_BYTE_56_CQ_MAX_CNT_M,
1614 V2_CQC_BYTE_56_CQ_MAX_CNT_S,
1615 HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
1616 roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
1617 V2_CQC_BYTE_56_CQ_PERIOD_M,
1618 V2_CQC_BYTE_56_CQ_PERIOD_S,
1619 HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
1425} 1620}
1426 1621
1427static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, 1622static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
@@ -1457,6 +1652,40 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
1457 return 0; 1652 return 0;
1458} 1653}
1459 1654
1655static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
1656 struct hns_roce_qp **cur_qp,
1657 struct ib_wc *wc)
1658{
1659 struct hns_roce_rinl_sge *sge_list;
1660 u32 wr_num, wr_cnt, sge_num;
1661 u32 sge_cnt, data_len, size;
1662 void *wqe_buf;
1663
1664 wr_num = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_WQE_INDX_M,
1665 V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
1666 wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
1667
1668 sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
1669 sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
1670 wqe_buf = get_recv_wqe(*cur_qp, wr_cnt);
1671 data_len = wc->byte_len;
1672
1673 for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
1674 size = min(sge_list[sge_cnt].len, data_len);
1675 memcpy((void *)sge_list[sge_cnt].addr, wqe_buf, size);
1676
1677 data_len -= size;
1678 wqe_buf += size;
1679 }
1680
1681 if (data_len) {
1682 wc->status = IB_WC_LOC_LEN_ERR;
1683 return -EAGAIN;
1684 }
1685
1686 return 0;
1687}
1688
1460static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, 1689static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1461 struct hns_roce_qp **cur_qp, struct ib_wc *wc) 1690 struct hns_roce_qp **cur_qp, struct ib_wc *wc)
1462{ 1691{
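The hns_roce_handle_recv_inl_wqe() helper added above scatters an inline receive payload from the RQ WQE buffer back into the SGEs recorded at post_recv time, and reports IB_WC_LOC_LEN_ERR when the payload does not fit. A minimal userspace sketch of that copy loop, assuming a simplified SGE structure in place of the driver's hns_roce_rinl_sge:

#include <stdio.h>
#include <string.h>

/* Hypothetical, simplified view of one posted receive SGE. */
struct sge {
	char *addr;
	unsigned int len;
};

/* Scatter data_len bytes from the receive WQE buffer into the recorded
 * SGEs; return -1 when the payload does not fit, mirroring the
 * IB_WC_LOC_LEN_ERR path above. */
static int copy_inline_payload(struct sge *sges, unsigned int sge_num,
			       const char *wqe_buf, unsigned int data_len)
{
	unsigned int i, size;

	for (i = 0; i < sge_num && data_len; i++) {
		size = sges[i].len < data_len ? sges[i].len : data_len;
		memcpy(sges[i].addr, wqe_buf, size);
		data_len -= size;
		wqe_buf += size;
	}

	return data_len ? -1 : 0;
}

int main(void)
{
	char a[4], b[8];
	struct sge sges[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
	const char payload[16] = "0123456789abcde";

	printf("fits:      %d\n", copy_inline_payload(sges, 2, payload, 12));
	printf("too large: %d\n", copy_inline_payload(sges, 2, payload, 16));
	return 0;
}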
@@ -1469,6 +1698,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1469 u32 opcode; 1698 u32 opcode;
1470 u32 status; 1699 u32 status;
1471 int qpn; 1700 int qpn;
1701 int ret;
1472 1702
1473 /* Find cqe according to consumer index */ 1703 /* Find cqe according to consumer index */
1474 cqe = next_cqe_sw_v2(hr_cq); 1704 cqe = next_cqe_sw_v2(hr_cq);
@@ -1636,7 +1866,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1636 case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: 1866 case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
1637 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 1867 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
1638 wc->wc_flags = IB_WC_WITH_IMM; 1868 wc->wc_flags = IB_WC_WITH_IMM;
1639 wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata); 1869 wc->ex.imm_data = cqe->immtdata;
1640 break; 1870 break;
1641 case HNS_ROCE_V2_OPCODE_SEND: 1871 case HNS_ROCE_V2_OPCODE_SEND:
1642 wc->opcode = IB_WC_RECV; 1872 wc->opcode = IB_WC_RECV;
@@ -1645,18 +1875,29 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1645 case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: 1875 case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
1646 wc->opcode = IB_WC_RECV; 1876 wc->opcode = IB_WC_RECV;
1647 wc->wc_flags = IB_WC_WITH_IMM; 1877 wc->wc_flags = IB_WC_WITH_IMM;
1648 wc->ex.imm_data = le32_to_cpu(cqe->rkey_immtdata); 1878 wc->ex.imm_data = cqe->immtdata;
1649 break; 1879 break;
1650 case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: 1880 case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
1651 wc->opcode = IB_WC_RECV; 1881 wc->opcode = IB_WC_RECV;
1652 wc->wc_flags = IB_WC_WITH_INVALIDATE; 1882 wc->wc_flags = IB_WC_WITH_INVALIDATE;
1653 wc->ex.invalidate_rkey = cqe->rkey_immtdata; 1883 wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
1654 break; 1884 break;
1655 default: 1885 default:
1656 wc->status = IB_WC_GENERAL_ERR; 1886 wc->status = IB_WC_GENERAL_ERR;
1657 break; 1887 break;
1658 } 1888 }
1659 1889
1890 if ((wc->qp->qp_type == IB_QPT_RC ||
1891 wc->qp->qp_type == IB_QPT_UC) &&
1892 (opcode == HNS_ROCE_V2_OPCODE_SEND ||
1893 opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
1894 opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
1895 (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) {
1896 ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc);
1897 if (ret)
1898 return -EAGAIN;
1899 }
1900
1660 /* Update tail pointer, record wr_id */ 1901 /* Update tail pointer, record wr_id */
1661 wq = &(*cur_qp)->rq; 1902 wq = &(*cur_qp)->rq;
1662 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 1903 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
@@ -1670,6 +1911,21 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1670 wc->wc_flags |= (roce_get_bit(cqe->byte_32, 1911 wc->wc_flags |= (roce_get_bit(cqe->byte_32,
1671 V2_CQE_BYTE_32_GRH_S) ? 1912 V2_CQE_BYTE_32_GRH_S) ?
1672 IB_WC_GRH : 0); 1913 IB_WC_GRH : 0);
1914 wc->port_num = roce_get_field(cqe->byte_32,
1915 V2_CQE_BYTE_32_PORTN_M, V2_CQE_BYTE_32_PORTN_S);
1916 wc->pkey_index = 0;
1917 memcpy(wc->smac, cqe->smac, 4);
1918 wc->smac[4] = roce_get_field(cqe->byte_28,
1919 V2_CQE_BYTE_28_SMAC_4_M,
1920 V2_CQE_BYTE_28_SMAC_4_S);
1921 wc->smac[5] = roce_get_field(cqe->byte_28,
1922 V2_CQE_BYTE_28_SMAC_5_M,
1923 V2_CQE_BYTE_28_SMAC_5_S);
1924 wc->vlan_id = 0xffff;
1925 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
1926 wc->network_hdr_type = roce_get_field(cqe->byte_28,
1927 V2_CQE_BYTE_28_PORT_TYPE_M,
1928 V2_CQE_BYTE_28_PORT_TYPE_S);
1673 } 1929 }
1674 1930
1675 return 0; 1931 return 0;
@@ -1859,8 +2115,39 @@ static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
1859 return ret; 2115 return ret;
1860} 2116}
1861 2117
2118static void set_access_flags(struct hns_roce_qp *hr_qp,
2119 struct hns_roce_v2_qp_context *context,
2120 struct hns_roce_v2_qp_context *qpc_mask,
2121 const struct ib_qp_attr *attr, int attr_mask)
2122{
2123 u8 dest_rd_atomic;
2124 u32 access_flags;
2125
2126 dest_rd_atomic = !!(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
2127 attr->max_dest_rd_atomic : hr_qp->resp_depth;
2128
2129 access_flags = !!(attr_mask & IB_QP_ACCESS_FLAGS) ?
2130 attr->qp_access_flags : hr_qp->atomic_rd_en;
2131
2132 if (!dest_rd_atomic)
2133 access_flags &= IB_ACCESS_REMOTE_WRITE;
2134
2135 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
2136 !!(access_flags & IB_ACCESS_REMOTE_READ));
2137 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0);
2138
2139 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
2140 !!(access_flags & IB_ACCESS_REMOTE_WRITE));
2141 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0);
2142
2143 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
2144 !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
2145 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
2146}
2147
1862static void modify_qp_reset_to_init(struct ib_qp *ibqp, 2148static void modify_qp_reset_to_init(struct ib_qp *ibqp,
1863 const struct ib_qp_attr *attr, 2149 const struct ib_qp_attr *attr,
2150 int attr_mask,
1864 struct hns_roce_v2_qp_context *context, 2151 struct hns_roce_v2_qp_context *context,
1865 struct hns_roce_v2_qp_context *qpc_mask) 2152 struct hns_roce_v2_qp_context *qpc_mask)
1866{ 2153{
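set_access_flags() above recomputes the RRE/RWE/ATE bits from whichever of qp_access_flags and max_dest_rd_atomic is currently being modified, and keeps only remote write when the responder depth is zero (a QP with no inbound read/atomic resources cannot honour those operations). A standalone sketch of that masking rule; the bit values are illustrative, not the real enum ib_access_flags encoding:

#include <stdio.h>

#define ACC_REMOTE_WRITE	(1u << 0)	/* illustrative values */
#define ACC_REMOTE_READ		(1u << 1)
#define ACC_REMOTE_ATOMIC	(1u << 2)

/* If the responder depth is zero, only remote write may remain set. */
static unsigned int effective_access(unsigned int requested,
				     unsigned int resp_depth)
{
	if (!resp_depth)
		requested &= ACC_REMOTE_WRITE;
	return requested;
}

int main(void)
{
	unsigned int req = ACC_REMOTE_WRITE | ACC_REMOTE_READ |
			   ACC_REMOTE_ATOMIC;

	printf("resp_depth=4 -> 0x%x\n", effective_access(req, 4));
	printf("resp_depth=0 -> 0x%x\n", effective_access(req, 0));
	return 0;
}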
@@ -1877,9 +2164,18 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
1877 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, 2164 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
1878 V2_QPC_BYTE_4_TST_S, 0); 2165 V2_QPC_BYTE_4_TST_S, 0);
1879 2166
1880 roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, 2167 if (ibqp->qp_type == IB_QPT_GSI)
1881 V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ? 2168 roce_set_field(context->byte_4_sqpn_tst,
1882 ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); 2169 V2_QPC_BYTE_4_SGE_SHIFT_M,
2170 V2_QPC_BYTE_4_SGE_SHIFT_S,
2171 ilog2((unsigned int)hr_qp->sge.sge_cnt));
2172 else
2173 roce_set_field(context->byte_4_sqpn_tst,
2174 V2_QPC_BYTE_4_SGE_SHIFT_M,
2175 V2_QPC_BYTE_4_SGE_SHIFT_S,
2176 hr_qp->sq.max_gs > 2 ?
2177 ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
2178
1883 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, 2179 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
1884 V2_QPC_BYTE_4_SGE_SHIFT_S, 0); 2180 V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
1885 2181
@@ -1944,18 +2240,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
1944 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); 2240 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
1945 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); 2241 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
1946 2242
1947 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 2243 if (attr_mask & IB_QP_QKEY) {
1948 !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); 2244 context->qkey_xrcd = attr->qkey;
1949 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0); 2245 qpc_mask->qkey_xrcd = 0;
1950 2246 hr_qp->qkey = attr->qkey;
1951 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 2247 }
1952 !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE));
1953 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0);
1954
1955 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
1956 !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC));
1957 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
1958 2248
2249 roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 1);
1959 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0); 2250 roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
1960 2251
1961 roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M, 2252 roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
@@ -2176,9 +2467,17 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
2176 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, 2467 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
2177 V2_QPC_BYTE_4_TST_S, 0); 2468 V2_QPC_BYTE_4_TST_S, 0);
2178 2469
2179 roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, 2470 if (ibqp->qp_type == IB_QPT_GSI)
2180 V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ? 2471 roce_set_field(context->byte_4_sqpn_tst,
2181 ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); 2472 V2_QPC_BYTE_4_SGE_SHIFT_M,
2473 V2_QPC_BYTE_4_SGE_SHIFT_S,
2474 ilog2((unsigned int)hr_qp->sge.sge_cnt));
2475 else
2476 roce_set_field(context->byte_4_sqpn_tst,
2477 V2_QPC_BYTE_4_SGE_SHIFT_M,
2478 V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
2479 ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
2480
2182 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, 2481 roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
2183 V2_QPC_BYTE_4_SGE_SHIFT_S, 0); 2482 V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
2184 2483
@@ -2239,7 +2538,7 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
2239 V2_QPC_BYTE_80_RX_CQN_S, 0); 2538 V2_QPC_BYTE_80_RX_CQN_S, 0);
2240 2539
2241 roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, 2540 roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
2242 V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn); 2541 V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
2243 roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M, 2542 roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
2244 V2_QPC_BYTE_252_TX_CQN_S, 0); 2543 V2_QPC_BYTE_252_TX_CQN_S, 0);
2245 2544
@@ -2255,10 +2554,10 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
2255 V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0); 2554 V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0);
2256 } 2555 }
2257 2556
2258 if (attr_mask & IB_QP_PKEY_INDEX) 2557 if (attr_mask & IB_QP_QKEY) {
2259 context->qkey_xrcd = attr->pkey_index; 2558 context->qkey_xrcd = attr->qkey;
2260 else 2559 qpc_mask->qkey_xrcd = 0;
2261 context->qkey_xrcd = hr_qp->pkey_index; 2560 }
2262 2561
2263 roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, 2562 roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
2264 V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); 2563 V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
@@ -2354,7 +2653,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2354 roce_set_field(context->byte_20_smac_sgid_idx, 2653 roce_set_field(context->byte_20_smac_sgid_idx,
2355 V2_QPC_BYTE_20_SGE_HOP_NUM_M, 2654 V2_QPC_BYTE_20_SGE_HOP_NUM_M,
2356 V2_QPC_BYTE_20_SGE_HOP_NUM_S, 2655 V2_QPC_BYTE_20_SGE_HOP_NUM_S,
2357 hr_qp->sq.max_gs > 2 ? hr_dev->caps.mtt_hop_num : 0); 2656 ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
2657 hr_dev->caps.mtt_hop_num : 0);
2358 roce_set_field(qpc_mask->byte_20_smac_sgid_idx, 2658 roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
2359 V2_QPC_BYTE_20_SGE_HOP_NUM_M, 2659 V2_QPC_BYTE_20_SGE_HOP_NUM_M,
2360 V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); 2660 V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
@@ -2463,11 +2763,14 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2463 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0); 2763 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0);
2464 } 2764 }
2465 2765
2466 roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, 2766 if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
2467 V2_QPC_BYTE_140_RR_MAX_S, 2767 attr->max_dest_rd_atomic) {
2468 ilog2((unsigned int)attr->max_dest_rd_atomic)); 2768 roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
2469 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, 2769 V2_QPC_BYTE_140_RR_MAX_S,
2470 V2_QPC_BYTE_140_RR_MAX_S, 0); 2770 fls(attr->max_dest_rd_atomic - 1));
2771 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
2772 V2_QPC_BYTE_140_RR_MAX_S, 0);
2773 }
2471 2774
2472 roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, 2775 roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
2473 V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num); 2776 V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
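The RR_MAX field above (and the SR_MAX field programmed later in modify_qp_rtr_to_rts()) now stores fls(n - 1) rather than ilog2(n), so a requested depth that is not a power of two is rounded up instead of silently rounded down. A small comparison using portable stand-ins for the kernel helpers (assumed to behave like include/linux/bitops.h for these values):

#include <stdio.h>

/* fls(x): index of the most significant set bit, 1-based; fls(0) == 0. */
static int my_fls(unsigned int x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static int my_ilog2(unsigned int x)
{
	return my_fls(x) - 1;	/* only meaningful for x > 0 */
}

int main(void)
{
	unsigned int n;

	for (n = 1; n <= 8; n++)
		printf("n=%u  ilog2(n)=%d -> depth %u   fls(n-1)=%d -> depth %u\n",
		       n, my_ilog2(n), 1u << my_ilog2(n),
		       my_fls(n - 1), 1u << my_fls(n - 1));
	return 0;
}

For n = 5 the old encoding yields 2 (a depth of only 4), while fls(4) = 3 covers the requested 5 with a depth of 8.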
@@ -2511,8 +2814,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2511 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 2814 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
2512 V2_QPC_BYTE_24_TC_S, 0); 2815 V2_QPC_BYTE_24_TC_S, 0);
2513 2816
2514 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, 2817 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
2515 V2_QPC_BYTE_24_MTU_S, attr->path_mtu); 2818 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
2819 V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
2820 else
2821 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
2822 V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
2823
2516 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, 2824 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
2517 V2_QPC_BYTE_24_MTU_S, 0); 2825 V2_QPC_BYTE_24_MTU_S, 0);
2518 2826
@@ -2557,12 +2865,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2557 V2_QPC_BYTE_168_LP_SGEN_INI_M, 2865 V2_QPC_BYTE_168_LP_SGEN_INI_M,
2558 V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); 2866 V2_QPC_BYTE_168_LP_SGEN_INI_S, 0);
2559 2867
2560 roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
2561 V2_QPC_BYTE_208_SR_MAX_S,
2562 ilog2((unsigned int)attr->max_rd_atomic));
2563 roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
2564 V2_QPC_BYTE_208_SR_MAX_S, 0);
2565
2566 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, 2868 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
2567 V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); 2869 V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
2568 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, 2870 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
@@ -2625,13 +2927,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
2625 V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0); 2927 V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
2626 2928
2627 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 2929 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
2628 context->sq_cur_sge_blk_addr = hr_qp->sq.max_gs > 2 ? 2930 context->sq_cur_sge_blk_addr =
2931 ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
2629 ((u32)(mtts[hr_qp->sge.offset / page_size] 2932 ((u32)(mtts[hr_qp->sge.offset / page_size]
2630 >> PAGE_ADDR_SHIFT)) : 0; 2933 >> PAGE_ADDR_SHIFT)) : 0;
2631 roce_set_field(context->byte_184_irrl_idx, 2934 roce_set_field(context->byte_184_irrl_idx,
2632 V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, 2935 V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
2633 V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 2936 V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
2634 hr_qp->sq.max_gs > 2 ? 2937 ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
2635 (mtts[hr_qp->sge.offset / page_size] >> 2938 (mtts[hr_qp->sge.offset / page_size] >>
2636 (32 + PAGE_ADDR_SHIFT)) : 0); 2939 (32 + PAGE_ADDR_SHIFT)) : 0);
2637 qpc_mask->sq_cur_sge_blk_addr = 0; 2940 qpc_mask->sq_cur_sge_blk_addr = 0;
@@ -2766,6 +3069,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
2766 roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M, 3069 roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_SQ_MAX_PSN_M,
2767 V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0); 3070 V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0);
2768 3071
3072 if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
3073 roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
3074 V2_QPC_BYTE_208_SR_MAX_S,
3075 fls(attr->max_rd_atomic - 1));
3076 roce_set_field(qpc_mask->byte_208_irrl,
3077 V2_QPC_BYTE_208_SR_MAX_M,
3078 V2_QPC_BYTE_208_SR_MAX_S, 0);
3079 }
2769 return 0; 3080 return 0;
2770} 3081}
2771 3082
@@ -2794,7 +3105,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
2794 */ 3105 */
2795 memset(qpc_mask, 0xff, sizeof(*qpc_mask)); 3106 memset(qpc_mask, 0xff, sizeof(*qpc_mask));
2796 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 3107 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
2797 modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); 3108 modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
3109 qpc_mask);
2798 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { 3110 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
2799 modify_qp_init_to_init(ibqp, attr, attr_mask, context, 3111 modify_qp_init_to_init(ibqp, attr, attr_mask, context,
2800 qpc_mask); 3112 qpc_mask);
@@ -2829,6 +3141,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
2829 goto out; 3141 goto out;
2830 } 3142 }
2831 3143
3144 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
3145 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
3146
2832 /* Every status migrate must change state */ 3147 /* Every status migrate must change state */
2833 roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, 3148 roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M,
2834 V2_QPC_BYTE_60_QP_ST_S, new_state); 3149 V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -2845,6 +3160,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
2845 3160
2846 hr_qp->state = new_state; 3161 hr_qp->state = new_state;
2847 3162
3163 if (attr_mask & IB_QP_ACCESS_FLAGS)
3164 hr_qp->atomic_rd_en = attr->qp_access_flags;
3165
2848 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 3166 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2849 hr_qp->resp_depth = attr->max_dest_rd_atomic; 3167 hr_qp->resp_depth = attr->max_dest_rd_atomic;
2850 if (attr_mask & IB_QP_PORT) { 3168 if (attr_mask & IB_QP_PORT) {
@@ -3098,6 +3416,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
3098 hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); 3416 hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
3099 } 3417 }
3100 3418
3419 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
3420 kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
3421 kfree(hr_qp->rq_inl_buf.wqe_list);
3422 }
3423
3101 return 0; 3424 return 0;
3102} 3425}
3103 3426
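The destroy path above frees only wqe_list[0].sg_list, which works because the per-WQE SGE arrays are carved out of a single flat allocation made at QP creation time (in code outside this file). A hypothetical userspace sketch of that allocation pattern, with simplified structure names:

#include <stdlib.h>

struct rinl_sge { void *addr; unsigned int len; };
struct rinl_wqe { struct rinl_sge *sg_list; unsigned int sge_cnt; };
struct rinl_buf { struct rinl_wqe *wqe_list; unsigned int wqe_cnt; };

/* One wqe_list array plus one flat sg_list array that every entry points
 * into; freeing wqe_list[0].sg_list therefore releases all of them. */
static int alloc_rq_inline_buf(struct rinl_buf *buf, unsigned int wqe_cnt,
			       unsigned int max_sge)
{
	unsigned int i;

	buf->wqe_list = calloc(wqe_cnt, sizeof(*buf->wqe_list));
	if (!buf->wqe_list)
		return -1;

	buf->wqe_list[0].sg_list = calloc((size_t)wqe_cnt * max_sge,
					  sizeof(struct rinl_sge));
	if (!buf->wqe_list[0].sg_list) {
		free(buf->wqe_list);
		return -1;
	}

	for (i = 1; i < wqe_cnt; i++)
		buf->wqe_list[i].sg_list = &buf->wqe_list[0].sg_list[i * max_sge];

	buf->wqe_cnt = wqe_cnt;
	return 0;
}

static void free_rq_inline_buf(struct rinl_buf *buf)
{
	free(buf->wqe_list[0].sg_list);	/* frees every entry's SGEs */
	free(buf->wqe_list);
}

int main(void)
{
	struct rinl_buf buf;

	if (!alloc_rq_inline_buf(&buf, 64, 4))
		free_rq_inline_buf(&buf);
	return 0;
}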
@@ -3162,6 +3485,1146 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
3162 return ret; 3485 return ret;
3163} 3486}
3164 3487
3488static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
3489{
3490 u32 doorbell[2];
3491
3492 doorbell[0] = 0;
3493 doorbell[1] = 0;
3494
3495 if (eq->type_flag == HNS_ROCE_AEQ) {
3496 roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
3497 HNS_ROCE_V2_EQ_DB_CMD_S,
3498 eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
3499 HNS_ROCE_EQ_DB_CMD_AEQ :
3500 HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
3501 } else {
3502 roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_TAG_M,
3503 HNS_ROCE_V2_EQ_DB_TAG_S, eq->eqn);
3504
3505 roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
3506 HNS_ROCE_V2_EQ_DB_CMD_S,
3507 eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
3508 HNS_ROCE_EQ_DB_CMD_CEQ :
3509 HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
3510 }
3511
3512 roce_set_field(doorbell[1], HNS_ROCE_V2_EQ_DB_PARA_M,
3513 HNS_ROCE_V2_EQ_DB_PARA_S,
3514 (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M));
3515
3516 hns_roce_write64_k(doorbell, eq->doorbell);
3517}
3518
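set_eq_cons_index_v2() above assembles the two 32-bit doorbell words with roce_set_field(), i.e. mask-and-shift insertion of the tag, the arm command and the consumer index before the 64-bit write. A self-contained sketch of the same pattern; the field layout below is made up for illustration and is not the real ROCEE_VF_EQ_DB register format:

#include <stdio.h>

typedef unsigned int u32;

/* Minimal stand-in for the driver's roce_set_field(): clear the masked
 * bits, then insert the shifted value. */
static void set_field(u32 *word, u32 mask, u32 shift, u32 val)
{
	*word = (*word & ~mask) | ((val << shift) & mask);
}

#define DB_TAG_M	0x000000ffu	/* illustrative layout only */
#define DB_TAG_S	0
#define DB_CMD_M	0x00000f00u
#define DB_CMD_S	8
#define DB_PARA_M	0x00ffffffu
#define DB_PARA_S	0

int main(void)
{
	u32 doorbell[2] = { 0, 0 };

	set_field(&doorbell[0], DB_TAG_M, DB_TAG_S, 5);		/* eqn */
	set_field(&doorbell[0], DB_CMD_M, DB_CMD_S, 2);		/* arm command */
	set_field(&doorbell[1], DB_PARA_M, DB_PARA_S, 0x1234);	/* cons_index */

	printf("doorbell[0]=0x%08x doorbell[1]=0x%08x\n",
	       doorbell[0], doorbell[1]);
	return 0;
}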
3519static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
3520 struct hns_roce_aeqe *aeqe,
3521 u32 qpn)
3522{
3523 struct device *dev = hr_dev->dev;
3524 int sub_type;
3525
3526 dev_warn(dev, "Local work queue catastrophic error.\n");
3527 sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
3528 HNS_ROCE_V2_AEQE_SUB_TYPE_S);
3529 switch (sub_type) {
3530 case HNS_ROCE_LWQCE_QPC_ERROR:
3531 dev_warn(dev, "QP %d, QPC error.\n", qpn);
3532 break;
3533 case HNS_ROCE_LWQCE_MTU_ERROR:
3534 dev_warn(dev, "QP %d, MTU error.\n", qpn);
3535 break;
3536 case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
3537 dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
3538 break;
3539 case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
3540 dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
3541 break;
3542 case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
3543 dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
3544 break;
3545 default:
3546 dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
3547 break;
3548 }
3549}
3550
3551static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
3552 struct hns_roce_aeqe *aeqe, u32 qpn)
3553{
3554 struct device *dev = hr_dev->dev;
3555 int sub_type;
3556
3557 dev_warn(dev, "Local access violation work queue error.\n");
3558 sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
3559 HNS_ROCE_V2_AEQE_SUB_TYPE_S);
3560 switch (sub_type) {
3561 case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
3562 dev_warn(dev, "QP %d, R_key violation.\n", qpn);
3563 break;
3564 case HNS_ROCE_LAVWQE_LENGTH_ERROR:
3565 dev_warn(dev, "QP %d, length error.\n", qpn);
3566 break;
3567 case HNS_ROCE_LAVWQE_VA_ERROR:
3568 dev_warn(dev, "QP %d, VA error.\n", qpn);
3569 break;
3570 case HNS_ROCE_LAVWQE_PD_ERROR:
3571 dev_err(dev, "QP %d, PD error.\n", qpn);
3572 break;
3573 case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
3574 dev_warn(dev, "QP %d, rw acc error.\n", qpn);
3575 break;
3576 case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
3577 dev_warn(dev, "QP %d, key state error.\n", qpn);
3578 break;
3579 case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
3580 dev_warn(dev, "QP %d, MR operation error.\n", qpn);
3581 break;
3582 default:
3583 dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
3584 break;
3585 }
3586}
3587
3588static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
3589 struct hns_roce_aeqe *aeqe,
3590 int event_type)
3591{
3592 struct device *dev = hr_dev->dev;
3593 u32 qpn;
3594
3595 qpn = roce_get_field(aeqe->event.qp_event.qp,
3596 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
3597 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
3598
3599 switch (event_type) {
3600 case HNS_ROCE_EVENT_TYPE_COMM_EST:
3601 dev_warn(dev, "Communication established.\n");
3602 break;
3603 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
3604 dev_warn(dev, "Send queue drained.\n");
3605 break;
3606 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
3607 hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
3608 break;
3609 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
3610 dev_warn(dev, "Invalid request local work queue error.\n");
3611 break;
3612 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
3613 hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
3614 break;
3615 default:
3616 break;
3617 }
3618
3619 hns_roce_qp_event(hr_dev, qpn, event_type);
3620}
3621
3622static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
3623 struct hns_roce_aeqe *aeqe,
3624 int event_type)
3625{
3626 struct device *dev = hr_dev->dev;
3627 u32 cqn;
3628
3629 cqn = roce_get_field(aeqe->event.cq_event.cq,
3630 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
3631 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
3632
3633 switch (event_type) {
3634 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
3635 dev_warn(dev, "CQ 0x%x access err.\n", cqn);
3636 break;
3637 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
3638 dev_warn(dev, "CQ 0x%x overflow\n", cqn);
3639 break;
3640 default:
3641 break;
3642 }
3643
3644 hns_roce_cq_event(hr_dev, cqn, event_type);
3645}
3646
3647static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
3648{
3649 u32 buf_chk_sz;
3650 unsigned long off;
3651
3652 buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
3653 off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
3654
3655 return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
3656 off % buf_chk_sz);
3657}
3658
3659static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
3660{
3661 u32 buf_chk_sz;
3662 unsigned long off;
3663
3664 buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
3665
3666 off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
3667
3668 if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
3669 return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
3670 off % buf_chk_sz);
3671 else
3672 return (struct hns_roce_aeqe *)((u8 *)
3673 (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
3674}
3675
3676static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
3677{
3678 struct hns_roce_aeqe *aeqe;
3679
3680 if (!eq->hop_num)
3681 aeqe = get_aeqe_v2(eq, eq->cons_index);
3682 else
3683 aeqe = mhop_get_aeqe(eq, eq->cons_index);
3684
3685 return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
3686 !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
3687}
3688
3689static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3690 struct hns_roce_eq *eq)
3691{
3692 struct device *dev = hr_dev->dev;
3693 struct hns_roce_aeqe *aeqe;
3694 int aeqe_found = 0;
3695 int event_type;
3696
3697 while ((aeqe = next_aeqe_sw_v2(eq))) {
3698
3699 /* Make sure we read AEQ entry after we have checked the
3700 * ownership bit
3701 */
3702 dma_rmb();
3703
3704 event_type = roce_get_field(aeqe->asyn,
3705 HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
3706 HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
3707
3708 switch (event_type) {
3709 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
3710 dev_warn(dev, "Path migrated succeeded.\n");
3711 break;
3712 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
3713 dev_warn(dev, "Path migration failed.\n");
3714 break;
3715 case HNS_ROCE_EVENT_TYPE_COMM_EST:
3716 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
3717 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
3718 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
3719 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
3720 hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type);
3721 break;
3722 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
3723 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
3724 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
3725 dev_warn(dev, "SRQ not support.\n");
3726 break;
3727 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
3728 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
3729 hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type);
3730 break;
3731 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
3732 dev_warn(dev, "DB overflow.\n");
3733 break;
3734 case HNS_ROCE_EVENT_TYPE_MB:
3735 hns_roce_cmd_event(hr_dev,
3736 le16_to_cpu(aeqe->event.cmd.token),
3737 aeqe->event.cmd.status,
3738 le64_to_cpu(aeqe->event.cmd.out_param));
3739 break;
3740 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
3741 dev_warn(dev, "CEQ overflow.\n");
3742 break;
3743 case HNS_ROCE_EVENT_TYPE_FLR:
3744 dev_warn(dev, "Function level reset.\n");
3745 break;
3746 default:
3747 dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
3748 event_type, eq->eqn, eq->cons_index);
3749 break;
3750 }
3751
3752 ++eq->cons_index;
3753 aeqe_found = 1;
3754
3755 if (eq->cons_index > (2 * eq->entries - 1)) {
3756 dev_warn(dev, "cons_index overflow, set back to 0.\n");
3757 eq->cons_index = 0;
3758 }
3759 }
3760
3761 set_eq_cons_index_v2(eq);
3762 return aeqe_found;
3763}
3764
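next_aeqe_sw_v2() (and next_ceqe_sw_v2() below) decide whether an event queue entry belongs to software by comparing the entry's owner bit with the phase implied by the consumer index: cons_index runs up to 2 * entries before wrapping, so (cons_index & entries) flips once per lap of the ring. A minimal simulation of that scheme on an 8-entry ring:

#include <stdio.h>

typedef unsigned int u32;

#define EQ_ENTRIES 8u	/* power of two, like the real EQ depths */

struct eqe {
	u32 owner;	/* toggled by "hardware" on every lap of the ring */
};

/* Software owns the entry when its owner bit differs from the software
 * phase; this is the (owner ^ phase) test used above. */
static struct eqe *next_eqe_sw(struct eqe *ring, u32 cons_index)
{
	struct eqe *e = &ring[cons_index & (EQ_ENTRIES - 1)];
	u32 sw_phase = !!(cons_index & EQ_ENTRIES);

	return (!!e->owner ^ sw_phase) ? e : NULL;
}

int main(void)
{
	struct eqe ring[EQ_ENTRIES] = { { 0 } };
	u32 ci = 0, produced;
	struct eqe *e;

	/* "Hardware" produces 12 events over two laps, writing owner = 1 on
	 * even laps and owner = 0 on odd laps. */
	for (produced = 0; produced < 12; produced++) {
		ring[produced % EQ_ENTRIES].owner = !((produced / EQ_ENTRIES) & 1);

		while ((e = next_eqe_sw(ring, ci)) != NULL) {
			printf("consumed event %u (slot %u)\n",
			       ci, ci % EQ_ENTRIES);
			ci++;
		}
	}
	return 0;
}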
3765static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
3766{
3767 u32 buf_chk_sz;
3768 unsigned long off;
3769
3770 buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
3771 off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
3772
3773 return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
3774 off % buf_chk_sz);
3775}
3776
3777static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
3778{
3779 u32 buf_chk_sz;
3780 unsigned long off;
3781
3782 buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
3783
3784 off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
3785
3786 if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
3787 return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
3788 off % buf_chk_sz);
3789 else
3790 return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
3791 buf_chk_sz]) + off % buf_chk_sz);
3792}
3793
3794static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
3795{
3796 struct hns_roce_ceqe *ceqe;
3797
3798 if (!eq->hop_num)
3799 ceqe = get_ceqe_v2(eq, eq->cons_index);
3800 else
3801 ceqe = mhop_get_ceqe(eq, eq->cons_index);
3802
3803 return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
3804 (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
3805}
3806
3807static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
3808 struct hns_roce_eq *eq)
3809{
3810 struct device *dev = hr_dev->dev;
3811 struct hns_roce_ceqe *ceqe;
3812 int ceqe_found = 0;
3813 u32 cqn;
3814
3815 while ((ceqe = next_ceqe_sw_v2(eq))) {
3816
3817 /* Make sure we read CEQ entry after we have checked the
3818 * ownership bit
3819 */
3820 dma_rmb();
3821
3822 cqn = roce_get_field(ceqe->comp,
3823 HNS_ROCE_V2_CEQE_COMP_CQN_M,
3824 HNS_ROCE_V2_CEQE_COMP_CQN_S);
3825
3826 hns_roce_cq_completion(hr_dev, cqn);
3827
3828 ++eq->cons_index;
3829 ceqe_found = 1;
3830
3831 if (eq->cons_index > (2 * eq->entries - 1)) {
3832 dev_warn(dev, "cons_index overflow, set back to 0.\n");
3833 eq->cons_index = 0;
3834 }
3835 }
3836
3837 set_eq_cons_index_v2(eq);
3838
3839 return ceqe_found;
3840}
3841
3842static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
3843{
3844 struct hns_roce_eq *eq = eq_ptr;
3845 struct hns_roce_dev *hr_dev = eq->hr_dev;
3846 int int_work = 0;
3847
3848 if (eq->type_flag == HNS_ROCE_CEQ)
3849 /* Completion event interrupt */
3850 int_work = hns_roce_v2_ceq_int(hr_dev, eq);
3851 else
3852 /* Asynchronous event interrupt */
3853 int_work = hns_roce_v2_aeq_int(hr_dev, eq);
3854
3855 return IRQ_RETVAL(int_work);
3856}
3857
3858static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
3859{
3860 struct hns_roce_dev *hr_dev = dev_id;
3861 struct device *dev = hr_dev->dev;
3862 int int_work = 0;
3863 u32 int_st;
3864 u32 int_en;
3865
3866 /* Abnormal interrupt */
3867 int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
3868 int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
3869
3870 if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
3871 dev_err(dev, "AEQ overflow!\n");
3872
3873 roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
3874 roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
3875
3876 roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
3877 roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
3878
3879 int_work = 1;
3880 } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
3881 dev_err(dev, "BUS ERR!\n");
3882
3883 roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1);
3884 roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
3885
3886 roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
3887 roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
3888
3889 int_work = 1;
3890 } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
3891 dev_err(dev, "OTHER ERR!\n");
3892
3893 roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1);
3894 roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
3895
3896 roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
3897 roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
3898
3899 int_work = 1;
3900 } else
3901 dev_err(dev, "There is no abnormal irq found!\n");
3902
3903 return IRQ_RETVAL(int_work);
3904}
3905
3906static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
3907 int eq_num, int enable_flag)
3908{
3909 int i;
3910
3911 if (enable_flag == EQ_ENABLE) {
3912 for (i = 0; i < eq_num; i++)
3913 roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
3914 i * EQ_REG_OFFSET,
3915 HNS_ROCE_V2_VF_EVENT_INT_EN_M);
3916
3917 roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
3918 HNS_ROCE_V2_VF_ABN_INT_EN_M);
3919 roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
3920 HNS_ROCE_V2_VF_ABN_INT_CFG_M);
3921 } else {
3922 for (i = 0; i < eq_num; i++)
3923 roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
3924 i * EQ_REG_OFFSET,
3925 HNS_ROCE_V2_VF_EVENT_INT_EN_M & 0x0);
3926
3927 roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
3928 HNS_ROCE_V2_VF_ABN_INT_EN_M & 0x0);
3929 roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
3930 HNS_ROCE_V2_VF_ABN_INT_CFG_M & 0x0);
3931 }
3932}
3933
3934static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
3935{
3936 struct device *dev = hr_dev->dev;
3937 int ret;
3938
3939 if (eqn < hr_dev->caps.num_comp_vectors)
3940 ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
3941 0, HNS_ROCE_CMD_DESTROY_CEQC,
3942 HNS_ROCE_CMD_TIMEOUT_MSECS);
3943 else
3944 ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
3945 0, HNS_ROCE_CMD_DESTROY_AEQC,
3946 HNS_ROCE_CMD_TIMEOUT_MSECS);
3947 if (ret)
3948 dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
3949}
3950
3951static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
3952 struct hns_roce_eq *eq)
3953{
3954 struct device *dev = hr_dev->dev;
3955 u64 idx;
3956 u64 size;
3957 u32 buf_chk_sz;
3958 u32 bt_chk_sz;
3959 u32 mhop_num;
3960 int eqe_alloc;
3961 int ba_num;
3962 int i = 0;
3963 int j = 0;
3964
3965 mhop_num = hr_dev->caps.eqe_hop_num;
3966 buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
3967 bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
3968 ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) /
3969 buf_chk_sz;
3970
3971 /* hop_num = 0 */
3972 if (mhop_num == HNS_ROCE_HOP_NUM_0) {
3973 dma_free_coherent(dev, (unsigned int)(eq->entries *
3974 eq->eqe_size), eq->bt_l0, eq->l0_dma);
3975 return;
3976 }
3977
3978 /* hop_num = 1 or hop = 2 */
3979 dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
3980 if (mhop_num == 1) {
3981 for (i = 0; i < eq->l0_last_num; i++) {
3982 if (i == eq->l0_last_num - 1) {
3983 eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
3984 size = (eq->entries - eqe_alloc) * eq->eqe_size;
3985 dma_free_coherent(dev, size, eq->buf[i],
3986 eq->buf_dma[i]);
3987 break;
3988 }
3989 dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
3990 eq->buf_dma[i]);
3991 }
3992 } else if (mhop_num == 2) {
3993 for (i = 0; i < eq->l0_last_num; i++) {
3994 dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
3995 eq->l1_dma[i]);
3996
3997 for (j = 0; j < bt_chk_sz / 8; j++) {
3998 idx = i * (bt_chk_sz / 8) + j;
3999 if ((i == eq->l0_last_num - 1)
4000 && j == eq->l1_last_num - 1) {
4001 eqe_alloc = (buf_chk_sz / eq->eqe_size)
4002 * idx;
4003 size = (eq->entries - eqe_alloc)
4004 * eq->eqe_size;
4005 dma_free_coherent(dev, size,
4006 eq->buf[idx],
4007 eq->buf_dma[idx]);
4008 break;
4009 }
4010 dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
4011 eq->buf_dma[idx]);
4012 }
4013 }
4014 }
4015 kfree(eq->buf_dma);
4016 kfree(eq->buf);
4017 kfree(eq->l1_dma);
4018 kfree(eq->bt_l1);
4019 eq->buf_dma = NULL;
4020 eq->buf = NULL;
4021 eq->l1_dma = NULL;
4022 eq->bt_l1 = NULL;
4023}
4024
4025static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
4026 struct hns_roce_eq *eq)
4027{
4028 u32 buf_chk_sz;
4029
4030 buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
4031
4032 if (hr_dev->caps.eqe_hop_num) {
4033 hns_roce_mhop_free_eq(hr_dev, eq);
4034 return;
4035 }
4036
4037 if (eq->buf_list)
4038 dma_free_coherent(hr_dev->dev, buf_chk_sz,
4039 eq->buf_list->buf, eq->buf_list->map);
4040}
4041
4042static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
4043 struct hns_roce_eq *eq,
4044 void *mb_buf)
4045{
4046 struct hns_roce_eq_context *eqc;
4047
4048 eqc = mb_buf;
4049 memset(eqc, 0, sizeof(struct hns_roce_eq_context));
4050
4051 /* init eqc */
4052 eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
4053 eq->hop_num = hr_dev->caps.eqe_hop_num;
4054 eq->cons_index = 0;
4055 eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
4056 eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
4057 eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
4058 eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz;
4059 eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
4060 eq->shift = ilog2((unsigned int)eq->entries);
4061
4062 if (!eq->hop_num)
4063 eq->eqe_ba = eq->buf_list->map;
4064 else
4065 eq->eqe_ba = eq->l0_dma;
4066
4067 /* set eqc state */
4068 roce_set_field(eqc->byte_4,
4069 HNS_ROCE_EQC_EQ_ST_M,
4070 HNS_ROCE_EQC_EQ_ST_S,
4071 HNS_ROCE_V2_EQ_STATE_VALID);
4072
4073 /* set eqe hop num */
4074 roce_set_field(eqc->byte_4,
4075 HNS_ROCE_EQC_HOP_NUM_M,
4076 HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num);
4077
4078 /* set eqc over_ignore */
4079 roce_set_field(eqc->byte_4,
4080 HNS_ROCE_EQC_OVER_IGNORE_M,
4081 HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore);
4082
4083 /* set eqc coalesce */
4084 roce_set_field(eqc->byte_4,
4085 HNS_ROCE_EQC_COALESCE_M,
4086 HNS_ROCE_EQC_COALESCE_S, eq->coalesce);
4087
4088 /* set eqc arm_state */
4089 roce_set_field(eqc->byte_4,
4090 HNS_ROCE_EQC_ARM_ST_M,
4091 HNS_ROCE_EQC_ARM_ST_S, eq->arm_st);
4092
4093 /* set eqn */
4094 roce_set_field(eqc->byte_4,
4095 HNS_ROCE_EQC_EQN_M,
4096 HNS_ROCE_EQC_EQN_S, eq->eqn);
4097
4098 /* set eqe_cnt */
4099 roce_set_field(eqc->byte_4,
4100 HNS_ROCE_EQC_EQE_CNT_M,
4101 HNS_ROCE_EQC_EQE_CNT_S,
4102 HNS_ROCE_EQ_INIT_EQE_CNT);
4103
4104 /* set eqe_ba_pg_sz */
4105 roce_set_field(eqc->byte_8,
4106 HNS_ROCE_EQC_BA_PG_SZ_M,
4107 HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz);
4108
4109 /* set eqe_buf_pg_sz */
4110 roce_set_field(eqc->byte_8,
4111 HNS_ROCE_EQC_BUF_PG_SZ_M,
4112 HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz);
4113
4114 /* set eq_producer_idx */
4115 roce_set_field(eqc->byte_8,
4116 HNS_ROCE_EQC_PROD_INDX_M,
4117 HNS_ROCE_EQC_PROD_INDX_S,
4118 HNS_ROCE_EQ_INIT_PROD_IDX);
4119
4120 /* set eq_max_cnt */
4121 roce_set_field(eqc->byte_12,
4122 HNS_ROCE_EQC_MAX_CNT_M,
4123 HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt);
4124
4125 /* set eq_period */
4126 roce_set_field(eqc->byte_12,
4127 HNS_ROCE_EQC_PERIOD_M,
4128 HNS_ROCE_EQC_PERIOD_S, eq->eq_period);
4129
4130 /* set eqe_report_timer */
4131 roce_set_field(eqc->eqe_report_timer,
4132 HNS_ROCE_EQC_REPORT_TIMER_M,
4133 HNS_ROCE_EQC_REPORT_TIMER_S,
4134 HNS_ROCE_EQ_INIT_REPORT_TIMER);
4135
4136 /* set eqe_ba [34:3] */
4137 roce_set_field(eqc->eqe_ba0,
4138 HNS_ROCE_EQC_EQE_BA_L_M,
4139 HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
4140
4141 /* set eqe_ba [64:35] */
4142 roce_set_field(eqc->eqe_ba1,
4143 HNS_ROCE_EQC_EQE_BA_H_M,
4144 HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
4145
4146 /* set eq shift */
4147 roce_set_field(eqc->byte_28,
4148 HNS_ROCE_EQC_SHIFT_M,
4149 HNS_ROCE_EQC_SHIFT_S, eq->shift);
4150
4151 /* set eq MSI_IDX */
4152 roce_set_field(eqc->byte_28,
4153 HNS_ROCE_EQC_MSI_INDX_M,
4154 HNS_ROCE_EQC_MSI_INDX_S,
4155 HNS_ROCE_EQ_INIT_MSI_IDX);
4156
4157 /* set cur_eqe_ba [27:12] */
4158 roce_set_field(eqc->byte_28,
4159 HNS_ROCE_EQC_CUR_EQE_BA_L_M,
4160 HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
4161
4162 /* set cur_eqe_ba [59:28] */
4163 roce_set_field(eqc->byte_32,
4164 HNS_ROCE_EQC_CUR_EQE_BA_M_M,
4165 HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
4166
4167 /* set cur_eqe_ba [63:60] */
4168 roce_set_field(eqc->byte_36,
4169 HNS_ROCE_EQC_CUR_EQE_BA_H_M,
4170 HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
4171
4172 /* set eq consumer idx */
4173 roce_set_field(eqc->byte_36,
4174 HNS_ROCE_EQC_CONS_INDX_M,
4175 HNS_ROCE_EQC_CONS_INDX_S,
4176 HNS_ROCE_EQ_INIT_CONS_IDX);
4177
4178 /* set nxt_eqe_ba[43:12] */
4179 roce_set_field(eqc->nxt_eqe_ba0,
4180 HNS_ROCE_EQC_NXT_EQE_BA_L_M,
4181 HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
4182
4183 /* set nxt_eqe_ba[63:44] */
4184 roce_set_field(eqc->nxt_eqe_ba1,
4185 HNS_ROCE_EQC_NXT_EQE_BA_H_M,
4186 HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
4187}
4188
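hns_roce_config_eqc() above splits the 64-bit EQE base address across two context fields, bits [34:3] via eqe_ba >> 3 and the upper bits via eqe_ba >> 35 (the low three bits are implied by 8-byte alignment). A quick round-trip check of that split, using an arbitrary example address:

#include <stdio.h>

typedef unsigned long long u64;
typedef unsigned int u32;

int main(void)
{
	/* Arbitrary 8-byte-aligned DMA address, for illustration only. */
	u64 eqe_ba = 0x0000004321abc000ULL;

	/* Low field carries bits [34:3], high field carries the rest, as in
	 * the eqe_ba >> 3 and eqe_ba >> 35 writes above. */
	u32 ba_lo = (u32)((eqe_ba >> 3) & 0xffffffffULL);
	u32 ba_hi = (u32)(eqe_ba >> 35);

	/* Reassemble: the two fields round-trip the original address. */
	u64 rebuilt = ((u64)ba_hi << 35) | ((u64)ba_lo << 3);

	printf("original 0x%016llx\nrebuilt  0x%016llx\n", eqe_ba, rebuilt);
	return 0;
}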
4189static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
4190 struct hns_roce_eq *eq)
4191{
4192 struct device *dev = hr_dev->dev;
4193 int eq_alloc_done = 0;
4194 int eq_buf_cnt = 0;
4195 int eqe_alloc;
4196 u32 buf_chk_sz;
4197 u32 bt_chk_sz;
4198 u32 mhop_num;
4199 u64 size;
4200 u64 idx;
4201 int ba_num;
4202 int bt_num;
4203 int record_i;
4204 int record_j;
4205 int i = 0;
4206 int j = 0;
4207
4208 mhop_num = hr_dev->caps.eqe_hop_num;
4209 buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
4210 bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
4211
4212 ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1)
4213 / buf_chk_sz;
4214 bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8);
4215
4216 /* hop_num = 0 */
4217 if (mhop_num == HNS_ROCE_HOP_NUM_0) {
4218 if (eq->entries > buf_chk_sz / eq->eqe_size) {
4219 dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
4220 eq->entries);
4221 return -EINVAL;
4222 }
4223 eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
4224 &(eq->l0_dma), GFP_KERNEL);
4225 if (!eq->bt_l0)
4226 return -ENOMEM;
4227
4228 eq->cur_eqe_ba = eq->l0_dma;
4229 eq->nxt_eqe_ba = 0;
4230
4231 memset(eq->bt_l0, 0, eq->entries * eq->eqe_size);
4232
4233 return 0;
4234 }
4235
4236 eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
4237 if (!eq->buf_dma)
4238 return -ENOMEM;
4239 eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
4240 if (!eq->buf)
4241 goto err_kcalloc_buf;
4242
4243 if (mhop_num == 2) {
4244 eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
4245 if (!eq->l1_dma)
4246 goto err_kcalloc_l1_dma;
4247
4248 eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
4249 if (!eq->bt_l1)
4250 goto err_kcalloc_bt_l1;
4251 }
4252
4253 /* alloc L0 BT */
4254 eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
4255 if (!eq->bt_l0)
4256 goto err_dma_alloc_l0;
4257
4258 if (mhop_num == 1) {
4259 if (ba_num > (bt_chk_sz / 8))
4260 dev_err(dev, "ba_num %d is too large for 1 hop\n",
4261 ba_num);
4262
4263 /* alloc buf */
4264 for (i = 0; i < bt_chk_sz / 8; i++) {
4265 if (eq_buf_cnt + 1 < ba_num) {
4266 size = buf_chk_sz;
4267 } else {
4268 eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
4269 size = (eq->entries - eqe_alloc) * eq->eqe_size;
4270 }
4271 eq->buf[i] = dma_alloc_coherent(dev, size,
4272 &(eq->buf_dma[i]),
4273 GFP_KERNEL);
4274 if (!eq->buf[i])
4275 goto err_dma_alloc_buf;
4276
4277 memset(eq->buf[i], 0, size);
4278 *(eq->bt_l0 + i) = eq->buf_dma[i];
4279
4280 eq_buf_cnt++;
4281 if (eq_buf_cnt >= ba_num)
4282 break;
4283 }
4284 eq->cur_eqe_ba = eq->buf_dma[0];
4285 eq->nxt_eqe_ba = eq->buf_dma[1];
4286
4287 } else if (mhop_num == 2) {
4288 /* alloc L1 BT and buf */
4289 for (i = 0; i < bt_chk_sz / 8; i++) {
4290 eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
4291 &(eq->l1_dma[i]),
4292 GFP_KERNEL);
4293 if (!eq->bt_l1[i])
4294 goto err_dma_alloc_l1;
4295 *(eq->bt_l0 + i) = eq->l1_dma[i];
4296
4297 for (j = 0; j < bt_chk_sz / 8; j++) {
4298 idx = i * bt_chk_sz / 8 + j;
4299 if (eq_buf_cnt + 1 < ba_num) {
4300 size = buf_chk_sz;
4301 } else {
4302 eqe_alloc = (buf_chk_sz / eq->eqe_size)
4303 * idx;
4304 size = (eq->entries - eqe_alloc)
4305 * eq->eqe_size;
4306 }
4307 eq->buf[idx] = dma_alloc_coherent(dev, size,
4308 &(eq->buf_dma[idx]),
4309 GFP_KERNEL);
4310 if (!eq->buf[idx])
4311 goto err_dma_alloc_buf;
4312
4313 memset(eq->buf[idx], 0, size);
4314 *(eq->bt_l1[i] + j) = eq->buf_dma[idx];
4315
4316 eq_buf_cnt++;
4317 if (eq_buf_cnt >= ba_num) {
4318 eq_alloc_done = 1;
4319 break;
4320 }
4321 }
4322
4323 if (eq_alloc_done)
4324 break;
4325 }
4326 eq->cur_eqe_ba = eq->buf_dma[0];
4327 eq->nxt_eqe_ba = eq->buf_dma[1];
4328 }
4329
4330 eq->l0_last_num = i + 1;
4331 if (mhop_num == 2)
4332 eq->l1_last_num = j + 1;
4333
4334 return 0;
4335
4336err_dma_alloc_l1:
4337 dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
4338 eq->bt_l0 = NULL;
4339 eq->l0_dma = 0;
4340 for (i -= 1; i >= 0; i--) {
4341 dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
4342 eq->l1_dma[i]);
4343
4344 for (j = 0; j < bt_chk_sz / 8; j++) {
4345 idx = i * bt_chk_sz / 8 + j;
4346 dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
4347 eq->buf_dma[idx]);
4348 }
4349 }
4350 goto err_dma_alloc_l0;
4351
4352err_dma_alloc_buf:
4353 dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
4354 eq->bt_l0 = NULL;
4355 eq->l0_dma = 0;
4356
4357 if (mhop_num == 1)
4358 for (i -= 1; i >= 0; i--)
4359 dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
4360 eq->buf_dma[i]);
4361 else if (mhop_num == 2) {
4362 record_i = i;
4363 record_j = j;
4364 for (; i >= 0; i--) {
4365 dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
4366 eq->l1_dma[i]);
4367
4368 for (j = 0; j < bt_chk_sz / 8; j++) {
4369 if (i == record_i && j >= record_j)
4370 break;
4371
4372 idx = i * bt_chk_sz / 8 + j;
4373 dma_free_coherent(dev, buf_chk_sz,
4374 eq->buf[idx],
4375 eq->buf_dma[idx]);
4376 }
4377 }
4378 }
4379
4380err_dma_alloc_l0:
4381 kfree(eq->bt_l1);
4382 eq->bt_l1 = NULL;
4383
4384err_kcalloc_bt_l1:
4385 kfree(eq->l1_dma);
4386 eq->l1_dma = NULL;
4387
4388err_kcalloc_l1_dma:
4389 kfree(eq->buf);
4390 eq->buf = NULL;
4391
4392err_kcalloc_buf:
4393 kfree(eq->buf_dma);
4394 eq->buf_dma = NULL;
4395
4396 return -ENOMEM;
4397}
4398
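hns_roce_mhop_alloc_eq() above sizes the multi-hop structure from two ratios: ba_num, the number of buffer chunks needed to hold every EQE, and bt_num, the number of base-address tables needed to reference those chunks (each table holds bt_chk_sz / 8 64-bit pointers). A worked example with assumed values of 4096 entries, 64-byte AEQEs and 4 KB chunks (page-size chunks on a 4 KB-page system):

#include <stdio.h>

typedef unsigned long long u64;

#define PAGE_SIZE	4096ULL
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	u64 entries = 4096, eqe_size = 64;		/* assumed values */
	u64 buf_chk_sz = 4096, bt_chk_sz = 4096;

	/* Same arithmetic as the ba_num/bt_num computation above. */
	u64 ba_num = (PAGE_ALIGN(entries * eqe_size) + buf_chk_sz - 1) /
		     buf_chk_sz;
	u64 bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8);

	printf("ba_num = %llu buffer chunks\n", ba_num);	/* 64 */
	printf("bt_num = %llu L1 tables\n", bt_num);		/* 1 */
	return 0;
}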
4399static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
4400 struct hns_roce_eq *eq,
4401 unsigned int eq_cmd)
4402{
4403 struct device *dev = hr_dev->dev;
4404 struct hns_roce_cmd_mailbox *mailbox;
4405 u32 buf_chk_sz = 0;
4406 int ret;
4407
4408 /* Allocate mailbox memory */
4409 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
4410 if (IS_ERR(mailbox))
4411 return PTR_ERR(mailbox);
4412
4413 if (!hr_dev->caps.eqe_hop_num) {
4414 buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
4415
4416 eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
4417 GFP_KERNEL);
4418 if (!eq->buf_list) {
4419 ret = -ENOMEM;
4420 goto free_cmd_mbox;
4421 }
4422
4423 eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
4424 &(eq->buf_list->map),
4425 GFP_KERNEL);
4426 if (!eq->buf_list->buf) {
4427 ret = -ENOMEM;
4428 goto err_alloc_buf;
4429 }
4430
4431 memset(eq->buf_list->buf, 0, buf_chk_sz);
4432 } else {
4433 ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
4434 if (ret) {
4435 ret = -ENOMEM;
4436 goto free_cmd_mbox;
4437 }
4438 }
4439
4440 hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
4441
4442 ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
4443 eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
4444 if (ret) {
4445 dev_err(dev, "[mailbox cmd] creat eqc failed.\n");
4446 goto err_cmd_mbox;
4447 }
4448
4449 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
4450
4451 return 0;
4452
4453err_cmd_mbox:
4454 if (!hr_dev->caps.eqe_hop_num)
4455 dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
4456 eq->buf_list->map);
4457 else {
4458 hns_roce_mhop_free_eq(hr_dev, eq);
4459 goto free_cmd_mbox;
4460 }
4461
4462err_alloc_buf:
4463 kfree(eq->buf_list);
4464
4465free_cmd_mbox:
4466 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
4467
4468 return ret;
4469}
4470
4471static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
4472{
4473 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
4474 struct device *dev = hr_dev->dev;
4475 struct hns_roce_eq *eq;
4476 unsigned int eq_cmd;
4477 int irq_num;
4478 int eq_num;
4479 int other_num;
4480 int comp_num;
4481 int aeq_num;
4482 int i, j, k;
4483 int ret;
4484
4485 other_num = hr_dev->caps.num_other_vectors;
4486 comp_num = hr_dev->caps.num_comp_vectors;
4487 aeq_num = hr_dev->caps.num_aeq_vectors;
4488
4489 eq_num = comp_num + aeq_num;
4490 irq_num = eq_num + other_num;
4491
4492 eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
4493 if (!eq_table->eq)
4494 return -ENOMEM;
4495
4496 for (i = 0; i < irq_num; i++) {
4497 hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
4498 GFP_KERNEL);
4499 if (!hr_dev->irq_names[i]) {
4500 ret = -ENOMEM;
4501 goto err_failed_kzalloc;
4502 }
4503 }
4504
4505 /* create eq */
4506 for (j = 0; j < eq_num; j++) {
4507 eq = &eq_table->eq[j];
4508 eq->hr_dev = hr_dev;
4509 eq->eqn = j;
4510 if (j < comp_num) {
4511 /* CEQ */
4512 eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
4513 eq->type_flag = HNS_ROCE_CEQ;
4514 eq->entries = hr_dev->caps.ceqe_depth;
4515 eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
4516 eq->irq = hr_dev->irq[j + other_num + aeq_num];
4517 eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
4518 eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
4519 } else {
4520 /* AEQ */
4521 eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
4522 eq->type_flag = HNS_ROCE_AEQ;
4523 eq->entries = hr_dev->caps.aeqe_depth;
4524 eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
4525 eq->irq = hr_dev->irq[j - comp_num + other_num];
4526 eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
4527 eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
4528 }
4529
4530 ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
4531 if (ret) {
4532 dev_err(dev, "eq create failed.\n");
4533 goto err_create_eq_fail;
4534 }
4535 }
4536
4537 /* enable irq */
4538 hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
4539
4540 /* irq contains: abnormal + AEQ + CEQ */
4541 for (k = 0; k < irq_num; k++)
4542 if (k < other_num)
4543 snprintf((char *)hr_dev->irq_names[k],
4544 HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
4545 else if (k < (other_num + aeq_num))
4546 snprintf((char *)hr_dev->irq_names[k],
4547 HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
4548 k - other_num);
4549 else
4550 snprintf((char *)hr_dev->irq_names[k],
4551 HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
4552 k - other_num - aeq_num);
4553
4554 for (k = 0; k < irq_num; k++) {
4555 if (k < other_num)
4556 ret = request_irq(hr_dev->irq[k],
4557 hns_roce_v2_msix_interrupt_abn,
4558 0, hr_dev->irq_names[k], hr_dev);
4559
4560 else if (k < (other_num + comp_num))
4561 ret = request_irq(eq_table->eq[k - other_num].irq,
4562 hns_roce_v2_msix_interrupt_eq,
4563 0, hr_dev->irq_names[k + aeq_num],
4564 &eq_table->eq[k - other_num]);
4565 else
4566 ret = request_irq(eq_table->eq[k - other_num].irq,
4567 hns_roce_v2_msix_interrupt_eq,
4568 0, hr_dev->irq_names[k - comp_num],
4569 &eq_table->eq[k - other_num]);
4570 if (ret) {
4571 dev_err(dev, "Request irq error!\n");
4572 goto err_request_irq_fail;
4573 }
4574 }
4575
4576 return 0;
4577
4578err_request_irq_fail:
4579 for (k -= 1; k >= 0; k--)
4580 if (k < other_num)
4581 free_irq(hr_dev->irq[k], hr_dev);
4582 else
4583 free_irq(eq_table->eq[k - other_num].irq,
4584 &eq_table->eq[k - other_num]);
4585
4586err_create_eq_fail:
4587 for (j -= 1; j >= 0; j--)
4588 hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
4589
4590err_failed_kzalloc:
4591 for (i -= 1; i >= 0; i--)
4592 kfree(hr_dev->irq_names[i]);
4593 kfree(eq_table->eq);
4594
4595 return ret;
4596}
4597
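hns_roce_v2_init_eq_table() above lays the interrupt names out as abnormal vectors first, then AEQ vectors, then CEQ vectors, and the request_irq() loop has to index into that layout accordingly. A small sketch of the naming scheme with assumed vector counts (the real ones come from hr_dev->caps):

#include <stdio.h>

#define NAME_LEN 32

int main(void)
{
	int other_num = 1, aeq_num = 1, comp_num = 4;	/* assumed counts */
	int irq_num = other_num + aeq_num + comp_num;
	char names[irq_num][NAME_LEN];
	int k;

	/* Same layout as above: abnormal, then AEQ, then CEQ vectors. */
	for (k = 0; k < irq_num; k++) {
		if (k < other_num)
			snprintf(names[k], NAME_LEN, "hns-abn-%d", k);
		else if (k < other_num + aeq_num)
			snprintf(names[k], NAME_LEN, "hns-aeq-%d",
				 k - other_num);
		else
			snprintf(names[k], NAME_LEN, "hns-ceq-%d",
				 k - other_num - aeq_num);
		printf("irq %d -> %s\n", k, names[k]);
	}
	return 0;
}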
4598static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
4599{
4600 struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
4601 int irq_num;
4602 int eq_num;
4603 int i;
4604
4605 eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
4606 irq_num = eq_num + hr_dev->caps.num_other_vectors;
4607
4608 /* Disable irq */
4609 hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
4610
4611 for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
4612 free_irq(hr_dev->irq[i], hr_dev);
4613
4614 for (i = 0; i < eq_num; i++) {
4615 hns_roce_v2_destroy_eqc(hr_dev, i);
4616
4617 free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
4618
4619 hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
4620 }
4621
4622 for (i = 0; i < irq_num; i++)
4623 kfree(hr_dev->irq_names[i]);
4624
4625 kfree(eq_table->eq);
4626}
4627
3165static const struct hns_roce_hw hns_roce_hw_v2 = { 4628static const struct hns_roce_hw hns_roce_hw_v2 = {
3166 .cmq_init = hns_roce_v2_cmq_init, 4629 .cmq_init = hns_roce_v2_cmq_init,
3167 .cmq_exit = hns_roce_v2_cmq_exit, 4630 .cmq_exit = hns_roce_v2_cmq_exit,
@@ -3183,6 +4646,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
3183 .post_recv = hns_roce_v2_post_recv, 4646 .post_recv = hns_roce_v2_post_recv,
3184 .req_notify_cq = hns_roce_v2_req_notify_cq, 4647 .req_notify_cq = hns_roce_v2_req_notify_cq,
3185 .poll_cq = hns_roce_v2_poll_cq, 4648 .poll_cq = hns_roce_v2_poll_cq,
4649 .init_eq = hns_roce_v2_init_eq_table,
4650 .cleanup_eq = hns_roce_v2_cleanup_eq_table,
3186}; 4651};
3187 4652
3188static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { 4653static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
@@ -3197,6 +4662,7 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
3197 struct hnae3_handle *handle) 4662 struct hnae3_handle *handle)
3198{ 4663{
3199 const struct pci_device_id *id; 4664 const struct pci_device_id *id;
4665 int i;
3200 4666
3201 id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev); 4667 id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev);
3202 if (!id) { 4668 if (!id) {
@@ -3214,8 +4680,15 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
3214 hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; 4680 hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
3215 hr_dev->iboe.phy_port[0] = 0; 4681 hr_dev->iboe.phy_port[0] = 0;
3216 4682
4683 addrconf_addr_eui48((u8 *)&hr_dev->ib_dev.node_guid,
4684 hr_dev->iboe.netdevs[0]->dev_addr);
4685
4686 for (i = 0; i < HNS_ROCE_V2_MAX_IRQ_NUM; i++)
4687 hr_dev->irq[i] = pci_irq_vector(handle->pdev,
4688 i + handle->rinfo.base_vector);
4689
3217 /* cmd issue mode: 0 is poll, 1 is event */ 4690 /* cmd issue mode: 0 is poll, 1 is event */
3218 hr_dev->cmd_mod = 0; 4691 hr_dev->cmd_mod = 1;
3219 hr_dev->loop_idc = 0; 4692 hr_dev->loop_idc = 0;
3220 4693
3221 return 0; 4694 return 0;
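
Editor's note: the request_irq() bookkeeping in the new init_eq_table code above is easy to misread because hr_dev->irq[] and eq_table->eq[] use different orderings: the irq array is laid out abnormal vector(s) first, then AEQ, then CEQs, while the eq array stores the CEQs first. Below is a minimal userspace sketch (not kernel code) of the resulting irq-to-name/EQ association, assuming the 1 abnormal + 1 AEQ vector counts from the header that follows and a shortened CEQ count to keep the output readable.

#include <stdio.h>

#define OTHER_NUM	1	/* HNS_ROCE_V2_ABNORMAL_VEC_NUM */
#define AEQ_NUM		1	/* HNS_ROCE_V2_AEQE_VEC_NUM */
#define CEQ_NUM		4	/* stands in for HNS_ROCE_V2_COMP_VEC_NUM (63) */

int main(void)
{
	int irq_num = OTHER_NUM + AEQ_NUM + CEQ_NUM;
	int k;

	/* hr_dev->irq[] order: abnormal vector(s), then AEQ, then CEQs */
	for (k = 0; k < irq_num; k++) {
		if (k < OTHER_NUM)
			printf("irq[%d] -> hns-abn-%d (handler gets hr_dev)\n",
			       k, k);
		else if (k < OTHER_NUM + AEQ_NUM)
			printf("irq[%d] -> hns-aeq-%d (eq_table->eq[%d])\n",
			       k, k - OTHER_NUM,
			       CEQ_NUM + k - OTHER_NUM);
		else
			printf("irq[%d] -> hns-ceq-%d (eq_table->eq[%d])\n",
			       k, k - OTHER_NUM - AEQ_NUM,
			       k - OTHER_NUM - AEQ_NUM);
	}
	return 0;
}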
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 04b7a51b8efb..960df095392a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -53,6 +53,10 @@
53#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 53#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
54#define HNS_ROCE_V2_UAR_NUM 256 54#define HNS_ROCE_V2_UAR_NUM 256
55#define HNS_ROCE_V2_PHY_UAR_NUM 1 55#define HNS_ROCE_V2_PHY_UAR_NUM 1
56#define HNS_ROCE_V2_MAX_IRQ_NUM 65
57#define HNS_ROCE_V2_COMP_VEC_NUM 63
58#define HNS_ROCE_V2_AEQE_VEC_NUM 1
59#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
56#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 60#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
57#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 61#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
58#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 62#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
@@ -78,6 +82,8 @@
78#define HNS_ROCE_MTT_HOP_NUM 1 82#define HNS_ROCE_MTT_HOP_NUM 1
79#define HNS_ROCE_CQE_HOP_NUM 1 83#define HNS_ROCE_CQE_HOP_NUM 1
80#define HNS_ROCE_PBL_HOP_NUM 2 84#define HNS_ROCE_PBL_HOP_NUM 2
85#define HNS_ROCE_EQE_HOP_NUM 2
86
81#define HNS_ROCE_V2_GID_INDEX_NUM 256 87#define HNS_ROCE_V2_GID_INDEX_NUM 256
82 88
83#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18) 89#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
@@ -105,6 +111,12 @@
105 (step_idx == 1 && hop_num == 1) || \ 111 (step_idx == 1 && hop_num == 1) || \
106 (step_idx == 2 && hop_num == 2)) 112 (step_idx == 2 && hop_num == 2))
107 113
114enum {
115 NO_ARMED = 0x0,
116 REG_NXT_CEQE = 0x2,
117 REG_NXT_SE_CEQE = 0x3
118};
119
108#define V2_CQ_DB_REQ_NOT_SOL 0 120#define V2_CQ_DB_REQ_NOT_SOL 0
109#define V2_CQ_DB_REQ_NOT 1 121#define V2_CQ_DB_REQ_NOT 1
110 122
@@ -229,6 +241,9 @@ struct hns_roce_v2_cq_context {
229 u32 cqe_report_timer; 241 u32 cqe_report_timer;
230 u32 byte_64_se_cqe_idx; 242 u32 byte_64_se_cqe_idx;
231}; 243};
244#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
245#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
246
232#define V2_CQC_BYTE_4_CQ_ST_S 0 247#define V2_CQC_BYTE_4_CQ_ST_S 0
233#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0) 248#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0)
234 249
@@ -747,11 +762,14 @@ struct hns_roce_v2_qp_context {
747 762
748struct hns_roce_v2_cqe { 763struct hns_roce_v2_cqe {
749 u32 byte_4; 764 u32 byte_4;
750 u32 rkey_immtdata; 765 union {
766 __le32 rkey;
767 __be32 immtdata;
768 };
751 u32 byte_12; 769 u32 byte_12;
752 u32 byte_16; 770 u32 byte_16;
753 u32 byte_cnt; 771 u32 byte_cnt;
754 u32 smac; 772 u8 smac[4];
755 u32 byte_28; 773 u32 byte_28;
756 u32 byte_32; 774 u32 byte_32;
757}; 775};
@@ -901,6 +919,90 @@ struct hns_roce_v2_cq_db {
901 919
902#define V2_CQ_DB_PARAMETER_NOTIFY_S 24 920#define V2_CQ_DB_PARAMETER_NOTIFY_S 24
903 921
922struct hns_roce_v2_ud_send_wqe {
923 u32 byte_4;
924 u32 msg_len;
925 u32 immtdata;
926 u32 byte_16;
927 u32 byte_20;
928 u32 byte_24;
929 u32 qkey;
930 u32 byte_32;
931 u32 byte_36;
932 u32 byte_40;
933 u32 dmac;
934 u32 byte_48;
935 u8 dgid[GID_LEN_V2];
936
937};
938#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
939#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
940
941#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
942
943#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
944
945#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
946
947#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
948#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
949
950#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
951#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
952
953#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
954#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
955
956#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
957#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
958
959#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
960#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
961
962#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
963#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
964
965#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
966#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
967
968#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
969#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
970
971#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
972#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
973
974#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
975#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
976
977#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
978#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
979
980#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
981
982#define V2_UD_SEND_WQE_DMAC_0_S 0
983#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0)
984
985#define V2_UD_SEND_WQE_DMAC_1_S 8
986#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8)
987
988#define V2_UD_SEND_WQE_DMAC_2_S 16
989#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16)
990
991#define V2_UD_SEND_WQE_DMAC_3_S 24
992#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24)
993
994#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0
995#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0)
996
997#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8
998#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8)
999
1000#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16
1001#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16)
1002
1003#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24
1004#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24)
1005
904struct hns_roce_v2_rc_send_wqe { 1006struct hns_roce_v2_rc_send_wqe {
905 u32 byte_4; 1007 u32 byte_4;
906 u32 msg_len; 1008 u32 msg_len;
@@ -1129,9 +1231,6 @@ struct hns_roce_cmq_desc {
1129 u32 data[6]; 1231 u32 data[6];
1130}; 1232};
1131 1233
1132#define ROCEE_VF_MB_CFG0_REG 0x40
1133#define ROCEE_VF_MB_STATUS_REG 0x58
1134
1135#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000 1234#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
1136 1235
1137#define HNS_ROCE_HW_RUN_BIT_SHIFT 31 1236#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
@@ -1174,4 +1273,178 @@ struct hns_roce_v2_priv {
1174 struct hns_roce_v2_cmq cmq; 1273 struct hns_roce_v2_cmq cmq;
1175}; 1274};
1176 1275
1276struct hns_roce_eq_context {
1277 u32 byte_4;
1278 u32 byte_8;
1279 u32 byte_12;
1280 u32 eqe_report_timer;
1281 u32 eqe_ba0;
1282 u32 eqe_ba1;
1283 u32 byte_28;
1284 u32 byte_32;
1285 u32 byte_36;
1286 u32 nxt_eqe_ba0;
1287 u32 nxt_eqe_ba1;
1288 u32 rsv[5];
1289};
1290
1291#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
1292#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
1293#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
1294#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x0
1295
1296#define HNS_ROCE_V2_EQ_STATE_INVALID 0
1297#define HNS_ROCE_V2_EQ_STATE_VALID 1
1298#define HNS_ROCE_V2_EQ_STATE_OVERFLOW 2
1299#define HNS_ROCE_V2_EQ_STATE_FAILURE 3
1300
1301#define HNS_ROCE_V2_EQ_OVER_IGNORE_0 0
1302#define HNS_ROCE_V2_EQ_OVER_IGNORE_1 1
1303
1304#define HNS_ROCE_V2_EQ_COALESCE_0 0
1305#define HNS_ROCE_V2_EQ_COALESCE_1 1
1306
1307#define HNS_ROCE_V2_EQ_FIRED 0
1308#define HNS_ROCE_V2_EQ_ARMED 1
1309#define HNS_ROCE_V2_EQ_ALWAYS_ARMED 3
1310
1311#define HNS_ROCE_EQ_INIT_EQE_CNT 0
1312#define HNS_ROCE_EQ_INIT_PROD_IDX 0
1313#define HNS_ROCE_EQ_INIT_REPORT_TIMER 0
1314#define HNS_ROCE_EQ_INIT_MSI_IDX 0
1315#define HNS_ROCE_EQ_INIT_CONS_IDX 0
1316#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
1317
1318#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
1319#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
1320
1321#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
1322#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
1323
1324#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
1325#define HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S 1
1326#define HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S 2
1327
1328#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
1329#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
1330#define HNS_ROCE_EQ_DB_CMD_CEQ 0x2
1331#define HNS_ROCE_EQ_DB_CMD_CEQ_ARMED 0x3
1332
1333#define EQ_ENABLE 1
1334#define EQ_DISABLE 0
1335
1336#define EQ_REG_OFFSET 0x4
1337
1338#define HNS_ROCE_INT_NAME_LEN 32
1339#define HNS_ROCE_V2_EQN_M GENMASK(23, 0)
1340
1341#define HNS_ROCE_V2_CONS_IDX_M GENMASK(23, 0)
1342
1343#define HNS_ROCE_V2_VF_ABN_INT_EN_S 0
1344#define HNS_ROCE_V2_VF_ABN_INT_EN_M GENMASK(0, 0)
1345#define HNS_ROCE_V2_VF_ABN_INT_ST_M GENMASK(2, 0)
1346#define HNS_ROCE_V2_VF_ABN_INT_CFG_M GENMASK(2, 0)
1347#define HNS_ROCE_V2_VF_EVENT_INT_EN_M GENMASK(0, 0)
1348
1349/* WORD0 */
1350#define HNS_ROCE_EQC_EQ_ST_S 0
1351#define HNS_ROCE_EQC_EQ_ST_M GENMASK(1, 0)
1352
1353#define HNS_ROCE_EQC_HOP_NUM_S 2
1354#define HNS_ROCE_EQC_HOP_NUM_M GENMASK(3, 2)
1355
1356#define HNS_ROCE_EQC_OVER_IGNORE_S 4
1357#define HNS_ROCE_EQC_OVER_IGNORE_M GENMASK(4, 4)
1358
1359#define HNS_ROCE_EQC_COALESCE_S 5
1360#define HNS_ROCE_EQC_COALESCE_M GENMASK(5, 5)
1361
1362#define HNS_ROCE_EQC_ARM_ST_S 6
1363#define HNS_ROCE_EQC_ARM_ST_M GENMASK(7, 6)
1364
1365#define HNS_ROCE_EQC_EQN_S 8
1366#define HNS_ROCE_EQC_EQN_M GENMASK(15, 8)
1367
1368#define HNS_ROCE_EQC_EQE_CNT_S 16
1369#define HNS_ROCE_EQC_EQE_CNT_M GENMASK(31, 16)
1370
1371/* WORD1 */
1372#define HNS_ROCE_EQC_BA_PG_SZ_S 0
1373#define HNS_ROCE_EQC_BA_PG_SZ_M GENMASK(3, 0)
1374
1375#define HNS_ROCE_EQC_BUF_PG_SZ_S 4
1376#define HNS_ROCE_EQC_BUF_PG_SZ_M GENMASK(7, 4)
1377
1378#define HNS_ROCE_EQC_PROD_INDX_S 8
1379#define HNS_ROCE_EQC_PROD_INDX_M GENMASK(31, 8)
1380
1381/* WORD2 */
1382#define HNS_ROCE_EQC_MAX_CNT_S 0
1383#define HNS_ROCE_EQC_MAX_CNT_M GENMASK(15, 0)
1384
1385#define HNS_ROCE_EQC_PERIOD_S 16
1386#define HNS_ROCE_EQC_PERIOD_M GENMASK(31, 16)
1387
1388/* WORD3 */
1389#define HNS_ROCE_EQC_REPORT_TIMER_S 0
1390#define HNS_ROCE_EQC_REPORT_TIMER_M GENMASK(31, 0)
1391
1392/* WORD4 */
1393#define HNS_ROCE_EQC_EQE_BA_L_S 0
1394#define HNS_ROCE_EQC_EQE_BA_L_M GENMASK(31, 0)
1395
1396/* WORD5 */
1397#define HNS_ROCE_EQC_EQE_BA_H_S 0
1398#define HNS_ROCE_EQC_EQE_BA_H_M GENMASK(28, 0)
1399
1400/* WORD6 */
1401#define HNS_ROCE_EQC_SHIFT_S 0
1402#define HNS_ROCE_EQC_SHIFT_M GENMASK(7, 0)
1403
1404#define HNS_ROCE_EQC_MSI_INDX_S 8
1405#define HNS_ROCE_EQC_MSI_INDX_M GENMASK(15, 8)
1406
1407#define HNS_ROCE_EQC_CUR_EQE_BA_L_S 16
1408#define HNS_ROCE_EQC_CUR_EQE_BA_L_M GENMASK(31, 16)
1409
1410/* WORD7 */
1411#define HNS_ROCE_EQC_CUR_EQE_BA_M_S 0
1412#define HNS_ROCE_EQC_CUR_EQE_BA_M_M GENMASK(31, 0)
1413
1414/* WORD8 */
1415#define HNS_ROCE_EQC_CUR_EQE_BA_H_S 0
1416#define HNS_ROCE_EQC_CUR_EQE_BA_H_M GENMASK(3, 0)
1417
1418#define HNS_ROCE_EQC_CONS_INDX_S 8
1419#define HNS_ROCE_EQC_CONS_INDX_M GENMASK(31, 8)
1420
1421/* WORD9 */
1422#define HNS_ROCE_EQC_NXT_EQE_BA_L_S 0
1423#define HNS_ROCE_EQC_NXT_EQE_BA_L_M GENMASK(31, 0)
1424
1425/* WORD10 */
1426#define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
1427#define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
1428
1429#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
1430#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
1431
1432#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
1433#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
1434
1435#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
1436#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
1437
1438#define HNS_ROCE_V2_EQ_DB_CMD_S 16
1439#define HNS_ROCE_V2_EQ_DB_CMD_M GENMASK(17, 16)
1440
1441#define HNS_ROCE_V2_EQ_DB_TAG_S 0
1442#define HNS_ROCE_V2_EQ_DB_TAG_M GENMASK(7, 0)
1443
1444#define HNS_ROCE_V2_EQ_DB_PARA_S 0
1445#define HNS_ROCE_V2_EQ_DB_PARA_M GENMASK(23, 0)
1446
1447#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
1448#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
1449
1177#endif 1450#endif
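
Editor's note: the block of *_S/*_M pairs added above follows the driver's usual convention: each EQ context field lives in one 32-bit word, _S gives the field's shift and _M a GENMASK()-style mask, and the pairs are typically consumed by the driver's roce_set_field()/roce_get_field() helpers. The userspace sketch below illustrates the pattern with simplified local helpers and the EQN field standing in for the real macros; it is an illustration, not the driver's code.

#include <stdio.h>

#define GENMASK(h, l)	((~0u << (l)) & (~0u >> (31 - (h))))

#define EQC_EQN_S	8
#define EQC_EQN_M	GENMASK(15, 8)	/* mirrors HNS_ROCE_EQC_EQN_S/_M */

static void set_field(unsigned int *word, unsigned int mask, int shift,
		      unsigned int val)
{
	*word = (*word & ~mask) | ((val << shift) & mask);
}

static unsigned int get_field(unsigned int word, unsigned int mask, int shift)
{
	return (word & mask) >> shift;
}

int main(void)
{
	unsigned int byte_4 = 0;

	set_field(&byte_4, EQC_EQN_M, EQC_EQN_S, 5);	/* program EQN = 5 */
	printf("byte_4 = 0x%08x, eqn = %u\n",
	       byte_4, get_field(byte_4, EQC_EQN_M, EQC_EQN_S));
	return 0;
}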
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf02ac2d3596..aa0c242ddc50 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -748,12 +748,10 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
748 goto error_failed_cmd_init; 748 goto error_failed_cmd_init;
749 } 749 }
750 750
751 if (hr_dev->cmd_mod) { 751 ret = hr_dev->hw->init_eq(hr_dev);
752 ret = hns_roce_init_eq_table(hr_dev); 752 if (ret) {
753 if (ret) { 753 dev_err(dev, "eq init failed!\n");
754 dev_err(dev, "eq init failed!\n"); 754 goto error_failed_eq_table;
755 goto error_failed_eq_table;
756 }
757 } 755 }
758 756
759 if (hr_dev->cmd_mod) { 757 if (hr_dev->cmd_mod) {
@@ -805,8 +803,7 @@ error_failed_init_hem:
805 hns_roce_cmd_use_polling(hr_dev); 803 hns_roce_cmd_use_polling(hr_dev);
806 804
807error_failed_use_event: 805error_failed_use_event:
808 if (hr_dev->cmd_mod) 806 hr_dev->hw->cleanup_eq(hr_dev);
809 hns_roce_cleanup_eq_table(hr_dev);
810 807
811error_failed_eq_table: 808error_failed_eq_table:
812 hns_roce_cmd_cleanup(hr_dev); 809 hns_roce_cmd_cleanup(hr_dev);
@@ -837,8 +834,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
837 if (hr_dev->cmd_mod) 834 if (hr_dev->cmd_mod)
838 hns_roce_cmd_use_polling(hr_dev); 835 hns_roce_cmd_use_polling(hr_dev);
839 836
840 if (hr_dev->cmd_mod) 837 hr_dev->hw->cleanup_eq(hr_dev);
841 hns_roce_cleanup_eq_table(hr_dev);
842 hns_roce_cmd_cleanup(hr_dev); 838 hns_roce_cmd_cleanup(hr_dev);
843 if (hr_dev->hw->cmq_exit) 839 if (hr_dev->hw->cmq_exit)
844 hr_dev->hw->cmq_exit(hr_dev); 840 hr_dev->hw->cmq_exit(hr_dev);
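
Editor's note: routing EQ setup through hr_dev->hw->init_eq()/cleanup_eq() (the ops added to hns_roce_hw_v2 earlier in this diff) keeps hns_roce_init() generation-agnostic. Below is a stripped-down userspace sketch of that ops-table indirection; the structure and function names are made-up stand-ins for the driver's types.

#include <stdio.h>

struct roce_dev;

struct roce_hw_ops {
	int  (*init_eq)(struct roce_dev *dev);
	void (*cleanup_eq)(struct roce_dev *dev);
};

struct roce_dev { const struct roce_hw_ops *hw; };

static int v2_init_eq(struct roce_dev *dev)
{
	printf("v2 EQ init\n");
	return 0;
}

static void v2_cleanup_eq(struct roce_dev *dev)
{
	printf("v2 EQ cleanup\n");
}

static const struct roce_hw_ops v2_ops = {
	.init_eq = v2_init_eq,
	.cleanup_eq = v2_cleanup_eq,
};

int main(void)
{
	struct roce_dev dev = { .hw = &v2_ops };

	if (dev.hw->init_eq(&dev))	/* generic core path, hw-specific body */
		return 1;
	dev.hw->cleanup_eq(&dev);
	return 0;
}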
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 49586ec8126a..4414cea9ef56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -65,6 +65,7 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
65 if (atomic_dec_and_test(&qp->refcount)) 65 if (atomic_dec_and_test(&qp->refcount))
66 complete(&qp->free); 66 complete(&qp->free);
67} 67}
68EXPORT_SYMBOL_GPL(hns_roce_qp_event);
68 69
69static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, 70static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
70 enum hns_roce_event type) 71 enum hns_roce_event type)
@@ -454,6 +455,13 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
454 hr_qp->sge.sge_shift = 4; 455 hr_qp->sge.sge_shift = 4;
455 } 456 }
456 457
458	/* UD SQ WQE SGEs use the extended SGE space */
459 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
460 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
461 hr_qp->sq.max_gs);
462 hr_qp->sge.sge_shift = 4;
463 }
464
457 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ 465 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
458 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 466 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
459 hr_qp->sq.offset = 0; 467 hr_qp->sq.offset = 0;
@@ -493,6 +501,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
493 int ret = 0; 501 int ret = 0;
494 u32 page_shift; 502 u32 page_shift;
495 u32 npages; 503 u32 npages;
504 int i;
496 505
497 mutex_init(&hr_qp->mutex); 506 mutex_init(&hr_qp->mutex);
498 spin_lock_init(&hr_qp->sq.lock); 507 spin_lock_init(&hr_qp->sq.lock);
@@ -500,6 +509,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
500 509
501 hr_qp->state = IB_QPS_RESET; 510 hr_qp->state = IB_QPS_RESET;
502 511
512 hr_qp->ibqp.qp_type = init_attr->qp_type;
513
503 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 514 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
504 hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; 515 hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
505 else 516 else
@@ -512,18 +523,48 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
512 goto err_out; 523 goto err_out;
513 } 524 }
514 525
526 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
527 /* allocate recv inline buf */
528 hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
529 sizeof(struct hns_roce_rinl_wqe),
530 GFP_KERNEL);
531 if (!hr_qp->rq_inl_buf.wqe_list) {
532 ret = -ENOMEM;
533 goto err_out;
534 }
535
536 hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
537
538 /* Firstly, allocate a list of sge space buffer */
539 hr_qp->rq_inl_buf.wqe_list[0].sg_list =
540 kcalloc(hr_qp->rq_inl_buf.wqe_cnt,
541 init_attr->cap.max_recv_sge *
542 sizeof(struct hns_roce_rinl_sge),
543 GFP_KERNEL);
544 if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) {
545 ret = -ENOMEM;
546 goto err_wqe_list;
547 }
548
549 for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
550 /* Secondly, reallocate the buffer */
551 hr_qp->rq_inl_buf.wqe_list[i].sg_list =
552 &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
553 init_attr->cap.max_recv_sge];
554 }
555
515 if (ib_pd->uobject) { 556 if (ib_pd->uobject) {
516 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { 557 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
517 dev_err(dev, "ib_copy_from_udata error for create qp\n"); 558 dev_err(dev, "ib_copy_from_udata error for create qp\n");
518 ret = -EFAULT; 559 ret = -EFAULT;
519 goto err_out; 560 goto err_rq_sge_list;
520 } 561 }
521 562
522 ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, 563 ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
523 &ucmd); 564 &ucmd);
524 if (ret) { 565 if (ret) {
525 dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); 566 dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
526 goto err_out; 567 goto err_rq_sge_list;
527 } 568 }
528 569
529 hr_qp->umem = ib_umem_get(ib_pd->uobject->context, 570 hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
@@ -532,7 +573,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
532 if (IS_ERR(hr_qp->umem)) { 573 if (IS_ERR(hr_qp->umem)) {
533 dev_err(dev, "ib_umem_get error for create qp\n"); 574 dev_err(dev, "ib_umem_get error for create qp\n");
534 ret = PTR_ERR(hr_qp->umem); 575 ret = PTR_ERR(hr_qp->umem);
535 goto err_out; 576 goto err_rq_sge_list;
536 } 577 }
537 578
538 hr_qp->mtt.mtt_type = MTT_TYPE_WQE; 579 hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
@@ -566,13 +607,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
566 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { 607 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
567 dev_err(dev, "init_attr->create_flags error!\n"); 608 dev_err(dev, "init_attr->create_flags error!\n");
568 ret = -EINVAL; 609 ret = -EINVAL;
569 goto err_out; 610 goto err_rq_sge_list;
570 } 611 }
571 612
572 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { 613 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
573 dev_err(dev, "init_attr->create_flags error!\n"); 614 dev_err(dev, "init_attr->create_flags error!\n");
574 ret = -EINVAL; 615 ret = -EINVAL;
575 goto err_out; 616 goto err_rq_sge_list;
576 } 617 }
577 618
578 /* Set SQ size */ 619 /* Set SQ size */
@@ -580,7 +621,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
580 hr_qp); 621 hr_qp);
581 if (ret) { 622 if (ret) {
582 dev_err(dev, "hns_roce_set_kernel_sq_size error!\n"); 623 dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
583 goto err_out; 624 goto err_rq_sge_list;
584 } 625 }
585 626
586 /* QP doorbell register address */ 627 /* QP doorbell register address */
@@ -596,7 +637,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
596 &hr_qp->hr_buf, page_shift)) { 637 &hr_qp->hr_buf, page_shift)) {
597 dev_err(dev, "hns_roce_buf_alloc error!\n"); 638 dev_err(dev, "hns_roce_buf_alloc error!\n");
598 ret = -ENOMEM; 639 ret = -ENOMEM;
599 goto err_out; 640 goto err_rq_sge_list;
600 } 641 }
601 642
602 hr_qp->mtt.mtt_type = MTT_TYPE_WQE; 643 hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
@@ -678,6 +719,14 @@ err_buf:
678 else 719 else
679 hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); 720 hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
680 721
722err_rq_sge_list:
723 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
724 kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
725
726err_wqe_list:
727 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
728 kfree(hr_qp->rq_inl_buf.wqe_list);
729
681err_out: 730err_out:
682 return ret; 731 return ret;
683} 732}
@@ -724,8 +773,13 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
724 hr_qp = &hr_sqp->hr_qp; 773 hr_qp = &hr_sqp->hr_qp;
725 hr_qp->port = init_attr->port_num - 1; 774 hr_qp->port = init_attr->port_num - 1;
726 hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; 775 hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
727 hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS + 776
728 hr_dev->iboe.phy_port[hr_qp->port]; 777 /* when hw version is v1, the sqpn is allocated */
778 if (hr_dev->caps.max_sq_sg <= 2)
779 hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
780 hr_dev->iboe.phy_port[hr_qp->port];
781 else
782 hr_qp->ibqp.qp_num = 1;
729 783
730 ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 784 ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
731 hr_qp->ibqp.qp_num, hr_qp); 785 hr_qp->ibqp.qp_num, hr_qp);
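
Editor's note: the RQ-inline allocation above uses one descriptor array plus a single flat SGE buffer; every wqe_list[i].sg_list points into that buffer, which is also why the error path only frees wqe_list[0].sg_list. A minimal userspace sketch of the same pattern with simplified stand-in types:

#include <stdio.h>
#include <stdlib.h>

struct rinl_sge { unsigned long addr; unsigned int len; };
struct rinl_wqe { struct rinl_sge *sg_list; unsigned int sge_cnt; };

int main(void)
{
	unsigned int wqe_cnt = 4, max_recv_sge = 2, i;
	struct rinl_wqe *wqe_list;

	wqe_list = calloc(wqe_cnt, sizeof(*wqe_list));
	if (!wqe_list)
		return 1;

	/* one flat SGE array, parked in slot 0 ... */
	wqe_list[0].sg_list = calloc((size_t)wqe_cnt * max_recv_sge,
				     sizeof(*wqe_list[0].sg_list));
	if (!wqe_list[0].sg_list) {
		free(wqe_list);
		return 1;
	}

	/* ... and every later WQE points into it */
	for (i = 1; i < wqe_cnt; i++)
		wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge];

	printf("wqe 2 starts %ld SGEs into the flat buffer\n",
	       (long)(wqe_list[2].sg_list - wqe_list[0].sg_list));

	/* teardown mirrors the error path: free slot 0's buffer, then the list */
	free(wqe_list[0].sg_list);
	free(wqe_list);
	return 0;
}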
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index f6d20ba88c03..2962979c06e9 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -5,4 +5,3 @@ config INFINIBAND_I40IW
5 select GENERIC_ALLOCATOR 5 select GENERIC_ALLOCATOR
6 ---help--- 6 ---help---
7 Intel(R) Ethernet X722 iWARP Driver 7 Intel(R) Ethernet X722 iWARP Driver
8 INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 4ae9131b6350..bcddd7061fc0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -587,5 +587,8 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
587int i40iw_net_event(struct notifier_block *notifier, 587int i40iw_net_event(struct notifier_block *notifier,
588 unsigned long event, 588 unsigned long event,
589 void *ptr); 589 void *ptr);
590int i40iw_netdevice_event(struct notifier_block *notifier,
591 unsigned long event,
592 void *ptr);
590 593
591#endif 594#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 77870f9e1736..abf4cd897849 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -92,14 +92,9 @@ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
92static u8 i40iw_derive_hw_ird_setting(u16 cm_ird) 92static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
93{ 93{
94 u8 encoded_ird_size; 94 u8 encoded_ird_size;
95 u8 pof2_cm_ird = 1;
96
97 /* round-off to next powerof2 */
98 while (pof2_cm_ird < cm_ird)
99 pof2_cm_ird *= 2;
100 95
101 /* ird_size field is encoded in qp_ctx */ 96 /* ird_size field is encoded in qp_ctx */
102 switch (pof2_cm_ird) { 97 switch (cm_ird ? roundup_pow_of_two(cm_ird) : 0) {
103 case I40IW_HW_IRD_SETTING_64: 98 case I40IW_HW_IRD_SETTING_64:
104 encoded_ird_size = 3; 99 encoded_ird_size = 3;
105 break; 100 break;
@@ -125,13 +120,16 @@ static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
125 * @conn_ird: connection IRD 120 * @conn_ird: connection IRD
126 * @conn_ord: connection ORD 121 * @conn_ord: connection ORD
127 */ 122 */
128static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord) 123static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u32 conn_ird,
124 u32 conn_ord)
129{ 125{
130 if (conn_ird > I40IW_MAX_IRD_SIZE) 126 if (conn_ird > I40IW_MAX_IRD_SIZE)
131 conn_ird = I40IW_MAX_IRD_SIZE; 127 conn_ird = I40IW_MAX_IRD_SIZE;
132 128
133 if (conn_ord > I40IW_MAX_ORD_SIZE) 129 if (conn_ord > I40IW_MAX_ORD_SIZE)
134 conn_ord = I40IW_MAX_ORD_SIZE; 130 conn_ord = I40IW_MAX_ORD_SIZE;
131 else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
132 conn_ord = 1;
135 133
136 cm_node->ird_size = conn_ird; 134 cm_node->ird_size = conn_ird;
137 cm_node->ord_size = conn_ord; 135 cm_node->ord_size = conn_ord;
@@ -2878,15 +2876,13 @@ static struct i40iw_cm_listener *i40iw_make_listen_node(
2878 * i40iw_create_cm_node - make a connection node with params 2876 * i40iw_create_cm_node - make a connection node with params
2879 * @cm_core: cm's core 2877 * @cm_core: cm's core
2880 * @iwdev: iwarp device structure 2878 * @iwdev: iwarp device structure
2881 * @private_data_len: len to provate data for mpa request 2879 * @conn_param: upper layer connection parameters
2882 * @private_data: pointer to private data for connection
2883 * @cm_info: quad info for connection 2880 * @cm_info: quad info for connection
2884 */ 2881 */
2885static struct i40iw_cm_node *i40iw_create_cm_node( 2882static struct i40iw_cm_node *i40iw_create_cm_node(
2886 struct i40iw_cm_core *cm_core, 2883 struct i40iw_cm_core *cm_core,
2887 struct i40iw_device *iwdev, 2884 struct i40iw_device *iwdev,
2888 u16 private_data_len, 2885 struct iw_cm_conn_param *conn_param,
2889 void *private_data,
2890 struct i40iw_cm_info *cm_info) 2886 struct i40iw_cm_info *cm_info)
2891{ 2887{
2892 struct i40iw_cm_node *cm_node; 2888 struct i40iw_cm_node *cm_node;
@@ -2894,6 +2890,9 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
2894 struct i40iw_cm_node *loopback_remotenode; 2890 struct i40iw_cm_node *loopback_remotenode;
2895 struct i40iw_cm_info loopback_cm_info; 2891 struct i40iw_cm_info loopback_cm_info;
2896 2892
2893 u16 private_data_len = conn_param->private_data_len;
2894 const void *private_data = conn_param->private_data;
2895
2897 /* create a CM connection node */ 2896 /* create a CM connection node */
2898 cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL); 2897 cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
2899 if (!cm_node) 2898 if (!cm_node)
@@ -2902,6 +2901,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
2902 cm_node->tcp_cntxt.client = 1; 2901 cm_node->tcp_cntxt.client = 1;
2903 cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; 2902 cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
2904 2903
2904 i40iw_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
2905
2905 if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) { 2906 if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
2906 loopback_remotelistener = i40iw_find_listener( 2907 loopback_remotelistener = i40iw_find_listener(
2907 cm_core, 2908 cm_core,
@@ -2935,6 +2936,10 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
2935 private_data_len); 2936 private_data_len);
2936 loopback_remotenode->pdata.size = private_data_len; 2937 loopback_remotenode->pdata.size = private_data_len;
2937 2938
2939 if (loopback_remotenode->ord_size > cm_node->ird_size)
2940 loopback_remotenode->ord_size =
2941 cm_node->ird_size;
2942
2938 cm_node->state = I40IW_CM_STATE_OFFLOADED; 2943 cm_node->state = I40IW_CM_STATE_OFFLOADED;
2939 cm_node->tcp_cntxt.rcv_nxt = 2944 cm_node->tcp_cntxt.rcv_nxt =
2940 loopback_remotenode->tcp_cntxt.loc_seq_num; 2945 loopback_remotenode->tcp_cntxt.loc_seq_num;
@@ -3691,7 +3696,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3691 cm_node->qhash_set = false; 3696 cm_node->qhash_set = false;
3692 i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); 3697 i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
3693 3698
3694 cm_node->accelerated = 1; 3699 cm_node->accelerated = true;
3695 status = 3700 status =
3696 i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0); 3701 i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
3697 if (status) 3702 if (status)
@@ -3815,9 +3820,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3815 __func__, cm_id->tos, cm_info.user_pri); 3820 __func__, cm_id->tos, cm_info.user_pri);
3816 cm_id->add_ref(cm_id); 3821 cm_id->add_ref(cm_id);
3817 cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev, 3822 cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
3818 conn_param->private_data_len, 3823 conn_param, &cm_info);
3819 (void *)conn_param->private_data,
3820 &cm_info);
3821 3824
3822 if (IS_ERR(cm_node)) { 3825 if (IS_ERR(cm_node)) {
3823 ret = PTR_ERR(cm_node); 3826 ret = PTR_ERR(cm_node);
@@ -3849,11 +3852,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3849 } 3852 }
3850 3853
3851 cm_node->apbvt_set = true; 3854 cm_node->apbvt_set = true;
3852 i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
3853 if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
3854 !cm_node->ord_size)
3855 cm_node->ord_size = 1;
3856
3857 iwqp->cm_node = cm_node; 3855 iwqp->cm_node = cm_node;
3858 cm_node->iwqp = iwqp; 3856 cm_node->iwqp = iwqp;
3859 iwqp->cm_id = cm_id; 3857 iwqp->cm_id = cm_id;
@@ -4058,7 +4056,7 @@ static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
4058 cm_node->qhash_set = false; 4056 cm_node->qhash_set = false;
4059 i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); 4057 i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
4060 4058
4061 cm_node->accelerated = 1; 4059 cm_node->accelerated = true;
4062 status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 4060 status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
4063 0); 4061 0);
4064 if (status) 4062 if (status)
@@ -4242,10 +4240,16 @@ set_qhash:
4242} 4240}
4243 4241
4244/** 4242/**
4245 * i40iw_cm_disconnect_all - disconnect all connected qp's 4243 * i40iw_cm_teardown_connections - teardown QPs
4246 * @iwdev: device pointer 4244 * @iwdev: device pointer
4245 * @ipaddr: Pointer to IPv4 or IPv6 address
4246 * @ipv4: flag indicating IPv4 when true
4247 * @disconnect_all: flag indicating disconnect all QPs
4248 * teardown QPs where source or destination addr matches ip addr
4247 */ 4249 */
4248void i40iw_cm_disconnect_all(struct i40iw_device *iwdev) 4250void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
4251 struct i40iw_cm_info *nfo,
4252 bool disconnect_all)
4249{ 4253{
4250 struct i40iw_cm_core *cm_core = &iwdev->cm_core; 4254 struct i40iw_cm_core *cm_core = &iwdev->cm_core;
4251 struct list_head *list_core_temp; 4255 struct list_head *list_core_temp;
@@ -4259,8 +4263,13 @@ void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
4259 spin_lock_irqsave(&cm_core->ht_lock, flags); 4263 spin_lock_irqsave(&cm_core->ht_lock, flags);
4260 list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { 4264 list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
4261 cm_node = container_of(list_node, struct i40iw_cm_node, list); 4265 cm_node = container_of(list_node, struct i40iw_cm_node, list);
4262 atomic_inc(&cm_node->ref_count); 4266 if (disconnect_all ||
4263 list_add(&cm_node->connected_entry, &connected_list); 4267 (nfo->vlan_id == cm_node->vlan_id &&
4268 (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
4269 !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
4270 atomic_inc(&cm_node->ref_count);
4271 list_add(&cm_node->connected_entry, &connected_list);
4272 }
4264 } 4273 }
4265 spin_unlock_irqrestore(&cm_core->ht_lock, flags); 4274 spin_unlock_irqrestore(&cm_core->ht_lock, flags);
4266 4275
@@ -4294,6 +4303,9 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
4294 enum i40iw_quad_hash_manage_type op = 4303 enum i40iw_quad_hash_manage_type op =
4295 ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE; 4304 ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
4296 4305
4306 nfo.vlan_id = vlan_id;
4307 nfo.ipv4 = ipv4;
4308
4297 /* Disable or enable qhash for listeners */ 4309 /* Disable or enable qhash for listeners */
4298 spin_lock_irqsave(&cm_core->listen_list_lock, flags); 4310 spin_lock_irqsave(&cm_core->listen_list_lock, flags);
4299 list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { 4311 list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
@@ -4303,8 +4315,6 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
4303 memcpy(nfo.loc_addr, listen_node->loc_addr, 4315 memcpy(nfo.loc_addr, listen_node->loc_addr,
4304 sizeof(nfo.loc_addr)); 4316 sizeof(nfo.loc_addr));
4305 nfo.loc_port = listen_node->loc_port; 4317 nfo.loc_port = listen_node->loc_port;
4306 nfo.ipv4 = listen_node->ipv4;
4307 nfo.vlan_id = listen_node->vlan_id;
4308 nfo.user_pri = listen_node->user_pri; 4318 nfo.user_pri = listen_node->user_pri;
4309 if (!list_empty(&listen_node->child_listen_list)) { 4319 if (!list_empty(&listen_node->child_listen_list)) {
4310 i40iw_qhash_ctrl(iwdev, 4320 i40iw_qhash_ctrl(iwdev,
@@ -4326,7 +4336,7 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
4326 } 4336 }
4327 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 4337 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
4328 4338
4329 /* disconnect any connected qp's on ifdown */ 4339 /* teardown connected qp's on ifdown */
4330 if (!ifup) 4340 if (!ifup)
4331 i40iw_cm_disconnect_all(iwdev); 4341 i40iw_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
4332} 4342}
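
Editor's note: the loopback fixup above applies the usual ORD/IRD rule: a node's outstanding outbound RDMA reads (ORD) must not exceed what its peer advertises it can absorb (IRD), so the loopback remote node's ORD is clamped to the local node's IRD. A tiny userspace sketch of that clamp with made-up numbers:

#include <stdio.h>

struct cm_side { unsigned int ird, ord; };

static void clamp_ord(struct cm_side *snd, const struct cm_side *rcv)
{
	if (snd->ord > rcv->ird)
		snd->ord = rcv->ird;	/* reads in flight <= peer can absorb */
}

int main(void)
{
	struct cm_side local  = { .ird = 16, .ord = 32 };
	struct cm_side remote = { .ird = 64, .ord = 48 };

	clamp_ord(&remote, &local);	/* mirrors the loopback_remotenode fixup */
	clamp_ord(&local, &remote);

	printf("local ord=%u, remote ord=%u\n", local.ord, remote.ord);
	return 0;
}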
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 0d5840d2c4fc..cf60c451e071 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -276,8 +276,6 @@ struct i40iw_cm_tcp_context {
276 u32 mss; 276 u32 mss;
277 u8 snd_wscale; 277 u8 snd_wscale;
278 u8 rcv_wscale; 278 u8 rcv_wscale;
279
280 struct timeval sent_ts;
281}; 279};
282 280
283enum i40iw_cm_listener_state { 281enum i40iw_cm_listener_state {
@@ -337,7 +335,7 @@ struct i40iw_cm_node {
337 u16 mpav2_ird_ord; 335 u16 mpav2_ird_ord;
338 struct iw_cm_id *cm_id; 336 struct iw_cm_id *cm_id;
339 struct list_head list; 337 struct list_head list;
340 int accelerated; 338 bool accelerated;
341 struct i40iw_cm_listener *listener; 339 struct i40iw_cm_listener *listener;
342 int apbvt_set; 340 int apbvt_set;
343 int accept_pend; 341 int accept_pend;
@@ -455,5 +453,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev,
455 453
456void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, 454void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
457 u32 *ipaddr, bool ipv4, bool ifup); 455 u32 *ipaddr, bool ipv4, bool ifup);
458void i40iw_cm_disconnect_all(struct i40iw_device *iwdev); 456void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
457 struct i40iw_cm_info *nfo,
458 bool disconnect_all);
459#endif /* I40IW_CM_H */ 459#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index da9821a10e0d..c74fd3309b93 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -1893,8 +1893,6 @@ static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
1893static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev, 1893static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
1894 u32 count) 1894 u32 count)
1895{ 1895{
1896 if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
1897 return I40IW_ERR_INVALID_SIZE;
1898 1896
1899 if (dev->is_pf) 1897 if (dev->is_pf)
1900 i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count); 1898 i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
@@ -3872,7 +3870,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3872 struct i40iw_virt_mem virt_mem; 3870 struct i40iw_virt_mem virt_mem;
3873 u32 i, mem_size; 3871 u32 i, mem_size;
3874 u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted; 3872 u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
3875 u32 powerof2;
3876 u64 sd_needed; 3873 u64 sd_needed;
3877 u32 loop_count = 0; 3874 u32 loop_count = 0;
3878 3875
@@ -3928,8 +3925,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3928 hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1; 3925 hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
3929 hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted; 3926 hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
3930 3927
3931 hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted; 3928 hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt =
3932 hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted; 3929 roundup_pow_of_two(I40IW_MAX_WQ_ENTRIES * qpwanted);
3930 hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt =
3931 roundup_pow_of_two(2 * I40IW_MAX_IRD_SIZE * qpwanted);
3933 hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt = 3932 hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
3934 hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size; 3933 hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
3935 hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt = 3934 hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
@@ -3945,16 +3944,10 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3945 if ((loop_count > 1000) || 3944 if ((loop_count > 1000) ||
3946 ((!(loop_count % 10)) && 3945 ((!(loop_count % 10)) &&
3947 (qpwanted > qpwantedoriginal * 2 / 3))) { 3946 (qpwanted > qpwantedoriginal * 2 / 3))) {
3948 if (qpwanted > FPM_MULTIPLIER) { 3947 if (qpwanted > FPM_MULTIPLIER)
3949 qpwanted -= FPM_MULTIPLIER; 3948 qpwanted = roundup_pow_of_two(qpwanted -
3950 powerof2 = 1; 3949 FPM_MULTIPLIER);
3951 while (powerof2 < qpwanted) 3950 qpwanted >>= 1;
3952 powerof2 *= 2;
3953 powerof2 /= 2;
3954 qpwanted = powerof2;
3955 } else {
3956 qpwanted /= 2;
3957 }
3958 } 3951 }
3959 if (mrwanted > FPM_MULTIPLIER * 10) 3952 if (mrwanted > FPM_MULTIPLIER * 10)
3960 mrwanted -= FPM_MULTIPLIER * 10; 3953 mrwanted -= FPM_MULTIPLIER * 10;
@@ -3962,8 +3955,6 @@ enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_
3962 pblewanted -= FPM_MULTIPLIER * 1000; 3955 pblewanted -= FPM_MULTIPLIER * 1000;
3963 } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000); 3956 } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
3964 3957
3965 sd_needed = i40iw_est_sd(dev, hmc_info);
3966
3967 i40iw_debug(dev, I40IW_DEBUG_HMC, 3958 i40iw_debug(dev, I40IW_DEBUG_HMC,
3968 "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n", 3959 "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
3969 loop_count, sd_needed, 3960 loop_count, sd_needed,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 029083cb81d5..4b65e4140bd7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -97,6 +97,7 @@
97#define RDMA_OPCODE_MASK 0x0f 97#define RDMA_OPCODE_MASK 0x0f
98#define RDMA_READ_REQ_OPCODE 1 98#define RDMA_READ_REQ_OPCODE 1
99#define Q2_BAD_FRAME_OFFSET 72 99#define Q2_BAD_FRAME_OFFSET 72
100#define Q2_FPSN_OFFSET 64
100#define CQE_MAJOR_DRV 0x8000 101#define CQE_MAJOR_DRV 0x8000
101 102
102#define I40IW_TERM_SENT 0x01 103#define I40IW_TERM_SENT 0x01
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index e96bdafbcbb3..61540e14e4b9 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -385,6 +385,8 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
385 iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context); 385 iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
386 } 386 }
387 break; 387 break;
388 case I40IW_AE_LLP_DOUBT_REACHABILITY:
389 break;
388 case I40IW_AE_PRIV_OPERATION_DENIED: 390 case I40IW_AE_PRIV_OPERATION_DENIED:
389 case I40IW_AE_STAG_ZERO_INVALID: 391 case I40IW_AE_STAG_ZERO_INVALID:
390 case I40IW_AE_IB_RREQ_AND_Q1_FULL: 392 case I40IW_AE_IB_RREQ_AND_Q1_FULL:
@@ -403,7 +405,6 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
403 case I40IW_AE_LLP_SEGMENT_TOO_SMALL: 405 case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
404 case I40IW_AE_LLP_SYN_RECEIVED: 406 case I40IW_AE_LLP_SYN_RECEIVED:
405 case I40IW_AE_LLP_TOO_MANY_RETRIES: 407 case I40IW_AE_LLP_TOO_MANY_RETRIES:
406 case I40IW_AE_LLP_DOUBT_REACHABILITY:
407 case I40IW_AE_LCE_QP_CATASTROPHIC: 408 case I40IW_AE_LCE_QP_CATASTROPHIC:
408 case I40IW_AE_LCE_FUNCTION_CATASTROPHIC: 409 case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
409 case I40IW_AE_LCE_CQ_CATASTROPHIC: 410 case I40IW_AE_LCE_CQ_CATASTROPHIC:
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index e824296713e2..b08862978de8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -99,6 +99,10 @@ static struct notifier_block i40iw_net_notifier = {
99 .notifier_call = i40iw_net_event 99 .notifier_call = i40iw_net_event
100}; 100};
101 101
102static struct notifier_block i40iw_netdevice_notifier = {
103 .notifier_call = i40iw_netdevice_event
104};
105
102/** 106/**
103 * i40iw_find_i40e_handler - find a handler given a client info 107 * i40iw_find_i40e_handler - find a handler given a client info
104 * @ldev: pointer to a client info 108 * @ldev: pointer to a client info
@@ -483,6 +487,7 @@ static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
483 for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) { 487 for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
484 info.rsrc_type = iw_hmc_obj_types[i]; 488 info.rsrc_type = iw_hmc_obj_types[i];
485 info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt; 489 info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
490 info.add_sd_cnt = 0;
486 status = i40iw_create_hmc_obj_type(dev, &info); 491 status = i40iw_create_hmc_obj_type(dev, &info);
487 if (status) { 492 if (status) {
488 i40iw_pr_err("create obj type %d status = %d\n", 493 i40iw_pr_err("create obj type %d status = %d\n",
@@ -607,7 +612,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
607 INIT_LIST_HEAD(&cqp->cqp_avail_reqs); 612 INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
608 INIT_LIST_HEAD(&cqp->cqp_pending_reqs); 613 INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
609 /* init the waitq of the cqp_requests and add them to the list */ 614 /* init the waitq of the cqp_requests and add them to the list */
610 for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) { 615 for (i = 0; i < sqsize; i++) {
611 init_waitqueue_head(&cqp->cqp_requests[i].waitq); 616 init_waitqueue_head(&cqp->cqp_requests[i].waitq);
612 list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs); 617 list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
613 } 618 }
@@ -1285,7 +1290,7 @@ static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
1285 __LINE__, statuscpu2); 1290 __LINE__, statuscpu2);
1286 if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80)) 1291 if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
1287 break; /* SUCCESS */ 1292 break; /* SUCCESS */
1288 mdelay(1000); 1293 msleep(1000);
1289 retrycount++; 1294 retrycount++;
1290 } while (retrycount < 14); 1295 } while (retrycount < 14);
1291 i40iw_wr32(hw, 0xb4040, 0x4C104C5); 1296 i40iw_wr32(hw, 0xb4040, 0x4C104C5);
@@ -1393,6 +1398,7 @@ static void i40iw_register_notifiers(void)
1393 register_inetaddr_notifier(&i40iw_inetaddr_notifier); 1398 register_inetaddr_notifier(&i40iw_inetaddr_notifier);
1394 register_inet6addr_notifier(&i40iw_inetaddr6_notifier); 1399 register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
1395 register_netevent_notifier(&i40iw_net_notifier); 1400 register_netevent_notifier(&i40iw_net_notifier);
1401 register_netdevice_notifier(&i40iw_netdevice_notifier);
1396} 1402}
1397 1403
1398/** 1404/**
@@ -1404,6 +1410,7 @@ static void i40iw_unregister_notifiers(void)
1404 unregister_netevent_notifier(&i40iw_net_notifier); 1410 unregister_netevent_notifier(&i40iw_net_notifier);
1405 unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); 1411 unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
1406 unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); 1412 unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
1413 unregister_netdevice_notifier(&i40iw_netdevice_notifier);
1407} 1414}
1408 1415
1409/** 1416/**
@@ -1793,7 +1800,7 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
1793 if (reset) 1800 if (reset)
1794 iwdev->reset = true; 1801 iwdev->reset = true;
1795 1802
1796 i40iw_cm_disconnect_all(iwdev); 1803 i40iw_cm_teardown_connections(iwdev, NULL, NULL, true);
1797 destroy_workqueue(iwdev->virtchnl_wq); 1804 destroy_workqueue(iwdev->virtchnl_wq);
1798 i40iw_deinit_device(iwdev); 1805 i40iw_deinit_device(iwdev);
1799} 1806}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index 796a815b53fd..4c21197830b3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -48,7 +48,6 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
48static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx); 48static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
49static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc 49static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
50 *rsrc, bool initial); 50 *rsrc, bool initial);
51static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
52/** 51/**
53 * i40iw_puda_get_listbuf - get buffer from puda list 52 * i40iw_puda_get_listbuf - get buffer from puda list
54 * @list: list to use for buffers (ILQ or IEQ) 53 * @list: list to use for buffers (ILQ or IEQ)
@@ -1378,7 +1377,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
1378 u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx; 1377 u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
1379 u32 rcv_wnd = hw_host_ctx[23]; 1378 u32 rcv_wnd = hw_host_ctx[23];
1380 /* first partial seq # in q2 */ 1379 /* first partial seq # in q2 */
1381 u32 fps = qp->q2_buf[16]; 1380 u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
1382 struct list_head *rxlist = &pfpdu->rxlist; 1381 struct list_head *rxlist = &pfpdu->rxlist;
1383 struct list_head *plist; 1382 struct list_head *plist;
1384 1383
@@ -1483,7 +1482,7 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
1483 * @ieq: ieq resource 1482 * @ieq: ieq resource
1484 * @qp: all pending fpdu buffers 1483 * @qp: all pending fpdu buffers
1485 */ 1484 */
1486static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp) 1485void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
1487{ 1486{
1488 struct i40iw_puda_buf *buf; 1487 struct i40iw_puda_buf *buf;
1489 struct i40iw_pfpdu *pfpdu = &qp->pfpdu; 1488 struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index 660aa3edae56..53a7d58c84b5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -184,4 +184,5 @@ enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct
184enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); 184enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
185void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); 185void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
186void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq); 186void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
187void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
187#endif 188#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 3ec5389a81a1..8afa5a67a86b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -894,20 +894,6 @@ exit:
894} 894}
895 895
896/** 896/**
897 * i40iw_qp_roundup - return round up QP WQ depth
898 * @wqdepth: WQ depth in quantas to round up
899 */
900static int i40iw_qp_round_up(u32 wqdepth)
901{
902 int scount = 1;
903
904 for (wqdepth--; scount <= 16; scount *= 2)
905 wqdepth |= wqdepth >> scount;
906
907 return ++wqdepth;
908}
909
910/**
911 * i40iw_get_wqe_shift - get shift count for maximum wqe size 897 * i40iw_get_wqe_shift - get shift count for maximum wqe size
912 * @sge: Maximum Scatter Gather Elements wqe 898 * @sge: Maximum Scatter Gather Elements wqe
913 * @inline_data: Maximum inline data size 899 * @inline_data: Maximum inline data size
@@ -934,7 +920,7 @@ void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift)
934 */ 920 */
935enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth) 921enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
936{ 922{
937 *sqdepth = i40iw_qp_round_up((sq_size << shift) + I40IW_SQ_RSVD); 923 *sqdepth = roundup_pow_of_two((sq_size << shift) + I40IW_SQ_RSVD);
938 924
939 if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift)) 925 if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
940 *sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift; 926 *sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
@@ -953,7 +939,7 @@ enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
953 */ 939 */
954enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth) 940enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth)
955{ 941{
956 *rqdepth = i40iw_qp_round_up((rq_size << shift) + I40IW_RQ_RSVD); 942 *rqdepth = roundup_pow_of_two((rq_size << shift) + I40IW_RQ_RSVD);
957 943
958 if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift)) 944 if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
959 *rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift; 945 *rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
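
Editor's note: replacing i40iw_qp_round_up() with roundup_pow_of_two() above is behaviour-preserving for the depths used here, which are presumably always nonzero because the reserved-entry count is added first (the kernel's roundup_pow_of_two() is undefined for 0). A userspace comparison of the deleted bit-smearing helper against a plain power-of-two round-up; both functions below are local re-implementations for demonstration only:

#include <stdio.h>

static unsigned int old_round_up(unsigned int wqdepth)	/* the deleted helper */
{
	int scount = 1;

	for (wqdepth--; scount <= 16; scount *= 2)
		wqdepth |= wqdepth >> scount;

	return ++wqdepth;
}

static unsigned int pow2_round_up(unsigned int n)	/* stand-in, expects n > 0 */
{
	unsigned int p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int depth;

	for (depth = 1; depth <= 1025; depth += 128)
		printf("%4u -> old %4u, new %4u\n",
		       depth, old_round_up(depth), pow2_round_up(depth));
	return 0;
}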
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index e73efc59a0ab..b125925641e0 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -59,7 +59,6 @@ enum i40iw_device_capabilities_const {
59 I40IW_MAX_CEQ_ENTRIES = 131071, 59 I40IW_MAX_CEQ_ENTRIES = 131071,
60 I40IW_MIN_CQ_SIZE = 1, 60 I40IW_MIN_CQ_SIZE = 1,
61 I40IW_MAX_CQ_SIZE = 1048575, 61 I40IW_MAX_CQ_SIZE = 1048575,
62 I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
63 I40IW_DB_ID_ZERO = 0, 62 I40IW_DB_ID_ZERO = 0,
64 I40IW_MAX_WQ_FRAGMENT_COUNT = 3, 63 I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
65 I40IW_MAX_SGE_RD = 1, 64 I40IW_MAX_SGE_RD = 1,
@@ -72,7 +71,7 @@ enum i40iw_device_capabilities_const {
72 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496, 71 I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
73 I40IW_MAX_INLINE_DATA_SIZE = 48, 72 I40IW_MAX_INLINE_DATA_SIZE = 48,
74 I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48, 73 I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
75 I40IW_MAX_IRD_SIZE = 63, 74 I40IW_MAX_IRD_SIZE = 64,
76 I40IW_MAX_ORD_SIZE = 127, 75 I40IW_MAX_ORD_SIZE = 127,
77 I40IW_MAX_WQ_ENTRIES = 2048, 76 I40IW_MAX_WQ_ENTRIES = 2048,
78 I40IW_Q2_BUFFER_SIZE = (248 + 100), 77 I40IW_Q2_BUFFER_SIZE = (248 + 100),
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 8845dba7c438..ddc1056b0b4e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -137,7 +137,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
137} 137}
138 138
139/** 139/**
140 * i40iw_inetaddr_event - system notifier for netdev events 140 * i40iw_inetaddr_event - system notifier for ipv4 addr events
141 * @notfier: not used 141 * @notfier: not used
142 * @event: event for notifier 142 * @event: event for notifier
143 * @ptr: if address 143 * @ptr: if address
@@ -200,7 +200,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
200} 200}
201 201
202/** 202/**
203 * i40iw_inet6addr_event - system notifier for ipv6 netdev events 203 * i40iw_inet6addr_event - system notifier for ipv6 addr events
204 * @notfier: not used 204 * @notfier: not used
205 * @event: event for notifier 205 * @event: event for notifier
206 * @ptr: if address 206 * @ptr: if address
@@ -252,7 +252,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
252} 252}
253 253
254/** 254/**
255 * i40iw_net_event - system notifier for net events 255 * i40iw_net_event - system notifier for netevents
256 * @notfier: not used 256 * @notfier: not used
257 * @event: event for notifier 257 * @event: event for notifier
258 * @ptr: neighbor 258 * @ptr: neighbor
@@ -297,6 +297,50 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *
297} 297}
298 298
299/** 299/**
300 * i40iw_netdevice_event - system notifier for netdev events
301 * @notfier: not used
302 * @event: event for notifier
303 * @ptr: netdev
304 */
305int i40iw_netdevice_event(struct notifier_block *notifier,
306 unsigned long event,
307 void *ptr)
308{
309 struct net_device *event_netdev;
310 struct net_device *netdev;
311 struct i40iw_device *iwdev;
312 struct i40iw_handler *hdl;
313
314 event_netdev = netdev_notifier_info_to_dev(ptr);
315
316 hdl = i40iw_find_netdev(event_netdev);
317 if (!hdl)
318 return NOTIFY_DONE;
319
320 iwdev = &hdl->device;
321 if (iwdev->init_state < RDMA_DEV_REGISTERED || iwdev->closing)
322 return NOTIFY_DONE;
323
324 netdev = iwdev->ldev->netdev;
325 if (netdev != event_netdev)
326 return NOTIFY_DONE;
327
328 iwdev->iw_status = 1;
329
330 switch (event) {
331 case NETDEV_DOWN:
332 iwdev->iw_status = 0;
333 /* Fall through */
334 case NETDEV_UP:
335 i40iw_port_ibevent(iwdev);
336 break;
337 default:
338 break;
339 }
340 return NOTIFY_DONE;
341}
342
343/**
300 * i40iw_get_cqp_request - get cqp struct 344 * i40iw_get_cqp_request - get cqp struct
301 * @cqp: device cqp ptr 345 * @cqp: device cqp ptr
302 * @wait: cqp to be used in wait mode 346 * @wait: cqp to be used in wait mode
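
Editor's note: the new i40iw_netdevice_event() above relies on a deliberate switch fall-through: iw_status starts at 1, NETDEV_DOWN clears it and then shares the port-event report with NETDEV_UP, while other events are ignored. A compact userspace model of that control flow; the event values and the report are stand-ins, not kernel definitions:

#include <stdio.h>

enum { NETDEV_UP = 1, NETDEV_DOWN = 2, NETDEV_CHANGEMTU = 3 };

static void handle_event(unsigned long event)
{
	int iw_status = 1;

	switch (event) {
	case NETDEV_DOWN:
		iw_status = 0;
		/* fall through */
	case NETDEV_UP:
		printf("port event, iw_status=%d\n", iw_status);
		break;
	default:
		break;		/* other netdev events are ignored */
	}
}

int main(void)
{
	handle_event(NETDEV_UP);
	handle_event(NETDEV_DOWN);
	handle_event(NETDEV_CHANGEMTU);
	return 0;
}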
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 3c6f3ce88f89..70024e8e2692 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -412,6 +412,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev,
412{ 412{
413 struct i40iw_pbl *iwpbl = &iwqp->iwpbl; 413 struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
414 414
415 i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
415 i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); 416 i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
416 if (qp_num) 417 if (qp_num)
417 i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num); 418 i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
@@ -1637,6 +1638,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
1637 err_code = -EOVERFLOW; 1638 err_code = -EOVERFLOW;
1638 goto err; 1639 goto err;
1639 } 1640 }
1641 stag &= ~I40IW_CQPSQ_STAG_KEY_MASK;
1640 iwmr->stag = stag; 1642 iwmr->stag = stag;
1641 iwmr->ibmr.rkey = stag; 1643 iwmr->ibmr.rkey = stag;
1642 iwmr->ibmr.lkey = stag; 1644 iwmr->ibmr.lkey = stag;
@@ -2242,14 +2244,12 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2242 info.op.inline_rdma_write.len = ib_wr->sg_list[0].length; 2244 info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
2243 info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 2245 info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2244 info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; 2246 info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2245 info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
2246 ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false); 2247 ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
2247 } else { 2248 } else {
2248 info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; 2249 info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
2249 info.op.rdma_write.num_lo_sges = ib_wr->num_sge; 2250 info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
2250 info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 2251 info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2251 info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey; 2252 info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2252 info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
2253 ret = ukqp->ops.iw_rdma_write(ukqp, &info, false); 2253 ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
2254 } 2254 }
2255 2255
@@ -2271,7 +2271,6 @@ static int i40iw_post_send(struct ib_qp *ibqp,
2271 info.op_type = I40IW_OP_TYPE_RDMA_READ; 2271 info.op_type = I40IW_OP_TYPE_RDMA_READ;
2272 info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr; 2272 info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2273 info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey; 2273 info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2274 info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
2275 info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr; 2274 info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
2276 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey; 2275 info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
2277 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length; 2276 info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
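The single line added to i40iw_alloc_mr() above (stag &= ~I40IW_CQPSQ_STAG_KEY_MASK) strips the consumer-key portion out of the freshly allocated STag before it is exposed as lkey/rkey. A tiny userspace sketch of that masking, assuming (as the mask name suggests) that the key occupies the low 8 bits of the 32-bit STag; the constant below is a stand-in, not the driver's definition:

#include <inttypes.h>
#include <stdio.h>

#define STAG_KEY_MASK 0xffu   /* assumption: key byte = low 8 bits of the STag */

int main(void)
{
	uint32_t stag = 0x00012345;            /* example: MR index 0x123, key 0x45 */
	uint32_t base = stag & ~STAG_KEY_MASK; /* what gets stored as lkey/rkey */

	printf("stag 0x%08" PRIx32 " -> base 0x%08" PRIx32 ", key 0x%02" PRIx32 "\n",
	       stag, base, stag & STAG_KEY_MASK);
	return 0;
}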
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index bf4f14a1b4fc..9a566ee3ceff 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -170,7 +170,7 @@ err_buf:
170 return err; 170 return err;
171} 171}
172 172
173#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION 173#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
174struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, 174struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
175 const struct ib_cq_init_attr *attr, 175 const struct ib_cq_init_attr *attr,
176 struct ib_ucontext *context, 176 struct ib_ucontext *context,
@@ -246,7 +246,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
246 246
247 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, 247 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
248 cq->db.dma, &cq->mcq, vector, 0, 248 cq->db.dma, &cq->mcq, vector, 0,
249 !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); 249 !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
250 if (err) 250 if (err)
251 goto err_dbmap; 251 goto err_dbmap;
252 252
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8c8a16791a3f..8d2ee9322f2e 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -589,6 +589,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
589 if (props->rss_caps.supported_qpts) { 589 if (props->rss_caps.supported_qpts) {
590 resp.rss_caps.rx_hash_function = 590 resp.rss_caps.rx_hash_function =
591 MLX4_IB_RX_HASH_FUNC_TOEPLITZ; 591 MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
592
592 resp.rss_caps.rx_hash_fields_mask = 593 resp.rss_caps.rx_hash_fields_mask =
593 MLX4_IB_RX_HASH_SRC_IPV4 | 594 MLX4_IB_RX_HASH_SRC_IPV4 |
594 MLX4_IB_RX_HASH_DST_IPV4 | 595 MLX4_IB_RX_HASH_DST_IPV4 |
@@ -598,6 +599,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
598 MLX4_IB_RX_HASH_DST_PORT_TCP | 599 MLX4_IB_RX_HASH_DST_PORT_TCP |
599 MLX4_IB_RX_HASH_SRC_PORT_UDP | 600 MLX4_IB_RX_HASH_SRC_PORT_UDP |
600 MLX4_IB_RX_HASH_DST_PORT_UDP; 601 MLX4_IB_RX_HASH_DST_PORT_UDP;
602
603 if (dev->dev->caps.tunnel_offload_mode ==
604 MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
605 resp.rss_caps.rx_hash_fields_mask |=
606 MLX4_IB_RX_HASH_INNER;
601 } 607 }
602 } 608 }
603 609
@@ -2995,9 +3001,8 @@ err_steer_free_bitmap:
2995 kfree(ibdev->ib_uc_qpns_bitmap); 3001 kfree(ibdev->ib_uc_qpns_bitmap);
2996 3002
2997err_steer_qp_release: 3003err_steer_qp_release:
2998 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) 3004 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2999 mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 3005 ibdev->steer_qpn_count);
3000 ibdev->steer_qpn_count);
3001err_counter: 3006err_counter:
3002 for (i = 0; i < ibdev->num_ports; ++i) 3007 for (i = 0; i < ibdev->num_ports; ++i)
3003 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]); 3008 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
@@ -3102,11 +3107,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
3102 ibdev->iboe.nb.notifier_call = NULL; 3107 ibdev->iboe.nb.notifier_call = NULL;
3103 } 3108 }
3104 3109
3105 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { 3110 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
3106 mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 3111 ibdev->steer_qpn_count);
3107 ibdev->steer_qpn_count); 3112 kfree(ibdev->ib_uc_qpns_bitmap);
3108 kfree(ibdev->ib_uc_qpns_bitmap);
3109 }
3110 3113
3111 iounmap(ibdev->uar_map); 3114 iounmap(ibdev->uar_map);
3112 for (p = 0; p < ibdev->num_ports; ++p) 3115 for (p = 0; p < ibdev->num_ports; ++p)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index caf490ab24c8..f045491f2c14 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -734,10 +734,24 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
734 return (-EOPNOTSUPP); 734 return (-EOPNOTSUPP);
735 } 735 }
736 736
737 if (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_INNER) {
738 if (dev->dev->caps.tunnel_offload_mode ==
739 MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
740 /*
741 * Hash according to inner headers if they exist, otherwise
742 * according to outer headers.
743 */
744 rss_ctx->flags |= MLX4_RSS_BY_INNER_HEADERS_IPONLY;
745 } else {
746 pr_debug("RSS Hash for inner headers isn't supported\n");
747 return (-EOPNOTSUPP);
748 }
749 }
750
737 return 0; 751 return 0;
738} 752}
739 753
740static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd, 754static int create_qp_rss(struct mlx4_ib_dev *dev,
741 struct ib_qp_init_attr *init_attr, 755 struct ib_qp_init_attr *init_attr,
742 struct mlx4_ib_create_qp_rss *ucmd, 756 struct mlx4_ib_create_qp_rss *ucmd,
743 struct mlx4_ib_qp *qp) 757 struct mlx4_ib_qp *qp)
@@ -860,7 +874,7 @@ static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
860 qp->pri.vid = 0xFFFF; 874 qp->pri.vid = 0xFFFF;
861 qp->alt.vid = 0xFFFF; 875 qp->alt.vid = 0xFFFF;
862 876
863 err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp); 877 err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
864 if (err) { 878 if (err) {
865 kfree(qp); 879 kfree(qp);
866 return ERR_PTR(err); 880 return ERR_PTR(err);
@@ -1836,6 +1850,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
1836 mlx4_ib_gid_index_to_real_index(dev, port, 1850 mlx4_ib_gid_index_to_real_index(dev, port,
1837 grh->sgid_index); 1851 grh->sgid_index);
1838 1852
1853 if (real_sgid_index < 0)
1854 return real_sgid_index;
1839 if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) { 1855 if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
1840 pr_err("sgid_index (%u) too large. max is %d\n", 1856 pr_err("sgid_index (%u) too large. max is %d\n",
1841 real_sgid_index, dev->dev->caps.gid_table_len[port] - 1); 1857 real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 2d32b519bb61..985fa2637390 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -247,21 +247,30 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
247 } 247 }
248} 248}
249 249
250static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var) 250static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
251 int offset, u32 *var)
251{ 252{
252 int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out); 253 int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
253 void *out; 254 void *out;
254 void *field; 255 void *field;
255 int err; 256 int err;
256 enum mlx5_ib_cong_node_type node; 257 enum mlx5_ib_cong_node_type node;
258 struct mlx5_core_dev *mdev;
259
260 /* Takes a 1-based port number */
261 mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
262 if (!mdev)
263 return -ENODEV;
257 264
258 out = kvzalloc(outlen, GFP_KERNEL); 265 out = kvzalloc(outlen, GFP_KERNEL);
259 if (!out) 266 if (!out) {
260 return -ENOMEM; 267 err = -ENOMEM;
268 goto alloc_err;
269 }
261 270
262 node = mlx5_ib_param_to_node(offset); 271 node = mlx5_ib_param_to_node(offset);
263 272
264 err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen); 273 err = mlx5_cmd_query_cong_params(mdev, node, out, outlen);
265 if (err) 274 if (err)
266 goto free; 275 goto free;
267 276
@@ -270,21 +279,32 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
270 279
271free: 280free:
272 kvfree(out); 281 kvfree(out);
282alloc_err:
283 mlx5_ib_put_native_port_mdev(dev, port_num + 1);
273 return err; 284 return err;
274} 285}
275 286
276static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var) 287static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
288 int offset, u32 var)
277{ 289{
278 int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in); 290 int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
279 void *in; 291 void *in;
280 void *field; 292 void *field;
281 enum mlx5_ib_cong_node_type node; 293 enum mlx5_ib_cong_node_type node;
294 struct mlx5_core_dev *mdev;
282 u32 attr_mask = 0; 295 u32 attr_mask = 0;
283 int err; 296 int err;
284 297
298 /* Takes a 1-based port number */
299 mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
300 if (!mdev)
301 return -ENODEV;
302
285 in = kvzalloc(inlen, GFP_KERNEL); 303 in = kvzalloc(inlen, GFP_KERNEL);
286 if (!in) 304 if (!in) {
287 return -ENOMEM; 305 err = -ENOMEM;
306 goto alloc_err;
307 }
288 308
289 MLX5_SET(modify_cong_params_in, in, opcode, 309 MLX5_SET(modify_cong_params_in, in, opcode,
290 MLX5_CMD_OP_MODIFY_CONG_PARAMS); 310 MLX5_CMD_OP_MODIFY_CONG_PARAMS);
@@ -299,8 +319,10 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
299 MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp, 319 MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
300 attr_mask); 320 attr_mask);
301 321
302 err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen); 322 err = mlx5_cmd_modify_cong_params(mdev, in, inlen);
303 kvfree(in); 323 kvfree(in);
324alloc_err:
325 mlx5_ib_put_native_port_mdev(dev, port_num + 1);
304 return err; 326 return err;
305} 327}
306 328
@@ -324,7 +346,7 @@ static ssize_t set_param(struct file *filp, const char __user *buf,
324 if (kstrtou32(lbuf, 0, &var)) 346 if (kstrtou32(lbuf, 0, &var))
325 return -EINVAL; 347 return -EINVAL;
326 348
327 ret = mlx5_ib_set_cc_params(param->dev, offset, var); 349 ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
328 return ret ? ret : count; 350 return ret ? ret : count;
329} 351}
330 352
@@ -340,7 +362,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
340 if (*pos) 362 if (*pos)
341 return 0; 363 return 0;
342 364
343 ret = mlx5_ib_get_cc_params(param->dev, offset, &var); 365 ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
344 if (ret) 366 if (ret)
345 return ret; 367 return ret;
346 368
@@ -362,44 +384,51 @@ static const struct file_operations dbg_cc_fops = {
362 .read = get_param, 384 .read = get_param,
363}; 385};
364 386
365void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev) 387void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
366{ 388{
367 if (!mlx5_debugfs_root || 389 if (!mlx5_debugfs_root ||
368 !dev->dbg_cc_params || 390 !dev->port[port_num].dbg_cc_params ||
369 !dev->dbg_cc_params->root) 391 !dev->port[port_num].dbg_cc_params->root)
370 return; 392 return;
371 393
372 debugfs_remove_recursive(dev->dbg_cc_params->root); 394 debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
373 kfree(dev->dbg_cc_params); 395 kfree(dev->port[port_num].dbg_cc_params);
374 dev->dbg_cc_params = NULL; 396 dev->port[port_num].dbg_cc_params = NULL;
375} 397}
376 398
377int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev) 399int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
378{ 400{
379 struct mlx5_ib_dbg_cc_params *dbg_cc_params; 401 struct mlx5_ib_dbg_cc_params *dbg_cc_params;
402 struct mlx5_core_dev *mdev;
380 int i; 403 int i;
381 404
382 if (!mlx5_debugfs_root) 405 if (!mlx5_debugfs_root)
383 goto out; 406 goto out;
384 407
385 if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) || 408 /* Takes a 1-based port number */
386 !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) 409 mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
410 if (!mdev)
387 goto out; 411 goto out;
388 412
413 if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
414 !MLX5_CAP_GEN(mdev, cc_modify_allowed))
415 goto put_mdev;
416
389 dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL); 417 dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
390 if (!dbg_cc_params) 418 if (!dbg_cc_params)
391 goto out; 419 goto err;
392 420
393 dev->dbg_cc_params = dbg_cc_params; 421 dev->port[port_num].dbg_cc_params = dbg_cc_params;
394 422
395 dbg_cc_params->root = debugfs_create_dir("cc_params", 423 dbg_cc_params->root = debugfs_create_dir("cc_params",
396 dev->mdev->priv.dbg_root); 424 mdev->priv.dbg_root);
397 if (!dbg_cc_params->root) 425 if (!dbg_cc_params->root)
398 goto err; 426 goto err;
399 427
400 for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) { 428 for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
401 dbg_cc_params->params[i].offset = i; 429 dbg_cc_params->params[i].offset = i;
402 dbg_cc_params->params[i].dev = dev; 430 dbg_cc_params->params[i].dev = dev;
431 dbg_cc_params->params[i].port_num = port_num;
403 dbg_cc_params->params[i].dentry = 432 dbg_cc_params->params[i].dentry =
404 debugfs_create_file(mlx5_ib_dbg_cc_name[i], 433 debugfs_create_file(mlx5_ib_dbg_cc_name[i],
405 0600, dbg_cc_params->root, 434 0600, dbg_cc_params->root,
@@ -408,11 +437,17 @@ int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
408 if (!dbg_cc_params->params[i].dentry) 437 if (!dbg_cc_params->params[i].dentry)
409 goto err; 438 goto err;
410 } 439 }
411out: return 0; 440
441put_mdev:
442 mlx5_ib_put_native_port_mdev(dev, port_num + 1);
443out:
444 return 0;
412 445
413err: 446err:
414 mlx5_ib_warn(dev, "cong debugfs failure\n"); 447 mlx5_ib_warn(dev, "cong debugfs failure\n");
415 mlx5_ib_cleanup_cong_debugfs(dev); 448 mlx5_ib_cleanup_cong_debugfs(dev, port_num);
449 mlx5_ib_put_native_port_mdev(dev, port_num + 1);
450
416 /* 451 /*
417 * We don't want to fail driver if debugfs failed to initialize, 452 * We don't want to fail driver if debugfs failed to initialize,
418 * so we are not forwarding error to the user. 453 * so we are not forwarding error to the user.
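The congestion-control debugfs changes above all follow the same shape: take the per-port core-device reference first, allocate the command buffer second, and unwind through goto labels so the reference is dropped on every exit path (including the new alloc_err label). A plain-C sketch of that acquire/allocate/unwind ordering, with get_port()/put_port() as hypothetical stand-ins for mlx5_ib_get_native_port_mdev()/mlx5_ib_put_native_port_mdev():

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the per-port get/put helpers. */
static void *get_port(int port) { (void)port; return malloc(1); }
static void  put_port(void *p)  { free(p); }

static int query_params(int port)
{
	void *mdev;
	char *out;
	int err = 0;

	mdev = get_port(port);          /* take the per-port reference first */
	if (!mdev)
		return -1;

	out = calloc(1, 256);           /* then allocate the command buffer */
	if (!out) {
		err = -1;
		goto alloc_err;         /* nothing else to free, just drop the ref */
	}

	/* ... issue the query against 'mdev' into 'out' ... */

	free(out);
alloc_err:
	put_port(mdev);                 /* released on every exit path */
	return err;
}

int main(void)
{
	return query_params(1) ? 1 : 0;
}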
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 18705cbcdc8c..5b974fb97611 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -1010,7 +1010,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
1010 MLX5_SET(cqc, cqc, uar_page, index); 1010 MLX5_SET(cqc, cqc, uar_page, index);
1011 MLX5_SET(cqc, cqc, c_eqn, eqn); 1011 MLX5_SET(cqc, cqc, c_eqn, eqn);
1012 MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); 1012 MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
1013 if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) 1013 if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
1014 MLX5_SET(cqc, cqc, oi, 1); 1014 MLX5_SET(cqc, cqc, oi, 1);
1015 1015
1016 err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen); 1016 err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1003b0133a49..32a9e9228b13 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -197,10 +197,9 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
197 vl_15_dropped); 197 vl_15_dropped);
198} 198}
199 199
200static int process_pma_cmd(struct ib_device *ibdev, u8 port_num, 200static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
201 const struct ib_mad *in_mad, struct ib_mad *out_mad) 201 const struct ib_mad *in_mad, struct ib_mad *out_mad)
202{ 202{
203 struct mlx5_ib_dev *dev = to_mdev(ibdev);
204 int err; 203 int err;
205 void *out_cnt; 204 void *out_cnt;
206 205
@@ -222,7 +221,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
222 if (!out_cnt) 221 if (!out_cnt)
223 return IB_MAD_RESULT_FAILURE; 222 return IB_MAD_RESULT_FAILURE;
224 223
225 err = mlx5_core_query_vport_counter(dev->mdev, 0, 0, 224 err = mlx5_core_query_vport_counter(mdev, 0, 0,
226 port_num, out_cnt, sz); 225 port_num, out_cnt, sz);
227 if (!err) 226 if (!err)
228 pma_cnt_ext_assign(pma_cnt_ext, out_cnt); 227 pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
@@ -235,7 +234,7 @@ static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
235 if (!out_cnt) 234 if (!out_cnt)
236 return IB_MAD_RESULT_FAILURE; 235 return IB_MAD_RESULT_FAILURE;
237 236
238 err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num, 237 err = mlx5_core_query_ib_ppcnt(mdev, port_num,
239 out_cnt, sz); 238 out_cnt, sz);
240 if (!err) 239 if (!err)
241 pma_cnt_assign(pma_cnt, out_cnt); 240 pma_cnt_assign(pma_cnt, out_cnt);
@@ -255,9 +254,11 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
255 u16 *out_mad_pkey_index) 254 u16 *out_mad_pkey_index)
256{ 255{
257 struct mlx5_ib_dev *dev = to_mdev(ibdev); 256 struct mlx5_ib_dev *dev = to_mdev(ibdev);
258 struct mlx5_core_dev *mdev = dev->mdev;
259 const struct ib_mad *in_mad = (const struct ib_mad *)in; 257 const struct ib_mad *in_mad = (const struct ib_mad *)in;
260 struct ib_mad *out_mad = (struct ib_mad *)out; 258 struct ib_mad *out_mad = (struct ib_mad *)out;
259 struct mlx5_core_dev *mdev;
260 u8 mdev_port_num;
261 int ret;
261 262
262 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || 263 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
263 *out_mad_size != sizeof(*out_mad))) 264 *out_mad_size != sizeof(*out_mad)))
@@ -265,14 +266,20 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
265 266
266 memset(out_mad->data, 0, sizeof(out_mad->data)); 267 memset(out_mad->data, 0, sizeof(out_mad->data));
267 268
269 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
270 if (!mdev)
271 return IB_MAD_RESULT_FAILURE;
272
268 if (MLX5_CAP_GEN(mdev, vport_counters) && 273 if (MLX5_CAP_GEN(mdev, vport_counters) &&
269 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && 274 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
270 in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { 275 in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
271 return process_pma_cmd(ibdev, port_num, in_mad, out_mad); 276 ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
272 } else { 277 } else {
273 return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, 278 ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
274 in_mad, out_mad); 279 in_mad, out_mad);
275 } 280 }
281 mlx5_ib_put_native_port_mdev(dev, port_num);
282 return ret;
276} 283}
277 284
278int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) 285int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
@@ -519,7 +526,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
519 int ext_active_speed; 526 int ext_active_speed;
520 int err = -ENOMEM; 527 int err = -ENOMEM;
521 528
522 if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) { 529 if (port < 1 || port > dev->num_ports) {
523 mlx5_ib_warn(dev, "invalid port number %d\n", port); 530 mlx5_ib_warn(dev, "invalid port number %d\n", port);
524 return -EINVAL; 531 return -EINVAL;
525 } 532 }
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 262c1aa2e028..4236c8086820 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -50,16 +50,14 @@
50#include <rdma/ib_cache.h> 50#include <rdma/ib_cache.h>
51#include <linux/mlx5/port.h> 51#include <linux/mlx5/port.h>
52#include <linux/mlx5/vport.h> 52#include <linux/mlx5/vport.h>
53#include <linux/mlx5/fs.h>
53#include <linux/list.h> 54#include <linux/list.h>
54#include <rdma/ib_smi.h> 55#include <rdma/ib_smi.h>
55#include <rdma/ib_umem.h> 56#include <rdma/ib_umem.h>
56#include <linux/in.h> 57#include <linux/in.h>
57#include <linux/etherdevice.h> 58#include <linux/etherdevice.h>
58#include <linux/mlx5/fs.h>
59#include <linux/mlx5/vport.h>
60#include "mlx5_ib.h" 59#include "mlx5_ib.h"
61#include "cmd.h" 60#include "cmd.h"
62#include <linux/mlx5/vport.h>
63 61
64#define DRIVER_NAME "mlx5_ib" 62#define DRIVER_NAME "mlx5_ib"
65#define DRIVER_VERSION "5.0-0" 63#define DRIVER_VERSION "5.0-0"
@@ -72,10 +70,36 @@ static char mlx5_version[] =
72 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" 70 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
73 DRIVER_VERSION "\n"; 71 DRIVER_VERSION "\n";
74 72
73struct mlx5_ib_event_work {
74 struct work_struct work;
75 struct mlx5_core_dev *dev;
76 void *context;
77 enum mlx5_dev_event event;
78 unsigned long param;
79};
80
75enum { 81enum {
76 MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, 82 MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
77}; 83};
78 84
85static struct workqueue_struct *mlx5_ib_event_wq;
86static LIST_HEAD(mlx5_ib_unaffiliated_port_list);
87static LIST_HEAD(mlx5_ib_dev_list);
88/*
89 * This mutex should be held when accessing either of the above lists
90 */
91static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
92
93struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
94{
95 struct mlx5_ib_dev *dev;
96
97 mutex_lock(&mlx5_ib_multiport_mutex);
98 dev = mpi->ibdev;
99 mutex_unlock(&mlx5_ib_multiport_mutex);
100 return dev;
101}
102
79static enum rdma_link_layer 103static enum rdma_link_layer
80mlx5_port_type_cap_to_rdma_ll(int port_type_cap) 104mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
81{ 105{
@@ -115,24 +139,32 @@ static int get_port_state(struct ib_device *ibdev,
115static int mlx5_netdev_event(struct notifier_block *this, 139static int mlx5_netdev_event(struct notifier_block *this,
116 unsigned long event, void *ptr) 140 unsigned long event, void *ptr)
117{ 141{
142 struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
118 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 143 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
119 struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, 144 u8 port_num = roce->native_port_num;
120 roce.nb); 145 struct mlx5_core_dev *mdev;
146 struct mlx5_ib_dev *ibdev;
147
148 ibdev = roce->dev;
149 mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
150 if (!mdev)
151 return NOTIFY_DONE;
121 152
122 switch (event) { 153 switch (event) {
123 case NETDEV_REGISTER: 154 case NETDEV_REGISTER:
124 case NETDEV_UNREGISTER: 155 case NETDEV_UNREGISTER:
125 write_lock(&ibdev->roce.netdev_lock); 156 write_lock(&roce->netdev_lock);
126 if (ndev->dev.parent == &ibdev->mdev->pdev->dev) 157
127 ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? 158 if (ndev->dev.parent == &mdev->pdev->dev)
128 NULL : ndev; 159 roce->netdev = (event == NETDEV_UNREGISTER) ?
129 write_unlock(&ibdev->roce.netdev_lock); 160 NULL : ndev;
161 write_unlock(&roce->netdev_lock);
130 break; 162 break;
131 163
132 case NETDEV_CHANGE: 164 case NETDEV_CHANGE:
133 case NETDEV_UP: 165 case NETDEV_UP:
134 case NETDEV_DOWN: { 166 case NETDEV_DOWN: {
135 struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); 167 struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
136 struct net_device *upper = NULL; 168 struct net_device *upper = NULL;
137 169
138 if (lag_ndev) { 170 if (lag_ndev) {
@@ -140,27 +172,28 @@ static int mlx5_netdev_event(struct notifier_block *this,
140 dev_put(lag_ndev); 172 dev_put(lag_ndev);
141 } 173 }
142 174
143 if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) 175 if ((upper == ndev || (!upper && ndev == roce->netdev))
144 && ibdev->ib_active) { 176 && ibdev->ib_active) {
145 struct ib_event ibev = { }; 177 struct ib_event ibev = { };
146 enum ib_port_state port_state; 178 enum ib_port_state port_state;
147 179
148 if (get_port_state(&ibdev->ib_dev, 1, &port_state)) 180 if (get_port_state(&ibdev->ib_dev, port_num,
149 return NOTIFY_DONE; 181 &port_state))
182 goto done;
150 183
151 if (ibdev->roce.last_port_state == port_state) 184 if (roce->last_port_state == port_state)
152 return NOTIFY_DONE; 185 goto done;
153 186
154 ibdev->roce.last_port_state = port_state; 187 roce->last_port_state = port_state;
155 ibev.device = &ibdev->ib_dev; 188 ibev.device = &ibdev->ib_dev;
156 if (port_state == IB_PORT_DOWN) 189 if (port_state == IB_PORT_DOWN)
157 ibev.event = IB_EVENT_PORT_ERR; 190 ibev.event = IB_EVENT_PORT_ERR;
158 else if (port_state == IB_PORT_ACTIVE) 191 else if (port_state == IB_PORT_ACTIVE)
159 ibev.event = IB_EVENT_PORT_ACTIVE; 192 ibev.event = IB_EVENT_PORT_ACTIVE;
160 else 193 else
161 return NOTIFY_DONE; 194 goto done;
162 195
163 ibev.element.port_num = 1; 196 ibev.element.port_num = port_num;
164 ib_dispatch_event(&ibev); 197 ib_dispatch_event(&ibev);
165 } 198 }
166 break; 199 break;
@@ -169,7 +202,8 @@ static int mlx5_netdev_event(struct notifier_block *this,
169 default: 202 default:
170 break; 203 break;
171 } 204 }
172 205done:
206 mlx5_ib_put_native_port_mdev(ibdev, port_num);
173 return NOTIFY_DONE; 207 return NOTIFY_DONE;
174} 208}
175 209
@@ -178,22 +212,88 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
178{ 212{
179 struct mlx5_ib_dev *ibdev = to_mdev(device); 213 struct mlx5_ib_dev *ibdev = to_mdev(device);
180 struct net_device *ndev; 214 struct net_device *ndev;
215 struct mlx5_core_dev *mdev;
181 216
182 ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); 217 mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
218 if (!mdev)
219 return NULL;
220
221 ndev = mlx5_lag_get_roce_netdev(mdev);
183 if (ndev) 222 if (ndev)
184 return ndev; 223 goto out;
185 224
186 /* Ensure ndev does not disappear before we invoke dev_hold() 225 /* Ensure ndev does not disappear before we invoke dev_hold()
187 */ 226 */
188 read_lock(&ibdev->roce.netdev_lock); 227 read_lock(&ibdev->roce[port_num - 1].netdev_lock);
189 ndev = ibdev->roce.netdev; 228 ndev = ibdev->roce[port_num - 1].netdev;
190 if (ndev) 229 if (ndev)
191 dev_hold(ndev); 230 dev_hold(ndev);
192 read_unlock(&ibdev->roce.netdev_lock); 231 read_unlock(&ibdev->roce[port_num - 1].netdev_lock);
193 232
233out:
234 mlx5_ib_put_native_port_mdev(ibdev, port_num);
194 return ndev; 235 return ndev;
195} 236}
196 237
238struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
239 u8 ib_port_num,
240 u8 *native_port_num)
241{
242 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
243 ib_port_num);
244 struct mlx5_core_dev *mdev = NULL;
245 struct mlx5_ib_multiport_info *mpi;
246 struct mlx5_ib_port *port;
247
248 if (native_port_num)
249 *native_port_num = 1;
250
251 if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
252 return ibdev->mdev;
253
254 port = &ibdev->port[ib_port_num - 1];
255 if (!port)
256 return NULL;
257
258 spin_lock(&port->mp.mpi_lock);
259 mpi = ibdev->port[ib_port_num - 1].mp.mpi;
260 if (mpi && !mpi->unaffiliate) {
261 mdev = mpi->mdev;
262 /* If it's the master no need to refcount, it'll exist
263 * as long as the ib_dev exists.
264 */
265 if (!mpi->is_master)
266 mpi->mdev_refcnt++;
267 }
268 spin_unlock(&port->mp.mpi_lock);
269
270 return mdev;
271}
272
273void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
274{
275 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
276 port_num);
277 struct mlx5_ib_multiport_info *mpi;
278 struct mlx5_ib_port *port;
279
280 if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
281 return;
282
283 port = &ibdev->port[port_num - 1];
284
285 spin_lock(&port->mp.mpi_lock);
286 mpi = ibdev->port[port_num - 1].mp.mpi;
287 if (mpi->is_master)
288 goto out;
289
290 mpi->mdev_refcnt--;
291 if (mpi->unaffiliate)
292 complete(&mpi->unref_comp);
293out:
294 spin_unlock(&port->mp.mpi_lock);
295}
296
197static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, 297static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
198 u8 *active_width) 298 u8 *active_width)
199{ 299{
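The new mlx5_ib_get_native_port_mdev()/mlx5_ib_put_native_port_mdev() pair pins the core device that currently backs an IB port: under the port's mpi_lock it hands back the affiliated mdev and bumps a reference count (skipped for the master, which lives as long as the ib_dev itself), and the put side drops the count and signals a waiter so an in-flight unaffiliation can complete. A rough userspace analogue of that get/put discipline, with a pthread mutex standing in for the spinlock and purely illustrative names; the completion/wakeup half is omitted for brevity:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of one IB port slot and its affiliated core device. */
struct port_slot {
	pthread_mutex_t lock;     /* plays the role of mp.mpi_lock */
	void *mdev;               /* affiliated device, NULL if none */
	bool unaffiliating;       /* teardown in progress */
	int refcnt;               /* outstanding get()s */
};

static void *port_get(struct port_slot *p)
{
	void *mdev = NULL;

	pthread_mutex_lock(&p->lock);
	if (p->mdev && !p->unaffiliating) {
		mdev = p->mdev;
		p->refcnt++;      /* caller now pins the device */
	}
	pthread_mutex_unlock(&p->lock);
	return mdev;              /* NULL means "not affiliated yet" */
}

static void port_put(struct port_slot *p)
{
	pthread_mutex_lock(&p->lock);
	p->refcnt--;              /* teardown may proceed once this hits zero */
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	static int dummy_mdev;
	struct port_slot p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.mdev = &dummy_mdev,
	};
	void *mdev = port_get(&p);

	if (mdev) {
		/* ... use mdev ... */
		port_put(&p);
	}
	printf("refcnt back to %d\n", p.refcnt);
	return 0;
}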
@@ -256,19 +356,33 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
256 struct ib_port_attr *props) 356 struct ib_port_attr *props)
257{ 357{
258 struct mlx5_ib_dev *dev = to_mdev(device); 358 struct mlx5_ib_dev *dev = to_mdev(device);
259 struct mlx5_core_dev *mdev = dev->mdev; 359 struct mlx5_core_dev *mdev;
260 struct net_device *ndev, *upper; 360 struct net_device *ndev, *upper;
261 enum ib_mtu ndev_ib_mtu; 361 enum ib_mtu ndev_ib_mtu;
362 bool put_mdev = true;
262 u16 qkey_viol_cntr; 363 u16 qkey_viol_cntr;
263 u32 eth_prot_oper; 364 u32 eth_prot_oper;
365 u8 mdev_port_num;
264 int err; 366 int err;
265 367
368 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
369 if (!mdev) {
370 /* This means the port isn't affiliated yet. Get the
371 * info for the master port instead.
372 */
373 put_mdev = false;
374 mdev = dev->mdev;
375 mdev_port_num = 1;
376 port_num = 1;
377 }
378
266 /* Possible bad flows are checked before filling out props so in case 379 /* Possible bad flows are checked before filling out props so in case
267 * of an error it will still be zeroed out. 380 * of an error it will still be zeroed out.
268 */ 381 */
269 err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper, port_num); 382 err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
383 mdev_port_num);
270 if (err) 384 if (err)
271 return err; 385 goto out;
272 386
273 translate_eth_proto_oper(eth_prot_oper, &props->active_speed, 387 translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
274 &props->active_width); 388 &props->active_width);
@@ -284,12 +398,16 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
284 props->state = IB_PORT_DOWN; 398 props->state = IB_PORT_DOWN;
285 props->phys_state = 3; 399 props->phys_state = 3;
286 400
287 mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); 401 mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
288 props->qkey_viol_cntr = qkey_viol_cntr; 402 props->qkey_viol_cntr = qkey_viol_cntr;
289 403
404 /* If this is a stub query for an unaffiliated port stop here */
405 if (!put_mdev)
406 goto out;
407
290 ndev = mlx5_ib_get_netdev(device, port_num); 408 ndev = mlx5_ib_get_netdev(device, port_num);
291 if (!ndev) 409 if (!ndev)
292 return 0; 410 goto out;
293 411
294 if (mlx5_lag_is_active(dev->mdev)) { 412 if (mlx5_lag_is_active(dev->mdev)) {
295 rcu_read_lock(); 413 rcu_read_lock();
@@ -312,7 +430,10 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
312 dev_put(ndev); 430 dev_put(ndev);
313 431
314 props->active_mtu = min(props->max_mtu, ndev_ib_mtu); 432 props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
315 return 0; 433out:
434 if (put_mdev)
435 mlx5_ib_put_native_port_mdev(dev, port_num);
436 return err;
316} 437}
317 438
318static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, 439static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
@@ -354,7 +475,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
354 475
355 return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, 476 return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
356 roce_l3_type, gid->raw, mac, vlan, 477 roce_l3_type, gid->raw, mac, vlan,
357 vlan_id); 478 vlan_id, port_num);
358} 479}
359 480
360static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, 481static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
@@ -438,11 +559,11 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
438} 559}
439 560
440static void get_atomic_caps(struct mlx5_ib_dev *dev, 561static void get_atomic_caps(struct mlx5_ib_dev *dev,
562 u8 atomic_size_qp,
441 struct ib_device_attr *props) 563 struct ib_device_attr *props)
442{ 564{
443 u8 tmp; 565 u8 tmp;
444 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 566 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
445 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
446 u8 atomic_req_8B_endianness_mode = 567 u8 atomic_req_8B_endianness_mode =
447 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode); 568 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
448 569
@@ -459,6 +580,29 @@ static void get_atomic_caps(struct mlx5_ib_dev *dev,
459 } 580 }
460} 581}
461 582
583static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
584 struct ib_device_attr *props)
585{
586 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
587
588 get_atomic_caps(dev, atomic_size_qp, props);
589}
590
591static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
592 struct ib_device_attr *props)
593{
594 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
595
596 get_atomic_caps(dev, atomic_size_qp, props);
597}
598
599bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
600{
601 struct ib_device_attr props = {};
602
603 get_atomic_caps_dc(dev, &props);
604 return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
605}
462static int mlx5_query_system_image_guid(struct ib_device *ibdev, 606static int mlx5_query_system_image_guid(struct ib_device *ibdev,
463 __be64 *sys_image_guid) 607 __be64 *sys_image_guid)
464{ 608{
@@ -587,6 +731,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
587 int max_rq_sg; 731 int max_rq_sg;
588 int max_sq_sg; 732 int max_sq_sg;
589 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); 733 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
734 bool raw_support = !mlx5_core_mp_enabled(mdev);
590 struct mlx5_ib_query_device_resp resp = {}; 735 struct mlx5_ib_query_device_resp resp = {};
591 size_t resp_len; 736 size_t resp_len;
592 u64 max_tso; 737 u64 max_tso;
@@ -650,7 +795,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
650 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 795 if (MLX5_CAP_GEN(mdev, block_lb_mc))
651 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 796 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
652 797
653 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { 798 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
654 if (MLX5_CAP_ETH(mdev, csum_cap)) { 799 if (MLX5_CAP_ETH(mdev, csum_cap)) {
655 /* Legacy bit to support old userspace libraries */ 800 /* Legacy bit to support old userspace libraries */
656 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 801 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
@@ -682,7 +827,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
682 MLX5_RX_HASH_SRC_PORT_TCP | 827 MLX5_RX_HASH_SRC_PORT_TCP |
683 MLX5_RX_HASH_DST_PORT_TCP | 828 MLX5_RX_HASH_DST_PORT_TCP |
684 MLX5_RX_HASH_SRC_PORT_UDP | 829 MLX5_RX_HASH_SRC_PORT_UDP |
685 MLX5_RX_HASH_DST_PORT_UDP; 830 MLX5_RX_HASH_DST_PORT_UDP |
831 MLX5_RX_HASH_INNER;
686 resp.response_length += sizeof(resp.rss_caps); 832 resp.response_length += sizeof(resp.rss_caps);
687 } 833 }
688 } else { 834 } else {
@@ -698,7 +844,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
698 } 844 }
699 845
700 if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && 846 if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
701 MLX5_CAP_GEN(dev->mdev, general_notification_event)) 847 MLX5_CAP_GEN(dev->mdev, general_notification_event) &&
848 raw_support)
702 props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; 849 props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
703 850
704 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && 851 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
@@ -706,7 +853,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
706 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 853 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
707 854
708 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 855 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
709 MLX5_CAP_ETH(dev->mdev, scatter_fcs)) { 856 MLX5_CAP_ETH(dev->mdev, scatter_fcs) &&
857 raw_support) {
710 /* Legacy bit to support old userspace libraries */ 858 /* Legacy bit to support old userspace libraries */
711 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 859 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
712 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; 860 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
@@ -746,7 +894,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
746 props->max_srq_sge = max_rq_sg - 1; 894 props->max_srq_sge = max_rq_sg - 1;
747 props->max_fast_reg_page_list_len = 895 props->max_fast_reg_page_list_len =
748 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); 896 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
749 get_atomic_caps(dev, props); 897 get_atomic_caps_qp(dev, props);
750 props->masked_atomic_cap = IB_ATOMIC_NONE; 898 props->masked_atomic_cap = IB_ATOMIC_NONE;
751 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 899 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
752 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 900 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
@@ -770,7 +918,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
770 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; 918 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
771 919
772 if (mlx5_ib_port_link_layer(ibdev, 1) == 920 if (mlx5_ib_port_link_layer(ibdev, 1) ==
773 IB_LINK_LAYER_ETHERNET) { 921 IB_LINK_LAYER_ETHERNET && raw_support) {
774 props->rss_caps.max_rwq_indirection_tables = 922 props->rss_caps.max_rwq_indirection_tables =
775 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); 923 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
776 props->rss_caps.max_rwq_indirection_table_size = 924 props->rss_caps.max_rwq_indirection_table_size =
@@ -807,7 +955,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
807 resp.response_length += sizeof(resp.cqe_comp_caps); 955 resp.response_length += sizeof(resp.cqe_comp_caps);
808 } 956 }
809 957
810 if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) { 958 if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
959 raw_support) {
811 if (MLX5_CAP_QOS(mdev, packet_pacing) && 960 if (MLX5_CAP_QOS(mdev, packet_pacing) &&
812 MLX5_CAP_GEN(mdev, qos)) { 961 MLX5_CAP_GEN(mdev, qos)) {
813 resp.packet_pacing_caps.qp_rate_limit_max = 962 resp.packet_pacing_caps.qp_rate_limit_max =
@@ -866,7 +1015,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
866 } 1015 }
867 } 1016 }
868 1017
869 if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen)) { 1018 if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
1019 raw_support) {
870 resp.response_length += sizeof(resp.striding_rq_caps); 1020 resp.response_length += sizeof(resp.striding_rq_caps);
871 if (MLX5_CAP_GEN(mdev, striding_rq)) { 1021 if (MLX5_CAP_GEN(mdev, striding_rq)) {
872 resp.striding_rq_caps.min_single_stride_log_num_of_bytes = 1022 resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
@@ -1097,7 +1247,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
1097 } 1247 }
1098 1248
1099 if (!ret && props) { 1249 if (!ret && props) {
1100 count = mlx5_core_reserved_gids_count(to_mdev(ibdev)->mdev); 1250 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1251 struct mlx5_core_dev *mdev;
1252 bool put_mdev = true;
1253
1254 mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL);
1255 if (!mdev) {
1256 /* If the port isn't affiliated yet query the master.
1257 * The master and slave will have the same values.
1258 */
1259 mdev = dev->mdev;
1260 port = 1;
1261 put_mdev = false;
1262 }
1263 count = mlx5_core_reserved_gids_count(mdev);
1264 if (put_mdev)
1265 mlx5_ib_put_native_port_mdev(dev, port);
1101 props->gid_tbl_len -= count; 1266 props->gid_tbl_len -= count;
1102 } 1267 }
1103 return ret; 1268 return ret;
@@ -1122,20 +1287,43 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
1122 1287
1123} 1288}
1124 1289
1125static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 1290static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
1126 u16 *pkey) 1291 u16 index, u16 *pkey)
1127{ 1292{
1128 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1293 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1129 struct mlx5_core_dev *mdev = dev->mdev; 1294 struct mlx5_core_dev *mdev;
1295 bool put_mdev = true;
1296 u8 mdev_port_num;
1297 int err;
1298
1299 mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
1300 if (!mdev) {
1301 /* The port isn't affiliated yet, get the PKey from the master
1302 * port. For RoCE the PKey tables will be the same.
1303 */
1304 put_mdev = false;
1305 mdev = dev->mdev;
1306 mdev_port_num = 1;
1307 }
1308
1309 err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0,
1310 index, pkey);
1311 if (put_mdev)
1312 mlx5_ib_put_native_port_mdev(dev, port);
1130 1313
1314 return err;
1315}
1316
1317static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1318 u16 *pkey)
1319{
1131 switch (mlx5_get_vport_access_method(ibdev)) { 1320 switch (mlx5_get_vport_access_method(ibdev)) {
1132 case MLX5_VPORT_ACCESS_METHOD_MAD: 1321 case MLX5_VPORT_ACCESS_METHOD_MAD:
1133 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); 1322 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
1134 1323
1135 case MLX5_VPORT_ACCESS_METHOD_HCA: 1324 case MLX5_VPORT_ACCESS_METHOD_HCA:
1136 case MLX5_VPORT_ACCESS_METHOD_NIC: 1325 case MLX5_VPORT_ACCESS_METHOD_NIC:
1137 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, 1326 return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey);
1138 pkey);
1139 default: 1327 default:
1140 return -EINVAL; 1328 return -EINVAL;
1141 } 1329 }
@@ -1174,23 +1362,32 @@ static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
1174 u32 value) 1362 u32 value)
1175{ 1363{
1176 struct mlx5_hca_vport_context ctx = {}; 1364 struct mlx5_hca_vport_context ctx = {};
1365 struct mlx5_core_dev *mdev;
1366 u8 mdev_port_num;
1177 int err; 1367 int err;
1178 1368
1179 err = mlx5_query_hca_vport_context(dev->mdev, 0, 1369 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
1180 port_num, 0, &ctx); 1370 if (!mdev)
1371 return -ENODEV;
1372
1373 err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx);
1181 if (err) 1374 if (err)
1182 return err; 1375 goto out;
1183 1376
1184 if (~ctx.cap_mask1_perm & mask) { 1377 if (~ctx.cap_mask1_perm & mask) {
1185 mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n", 1378 mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
1186 mask, ctx.cap_mask1_perm); 1379 mask, ctx.cap_mask1_perm);
1187 return -EINVAL; 1380 err = -EINVAL;
1381 goto out;
1188 } 1382 }
1189 1383
1190 ctx.cap_mask1 = value; 1384 ctx.cap_mask1 = value;
1191 ctx.cap_mask1_perm = mask; 1385 ctx.cap_mask1_perm = mask;
1192 err = mlx5_core_modify_hca_vport_context(dev->mdev, 0, 1386 err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num,
1193 port_num, 0, &ctx); 1387 0, &ctx);
1388
1389out:
1390 mlx5_ib_put_native_port_mdev(dev, port_num);
1194 1391
1195 return err; 1392 return err;
1196} 1393}
@@ -1241,9 +1438,18 @@ static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
1241 caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n"); 1438 caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
1242} 1439}
1243 1440
1441static u16 calc_dynamic_bfregs(int uars_per_sys_page)
1442{
1443 /* Large page with non 4k uar support might limit the dynamic size */
1444 if (uars_per_sys_page == 1 && PAGE_SIZE > 4096)
1445 return MLX5_MIN_DYN_BFREGS;
1446
1447 return MLX5_MAX_DYN_BFREGS;
1448}
1449
1244static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, 1450static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
1245 struct mlx5_ib_alloc_ucontext_req_v2 *req, 1451 struct mlx5_ib_alloc_ucontext_req_v2 *req,
1246 u32 *num_sys_pages) 1452 struct mlx5_bfreg_info *bfregi)
1247{ 1453{
1248 int uars_per_sys_page; 1454 int uars_per_sys_page;
1249 int bfregs_per_sys_page; 1455 int bfregs_per_sys_page;
@@ -1260,16 +1466,21 @@ static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
1260 1466
1261 uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k); 1467 uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
1262 bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR; 1468 bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
1469 /* This holds the required static allocation asked by the user */
1263 req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page); 1470 req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
1264 *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
1265
1266 if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) 1471 if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
1267 return -EINVAL; 1472 return -EINVAL;
1268 1473
1269 mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n", 1474 bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
1475 bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
1476 bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
1477 bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
1478
1479 mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
1270 MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", 1480 MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
1271 lib_uar_4k ? "yes" : "no", ref_bfregs, 1481 lib_uar_4k ? "yes" : "no", ref_bfregs,
1272 req->total_num_bfregs, *num_sys_pages); 1482 req->total_num_bfregs, bfregi->total_num_bfregs,
1483 bfregi->num_sys_pages);
1273 1484
1274 return 0; 1485 return 0;
1275} 1486}
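calc_total_bfregs() above now splits the blue-flame register space into a static part (what userspace asked for, rounded up to whole system pages) and a dynamic part sized by calc_dynamic_bfregs(), then derives the page counts from bfregs-per-system-page. A small arithmetic sketch of that sizing; the numeric inputs are illustrative assumptions, not the driver's constants:

#include <stdio.h>

/* Round x up to the next multiple of a (a power of two in the driver,
 * but the generic form is used here). */
#define ALIGN_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	/* Illustrative values only; the real ones come from firmware caps. */
	int uars_per_sys_page = 4;        /* 4K UARs packed into one system page */
	int non_fp_bfregs_per_uar = 2;
	int requested_bfregs = 25;        /* what userspace asked for */
	int max_dyn_bfregs = 1024;        /* assumed dynamic budget */

	int bfregs_per_sys_page = uars_per_sys_page * non_fp_bfregs_per_uar;
	int static_bfregs = ALIGN_UP(requested_bfregs, bfregs_per_sys_page);
	int num_static_pages = static_bfregs / bfregs_per_sys_page;
	int num_dyn_bfregs = ALIGN_UP(max_dyn_bfregs, bfregs_per_sys_page);
	int total_bfregs = static_bfregs + num_dyn_bfregs;
	int num_sys_pages = total_bfregs / bfregs_per_sys_page;

	printf("static: %d bfregs on %d pages, dynamic: %d bfregs, "
	       "total: %d bfregs on %d pages\n",
	       static_bfregs, num_static_pages, num_dyn_bfregs,
	       total_bfregs, num_sys_pages);
	return 0;
}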
@@ -1281,13 +1492,17 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
1281 int i; 1492 int i;
1282 1493
1283 bfregi = &context->bfregi; 1494 bfregi = &context->bfregi;
1284 for (i = 0; i < bfregi->num_sys_pages; i++) { 1495 for (i = 0; i < bfregi->num_static_sys_pages; i++) {
1285 err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]); 1496 err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
1286 if (err) 1497 if (err)
1287 goto error; 1498 goto error;
1288 1499
1289 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]); 1500 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
1290 } 1501 }
1502
1503 for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
1504 bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
1505
1291 return 0; 1506 return 0;
1292 1507
1293error: 1508error:
@@ -1306,12 +1521,16 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
1306 1521
1307 bfregi = &context->bfregi; 1522 bfregi = &context->bfregi;
1308 for (i = 0; i < bfregi->num_sys_pages; i++) { 1523 for (i = 0; i < bfregi->num_sys_pages; i++) {
1309 err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); 1524 if (i < bfregi->num_static_sys_pages ||
1310 if (err) { 1525 bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
1311 mlx5_ib_warn(dev, "failed to free uar %d\n", i); 1526 err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
1312 return err; 1527 if (err) {
1528 mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
1529 return err;
1530 }
1313 } 1531 }
1314 } 1532 }
1533
1315 return 0; 1534 return 0;
1316} 1535}
1317 1536
@@ -1362,6 +1581,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1362 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1581 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1363 struct mlx5_ib_alloc_ucontext_req_v2 req = {}; 1582 struct mlx5_ib_alloc_ucontext_req_v2 req = {};
1364 struct mlx5_ib_alloc_ucontext_resp resp = {}; 1583 struct mlx5_ib_alloc_ucontext_resp resp = {};
1584 struct mlx5_core_dev *mdev = dev->mdev;
1365 struct mlx5_ib_ucontext *context; 1585 struct mlx5_ib_ucontext *context;
1366 struct mlx5_bfreg_info *bfregi; 1586 struct mlx5_bfreg_info *bfregi;
1367 int ver; 1587 int ver;
@@ -1422,13 +1642,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1422 bfregi = &context->bfregi; 1642 bfregi = &context->bfregi;
1423 1643
1424 /* updates req->total_num_bfregs */ 1644 /* updates req->total_num_bfregs */
1425 err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages); 1645 err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
1426 if (err) 1646 if (err)
1427 goto out_ctx; 1647 goto out_ctx;
1428 1648
1429 mutex_init(&bfregi->lock); 1649 mutex_init(&bfregi->lock);
1430 bfregi->lib_uar_4k = lib_uar_4k; 1650 bfregi->lib_uar_4k = lib_uar_4k;
1431 bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count), 1651 bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
1432 GFP_KERNEL); 1652 GFP_KERNEL);
1433 if (!bfregi->count) { 1653 if (!bfregi->count) {
1434 err = -ENOMEM; 1654 err = -ENOMEM;
@@ -1470,7 +1690,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1470 mutex_init(&context->db_page_mutex); 1690 mutex_init(&context->db_page_mutex);
1471 1691
1472 resp.tot_bfregs = req.total_num_bfregs; 1692 resp.tot_bfregs = req.total_num_bfregs;
1473 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); 1693 resp.num_ports = dev->num_ports;
1474 1694
1475 if (field_avail(typeof(resp), cqe_version, udata->outlen)) 1695 if (field_avail(typeof(resp), cqe_version, udata->outlen))
1476 resp.response_length += sizeof(resp.cqe_version); 1696 resp.response_length += sizeof(resp.cqe_version);
@@ -1489,6 +1709,12 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1489 resp.response_length += sizeof(resp.eth_min_inline); 1709 resp.response_length += sizeof(resp.eth_min_inline);
1490 } 1710 }
1491 1711
1712 if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
1713 if (mdev->clock_info)
1714 resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
1715 resp.response_length += sizeof(resp.clock_info_versions);
1716 }
1717
1492 /* 1718 /*
1493 * We don't want to expose information from the PCI bar that is located 1719 * We don't want to expose information from the PCI bar that is located
1494 * after 4096 bytes, so if the arch only supports larger pages, let's 1720 * after 4096 bytes, so if the arch only supports larger pages, let's
@@ -1502,8 +1728,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1502 resp.hca_core_clock_offset = 1728 resp.hca_core_clock_offset =
1503 offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; 1729 offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
1504 } 1730 }
1505 resp.response_length += sizeof(resp.hca_core_clock_offset) + 1731 resp.response_length += sizeof(resp.hca_core_clock_offset);
1506 sizeof(resp.reserved2);
1507 } 1732 }
1508 1733
1509 if (field_avail(typeof(resp), log_uar_size, udata->outlen)) 1734 if (field_avail(typeof(resp), log_uar_size, udata->outlen))
@@ -1512,6 +1737,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1512 if (field_avail(typeof(resp), num_uars_per_page, udata->outlen)) 1737 if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
1513 resp.response_length += sizeof(resp.num_uars_per_page); 1738 resp.response_length += sizeof(resp.num_uars_per_page);
1514 1739
1740 if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
1741 resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
1742 resp.response_length += sizeof(resp.num_dyn_bfregs);
1743 }
1744
1515 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1745 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1516 if (err) 1746 if (err)
1517 goto out_td; 1747 goto out_td;
@@ -1566,15 +1796,13 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1566} 1796}
1567 1797
1568static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, 1798static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
1569 struct mlx5_bfreg_info *bfregi, 1799 int uar_idx)
1570 int idx)
1571{ 1800{
1572 int fw_uars_per_page; 1801 int fw_uars_per_page;
1573 1802
1574 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1; 1803 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
1575 1804
1576 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + 1805 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
1577 bfregi->sys_pages[idx] / fw_uars_per_page;
1578} 1806}
1579 1807
1580static int get_command(unsigned long offset) 1808static int get_command(unsigned long offset)
@@ -1592,6 +1820,12 @@ static int get_index(unsigned long offset)
1592 return get_arg(offset); 1820 return get_arg(offset);
1593} 1821}
1594 1822
1823/* Index resides in an extra byte to allow values larger than 255 */
1824static int get_extended_index(unsigned long offset)
1825{
1826 return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
1827}
1828
1595static void mlx5_ib_vma_open(struct vm_area_struct *area) 1829static void mlx5_ib_vma_open(struct vm_area_struct *area)
1596{ 1830{
1597 /* vma_open is called when a new VMA is created on top of our VMA. This 1831 /* vma_open is called when a new VMA is created on top of our VMA. This
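get_extended_index() above widens the mmap index by borrowing an extra byte of the page offset: the low byte of the index stays where get_arg() finds it, the high byte is carried in bits 16-23, and the command sits in between. A standalone sketch of that encode/decode round trip, assuming the 8-bit command field used by the surrounding helpers; the command value below is hypothetical:

#include <stdio.h>

/* Assumed layout of the mmap page offset, matching the 8-bit command
 * shift used by get_command()/get_arg() in this file:
 *   bits  0..7   low byte of the index
 *   bits  8..15  mmap command
 *   bits 16..23  extra (high) byte of the index
 */
static unsigned long make_offset(int cmd, int index)
{
	return (unsigned long)(index & 0xff) |
	       ((unsigned long)(cmd & 0xff) << 8) |
	       ((unsigned long)((index >> 8) & 0xff) << 16);
}

static int get_extended_index(unsigned long offset)
{
	return (offset & 0xff) | (((offset >> 16) & 0xff) << 8);
}

int main(void)
{
	int cmd = 5;                               /* hypothetical command value */
	unsigned long off = make_offset(cmd, 300); /* index > 255 on purpose */

	printf("offset 0x%lx decodes back to index %d\n",
	       off, get_extended_index(off));
	return 0;
}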
@@ -1733,6 +1967,38 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
1733 } 1967 }
1734} 1968}
1735 1969
1970static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
1971 struct vm_area_struct *vma,
1972 struct mlx5_ib_ucontext *context)
1973{
1974 phys_addr_t pfn;
1975 int err;
1976
1977 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1978 return -EINVAL;
1979
1980 if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
1981 return -EOPNOTSUPP;
1982
1983 if (vma->vm_flags & VM_WRITE)
1984 return -EPERM;
1985
1986 if (!dev->mdev->clock_info_page)
1987 return -EOPNOTSUPP;
1988
1989 pfn = page_to_pfn(dev->mdev->clock_info_page);
1990 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
1991 vma->vm_page_prot);
1992 if (err)
1993 return err;
1994
1995 mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
1996 vma->vm_start,
1997 (unsigned long long)pfn << PAGE_SHIFT);
1998
1999 return mlx5_ib_set_vma_data(vma, context);
2000}
2001
1736static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 2002static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1737 struct vm_area_struct *vma, 2003 struct vm_area_struct *vma,
1738 struct mlx5_ib_ucontext *context) 2004 struct mlx5_ib_ucontext *context)
@@ -1742,21 +2008,29 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1742 unsigned long idx; 2008 unsigned long idx;
1743 phys_addr_t pfn, pa; 2009 phys_addr_t pfn, pa;
1744 pgprot_t prot; 2010 pgprot_t prot;
1745 int uars_per_page; 2011 u32 bfreg_dyn_idx = 0;
2012 u32 uar_index;
2013 int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
2014 int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
2015 bfregi->num_static_sys_pages;
1746 2016
1747 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 2017 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1748 return -EINVAL; 2018 return -EINVAL;
1749 2019
1750 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k); 2020 if (dyn_uar)
1751 idx = get_index(vma->vm_pgoff); 2021 idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
1752 if (idx % uars_per_page || 2022 else
1753 idx * uars_per_page >= bfregi->num_sys_pages) { 2023 idx = get_index(vma->vm_pgoff);
1754 mlx5_ib_warn(dev, "invalid uar index %lu\n", idx); 2024
2025 if (idx >= max_valid_idx) {
2026 mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
2027 idx, max_valid_idx);
1755 return -EINVAL; 2028 return -EINVAL;
1756 } 2029 }
1757 2030
1758 switch (cmd) { 2031 switch (cmd) {
1759 case MLX5_IB_MMAP_WC_PAGE: 2032 case MLX5_IB_MMAP_WC_PAGE:
2033 case MLX5_IB_MMAP_ALLOC_WC:
1760/* Some architectures don't support WC memory */ 2034/* Some architectures don't support WC memory */
1761#if defined(CONFIG_X86) 2035#if defined(CONFIG_X86)
1762 if (!pat_enabled()) 2036 if (!pat_enabled())
@@ -1776,7 +2050,40 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1776 return -EINVAL; 2050 return -EINVAL;
1777 } 2051 }
1778 2052
1779 pfn = uar_index2pfn(dev, bfregi, idx); 2053 if (dyn_uar) {
2054 int uars_per_page;
2055
2056 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
2057 bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
2058 if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
2059 mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
2060 bfreg_dyn_idx, bfregi->total_num_bfregs);
2061 return -EINVAL;
2062 }
2063
2064 mutex_lock(&bfregi->lock);
 2065 /* Fail if the UAR is already allocated; the first bfreg
 2066 * index of each page holds its count.
 2067 */
2068 if (bfregi->count[bfreg_dyn_idx]) {
2069 mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
2070 mutex_unlock(&bfregi->lock);
2071 return -EINVAL;
2072 }
2073
2074 bfregi->count[bfreg_dyn_idx]++;
2075 mutex_unlock(&bfregi->lock);
2076
2077 err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
2078 if (err) {
2079 mlx5_ib_warn(dev, "UAR alloc failed\n");
2080 goto free_bfreg;
2081 }
2082 } else {
2083 uar_index = bfregi->sys_pages[idx];
2084 }
2085
2086 pfn = uar_index2pfn(dev, uar_index);
1780 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 2087 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
1781 2088
1782 vma->vm_page_prot = prot; 2089 vma->vm_page_prot = prot;
@@ -1785,14 +2092,32 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
1785 if (err) { 2092 if (err) {
1786 mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", 2093 mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
1787 err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); 2094 err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
1788 return -EAGAIN; 2095 err = -EAGAIN;
2096 goto err;
1789 } 2097 }
1790 2098
1791 pa = pfn << PAGE_SHIFT; 2099 pa = pfn << PAGE_SHIFT;
1792 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), 2100 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
1793 vma->vm_start, &pa); 2101 vma->vm_start, &pa);
1794 2102
1795 return mlx5_ib_set_vma_data(vma, context); 2103 err = mlx5_ib_set_vma_data(vma, context);
2104 if (err)
2105 goto err;
2106
2107 if (dyn_uar)
2108 bfregi->sys_pages[idx] = uar_index;
2109 return 0;
2110
2111err:
2112 if (!dyn_uar)
2113 return err;
2114
2115 mlx5_cmd_free_uar(dev->mdev, idx);
2116
2117free_bfreg:
2118 mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
2119
2120 return err;
1796} 2121}
1797 2122
1798static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 2123static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -1807,6 +2132,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
1807 case MLX5_IB_MMAP_WC_PAGE: 2132 case MLX5_IB_MMAP_WC_PAGE:
1808 case MLX5_IB_MMAP_NC_PAGE: 2133 case MLX5_IB_MMAP_NC_PAGE:
1809 case MLX5_IB_MMAP_REGULAR_PAGE: 2134 case MLX5_IB_MMAP_REGULAR_PAGE:
2135 case MLX5_IB_MMAP_ALLOC_WC:
1810 return uar_mmap(dev, command, vma, context); 2136 return uar_mmap(dev, command, vma, context);
1811 2137
1812 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 2138 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
@@ -1835,6 +2161,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
1835 vma->vm_start, 2161 vma->vm_start,
1836 (unsigned long long)pfn << PAGE_SHIFT); 2162 (unsigned long long)pfn << PAGE_SHIFT);
1837 break; 2163 break;
2164 case MLX5_IB_MMAP_CLOCK_INFO:
2165 return mlx5_ib_mmap_clock_info_page(dev, vma, context);
1838 2166
1839 default: 2167 default:
1840 return -EINVAL; 2168 return -EINVAL;
@@ -2663,7 +2991,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
2663 return ERR_PTR(-ENOMEM); 2991 return ERR_PTR(-ENOMEM);
2664 2992
2665 if (domain != IB_FLOW_DOMAIN_USER || 2993 if (domain != IB_FLOW_DOMAIN_USER ||
2666 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || 2994 flow_attr->port > dev->num_ports ||
2667 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) 2995 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
2668 return ERR_PTR(-EINVAL); 2996 return ERR_PTR(-EINVAL);
2669 2997
@@ -2928,15 +3256,24 @@ static void delay_drop_handler(struct work_struct *work)
2928 mutex_unlock(&delay_drop->lock); 3256 mutex_unlock(&delay_drop->lock);
2929} 3257}
2930 3258
2931static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 3259static void mlx5_ib_handle_event(struct work_struct *_work)
2932 enum mlx5_dev_event event, unsigned long param)
2933{ 3260{
2934 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; 3261 struct mlx5_ib_event_work *work =
3262 container_of(_work, struct mlx5_ib_event_work, work);
3263 struct mlx5_ib_dev *ibdev;
2935 struct ib_event ibev; 3264 struct ib_event ibev;
2936 bool fatal = false; 3265 bool fatal = false;
2937 u8 port = 0; 3266 u8 port = 0;
2938 3267
2939 switch (event) { 3268 if (mlx5_core_is_mp_slave(work->dev)) {
3269 ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
3270 if (!ibdev)
3271 goto out;
3272 } else {
3273 ibdev = work->context;
3274 }
3275
3276 switch (work->event) {
2940 case MLX5_DEV_EVENT_SYS_ERROR: 3277 case MLX5_DEV_EVENT_SYS_ERROR:
2941 ibev.event = IB_EVENT_DEVICE_FATAL; 3278 ibev.event = IB_EVENT_DEVICE_FATAL;
2942 mlx5_ib_handle_internal_error(ibdev); 3279 mlx5_ib_handle_internal_error(ibdev);
@@ -2946,39 +3283,39 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
2946 case MLX5_DEV_EVENT_PORT_UP: 3283 case MLX5_DEV_EVENT_PORT_UP:
2947 case MLX5_DEV_EVENT_PORT_DOWN: 3284 case MLX5_DEV_EVENT_PORT_DOWN:
2948 case MLX5_DEV_EVENT_PORT_INITIALIZED: 3285 case MLX5_DEV_EVENT_PORT_INITIALIZED:
2949 port = (u8)param; 3286 port = (u8)work->param;
2950 3287
2951 /* In RoCE, port up/down events are handled in 3288 /* In RoCE, port up/down events are handled in
2952 * mlx5_netdev_event(). 3289 * mlx5_netdev_event().
2953 */ 3290 */
2954 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == 3291 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
2955 IB_LINK_LAYER_ETHERNET) 3292 IB_LINK_LAYER_ETHERNET)
2956 return; 3293 goto out;
2957 3294
2958 ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? 3295 ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
2959 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 3296 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
2960 break; 3297 break;
2961 3298
2962 case MLX5_DEV_EVENT_LID_CHANGE: 3299 case MLX5_DEV_EVENT_LID_CHANGE:
2963 ibev.event = IB_EVENT_LID_CHANGE; 3300 ibev.event = IB_EVENT_LID_CHANGE;
2964 port = (u8)param; 3301 port = (u8)work->param;
2965 break; 3302 break;
2966 3303
2967 case MLX5_DEV_EVENT_PKEY_CHANGE: 3304 case MLX5_DEV_EVENT_PKEY_CHANGE:
2968 ibev.event = IB_EVENT_PKEY_CHANGE; 3305 ibev.event = IB_EVENT_PKEY_CHANGE;
2969 port = (u8)param; 3306 port = (u8)work->param;
2970 3307
2971 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 3308 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
2972 break; 3309 break;
2973 3310
2974 case MLX5_DEV_EVENT_GUID_CHANGE: 3311 case MLX5_DEV_EVENT_GUID_CHANGE:
2975 ibev.event = IB_EVENT_GID_CHANGE; 3312 ibev.event = IB_EVENT_GID_CHANGE;
2976 port = (u8)param; 3313 port = (u8)work->param;
2977 break; 3314 break;
2978 3315
2979 case MLX5_DEV_EVENT_CLIENT_REREG: 3316 case MLX5_DEV_EVENT_CLIENT_REREG:
2980 ibev.event = IB_EVENT_CLIENT_REREGISTER; 3317 ibev.event = IB_EVENT_CLIENT_REREGISTER;
2981 port = (u8)param; 3318 port = (u8)work->param;
2982 break; 3319 break;
2983 case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: 3320 case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
2984 schedule_work(&ibdev->delay_drop.delay_drop_work); 3321 schedule_work(&ibdev->delay_drop.delay_drop_work);
@@ -3000,9 +3337,26 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
3000 3337
3001 if (fatal) 3338 if (fatal)
3002 ibdev->ib_active = false; 3339 ibdev->ib_active = false;
3003
3004out: 3340out:
3005 return; 3341 kfree(work);
3342}
3343
3344static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
3345 enum mlx5_dev_event event, unsigned long param)
3346{
3347 struct mlx5_ib_event_work *work;
3348
3349 work = kmalloc(sizeof(*work), GFP_ATOMIC);
3350 if (!work)
3351 return;
3352
3353 INIT_WORK(&work->work, mlx5_ib_handle_event);
3354 work->dev = dev;
3355 work->param = param;
3356 work->context = context;
3357 work->event = event;
3358
3359 queue_work(mlx5_ib_event_wq, &work->work);
3006} 3360}
3007 3361
3008static int set_has_smi_cap(struct mlx5_ib_dev *dev) 3362static int set_has_smi_cap(struct mlx5_ib_dev *dev)
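The reworked mlx5_ib_event() above may be called from a context that cannot sleep, so it only copies the event parameters into a GFP_ATOMIC work item and queues it; the ordered workqueue (created in mlx5_ib_init() further down) then runs mlx5_ib_handle_event() in process context while preserving event order. Below is a minimal, self-contained module sketch of the same deferral pattern; the demo_* names are made up and stand in for the driver's structures.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_event_wq;

struct demo_event_work {
	struct work_struct work;
	unsigned long param;
};

/* Runs later in process context, where sleeping (and kfree) is fine. */
static void demo_handle_event(struct work_struct *_work)
{
	struct demo_event_work *work =
		container_of(_work, struct demo_event_work, work);

	pr_info("deferred event, param=%lu\n", work->param);
	kfree(work);
}

/* May be called from atomic context: only allocate with GFP_ATOMIC and queue. */
static void demo_event(unsigned long param)
{
	struct demo_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work)
		return;

	INIT_WORK(&work->work, demo_handle_event);
	work->param = param;
	queue_work(demo_event_wq, &work->work);
}

static int __init demo_init(void)
{
	/* Ordered workqueue: one item at a time, so event order is preserved. */
	demo_event_wq = alloc_ordered_workqueue("demo_event_wq", 0);
	if (!demo_event_wq)
		return -ENOMEM;

	demo_event(42);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_event_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");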
@@ -3011,7 +3365,7 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
3011 int err; 3365 int err;
3012 int port; 3366 int port;
3013 3367
3014 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 3368 for (port = 1; port <= dev->num_ports; port++) {
3015 dev->mdev->port_caps[port - 1].has_smi = false; 3369 dev->mdev->port_caps[port - 1].has_smi = false;
3016 if (MLX5_CAP_GEN(dev->mdev, port_type) == 3370 if (MLX5_CAP_GEN(dev->mdev, port_type) ==
3017 MLX5_CAP_PORT_TYPE_IB) { 3371 MLX5_CAP_PORT_TYPE_IB) {
@@ -3038,16 +3392,15 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev)
3038{ 3392{
3039 int port; 3393 int port;
3040 3394
3041 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) 3395 for (port = 1; port <= dev->num_ports; port++)
3042 mlx5_query_ext_port_caps(dev, port); 3396 mlx5_query_ext_port_caps(dev, port);
3043} 3397}
3044 3398
3045static int get_port_caps(struct mlx5_ib_dev *dev) 3399static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
3046{ 3400{
3047 struct ib_device_attr *dprops = NULL; 3401 struct ib_device_attr *dprops = NULL;
3048 struct ib_port_attr *pprops = NULL; 3402 struct ib_port_attr *pprops = NULL;
3049 int err = -ENOMEM; 3403 int err = -ENOMEM;
3050 int port;
3051 struct ib_udata uhw = {.inlen = 0, .outlen = 0}; 3404 struct ib_udata uhw = {.inlen = 0, .outlen = 0};
3052 3405
3053 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); 3406 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
@@ -3068,22 +3421,21 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
3068 goto out; 3421 goto out;
3069 } 3422 }
3070 3423
3071 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 3424 memset(pprops, 0, sizeof(*pprops));
3072 memset(pprops, 0, sizeof(*pprops)); 3425 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
3073 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 3426 if (err) {
3074 if (err) { 3427 mlx5_ib_warn(dev, "query_port %d failed %d\n",
3075 mlx5_ib_warn(dev, "query_port %d failed %d\n", 3428 port, err);
3076 port, err); 3429 goto out;
3077 break;
3078 }
3079 dev->mdev->port_caps[port - 1].pkey_table_len =
3080 dprops->max_pkeys;
3081 dev->mdev->port_caps[port - 1].gid_table_len =
3082 pprops->gid_tbl_len;
3083 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
3084 dprops->max_pkeys, pprops->gid_tbl_len);
3085 } 3430 }
3086 3431
3432 dev->mdev->port_caps[port - 1].pkey_table_len =
3433 dprops->max_pkeys;
3434 dev->mdev->port_caps[port - 1].gid_table_len =
3435 pprops->gid_tbl_len;
3436 mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n",
3437 port, dprops->max_pkeys, pprops->gid_tbl_len);
3438
3087out: 3439out:
3088 kfree(pprops); 3440 kfree(pprops);
3089 kfree(dprops); 3441 kfree(dprops);
@@ -3373,12 +3725,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
3373 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); 3725 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
3374 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); 3726 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
3375 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); 3727 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
3728 bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
3376 u32 ret = 0; 3729 u32 ret = 0;
3377 3730
3378 if (ll == IB_LINK_LAYER_INFINIBAND) 3731 if (ll == IB_LINK_LAYER_INFINIBAND)
3379 return RDMA_CORE_PORT_IBA_IB; 3732 return RDMA_CORE_PORT_IBA_IB;
3380 3733
3381 ret = RDMA_CORE_PORT_RAW_PACKET; 3734 if (raw_support)
3735 ret = RDMA_CORE_PORT_RAW_PACKET;
3382 3736
3383 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 3737 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
3384 return ret; 3738 return ret;
@@ -3468,33 +3822,33 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
3468 } 3822 }
3469} 3823}
3470 3824
3471static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) 3825static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
3472{ 3826{
3473 int err; 3827 int err;
3474 3828
3475 dev->roce.nb.notifier_call = mlx5_netdev_event; 3829 dev->roce[port_num].nb.notifier_call = mlx5_netdev_event;
3476 err = register_netdevice_notifier(&dev->roce.nb); 3830 err = register_netdevice_notifier(&dev->roce[port_num].nb);
3477 if (err) { 3831 if (err) {
3478 dev->roce.nb.notifier_call = NULL; 3832 dev->roce[port_num].nb.notifier_call = NULL;
3479 return err; 3833 return err;
3480 } 3834 }
3481 3835
3482 return 0; 3836 return 0;
3483} 3837}
3484 3838
3485static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) 3839static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
3486{ 3840{
3487 if (dev->roce.nb.notifier_call) { 3841 if (dev->roce[port_num].nb.notifier_call) {
3488 unregister_netdevice_notifier(&dev->roce.nb); 3842 unregister_netdevice_notifier(&dev->roce[port_num].nb);
3489 dev->roce.nb.notifier_call = NULL; 3843 dev->roce[port_num].nb.notifier_call = NULL;
3490 } 3844 }
3491} 3845}
3492 3846
3493static int mlx5_enable_eth(struct mlx5_ib_dev *dev) 3847static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
3494{ 3848{
3495 int err; 3849 int err;
3496 3850
3497 err = mlx5_add_netdev_notifier(dev); 3851 err = mlx5_add_netdev_notifier(dev, port_num);
3498 if (err) 3852 if (err)
3499 return err; 3853 return err;
3500 3854
@@ -3515,7 +3869,7 @@ err_disable_roce:
3515 mlx5_nic_vport_disable_roce(dev->mdev); 3869 mlx5_nic_vport_disable_roce(dev->mdev);
3516 3870
3517err_unregister_netdevice_notifier: 3871err_unregister_netdevice_notifier:
3518 mlx5_remove_netdev_notifier(dev); 3872 mlx5_remove_netdev_notifier(dev, port_num);
3519 return err; 3873 return err;
3520} 3874}
3521 3875
@@ -3577,11 +3931,12 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
3577 3931
3578static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 3932static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
3579{ 3933{
3580 unsigned int i; 3934 int i;
3581 3935
3582 for (i = 0; i < dev->num_ports; i++) { 3936 for (i = 0; i < dev->num_ports; i++) {
3583 mlx5_core_dealloc_q_counter(dev->mdev, 3937 if (dev->port[i].cnts.set_id)
3584 dev->port[i].cnts.set_id); 3938 mlx5_core_dealloc_q_counter(dev->mdev,
3939 dev->port[i].cnts.set_id);
3585 kfree(dev->port[i].cnts.names); 3940 kfree(dev->port[i].cnts.names);
3586 kfree(dev->port[i].cnts.offsets); 3941 kfree(dev->port[i].cnts.offsets);
3587 } 3942 }
@@ -3623,6 +3978,7 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
3623 3978
3624err_names: 3979err_names:
3625 kfree(cnts->names); 3980 kfree(cnts->names);
3981 cnts->names = NULL;
3626 return -ENOMEM; 3982 return -ENOMEM;
3627} 3983}
3628 3984
@@ -3669,37 +4025,33 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
3669 4025
3670static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 4026static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
3671{ 4027{
4028 int err = 0;
3672 int i; 4029 int i;
3673 int ret;
3674 4030
3675 for (i = 0; i < dev->num_ports; i++) { 4031 for (i = 0; i < dev->num_ports; i++) {
3676 struct mlx5_ib_port *port = &dev->port[i]; 4032 err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
4033 if (err)
4034 goto err_alloc;
4035
4036 mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
4037 dev->port[i].cnts.offsets);
3677 4038
3678 ret = mlx5_core_alloc_q_counter(dev->mdev, 4039 err = mlx5_core_alloc_q_counter(dev->mdev,
3679 &port->cnts.set_id); 4040 &dev->port[i].cnts.set_id);
3680 if (ret) { 4041 if (err) {
3681 mlx5_ib_warn(dev, 4042 mlx5_ib_warn(dev,
3682 "couldn't allocate queue counter for port %d, err %d\n", 4043 "couldn't allocate queue counter for port %d, err %d\n",
3683 i + 1, ret); 4044 i + 1, err);
3684 goto dealloc_counters; 4045 goto err_alloc;
3685 } 4046 }
3686 4047 dev->port[i].cnts.set_id_valid = true;
3687 ret = __mlx5_ib_alloc_counters(dev, &port->cnts);
3688 if (ret)
3689 goto dealloc_counters;
3690
3691 mlx5_ib_fill_counters(dev, port->cnts.names,
3692 port->cnts.offsets);
3693 } 4048 }
3694 4049
3695 return 0; 4050 return 0;
3696 4051
3697dealloc_counters: 4052err_alloc:
3698 while (--i >= 0) 4053 mlx5_ib_dealloc_counters(dev);
3699 mlx5_core_dealloc_q_counter(dev->mdev, 4054 return err;
3700 dev->port[i].cnts.set_id);
3701
3702 return ret;
3703} 4055}
3704 4056
3705static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 4057static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
@@ -3718,7 +4070,7 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
3718 RDMA_HW_STATS_DEFAULT_LIFESPAN); 4070 RDMA_HW_STATS_DEFAULT_LIFESPAN);
3719} 4071}
3720 4072
3721static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev, 4073static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
3722 struct mlx5_ib_port *port, 4074 struct mlx5_ib_port *port,
3723 struct rdma_hw_stats *stats) 4075 struct rdma_hw_stats *stats)
3724{ 4076{
@@ -3731,7 +4083,7 @@ static int mlx5_ib_query_q_counters(struct mlx5_ib_dev *dev,
3731 if (!out) 4083 if (!out)
3732 return -ENOMEM; 4084 return -ENOMEM;
3733 4085
3734 ret = mlx5_core_query_q_counter(dev->mdev, 4086 ret = mlx5_core_query_q_counter(mdev,
3735 port->cnts.set_id, 0, 4087 port->cnts.set_id, 0,
3736 out, outlen); 4088 out, outlen);
3737 if (ret) 4089 if (ret)
@@ -3753,28 +4105,43 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
3753{ 4105{
3754 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4106 struct mlx5_ib_dev *dev = to_mdev(ibdev);
3755 struct mlx5_ib_port *port = &dev->port[port_num - 1]; 4107 struct mlx5_ib_port *port = &dev->port[port_num - 1];
4108 struct mlx5_core_dev *mdev;
3756 int ret, num_counters; 4109 int ret, num_counters;
4110 u8 mdev_port_num;
3757 4111
3758 if (!stats) 4112 if (!stats)
3759 return -EINVAL; 4113 return -EINVAL;
3760 4114
3761 ret = mlx5_ib_query_q_counters(dev, port, stats); 4115 num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
4116
 4117 /* q_counters are per IB device; query the master mdev */
4118 ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
3762 if (ret) 4119 if (ret)
3763 return ret; 4120 return ret;
3764 num_counters = port->cnts.num_q_counters;
3765 4121
3766 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 4122 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
4123 mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
4124 &mdev_port_num);
4125 if (!mdev) {
 4126 /* If the port is not affiliated yet, it is in the down
 4127 * state and has no counters yet, so the values would be
 4128 * zero. There is no need to read from the HCA.
 4129 */
4130 goto done;
4131 }
3767 ret = mlx5_lag_query_cong_counters(dev->mdev, 4132 ret = mlx5_lag_query_cong_counters(dev->mdev,
3768 stats->value + 4133 stats->value +
3769 port->cnts.num_q_counters, 4134 port->cnts.num_q_counters,
3770 port->cnts.num_cong_counters, 4135 port->cnts.num_cong_counters,
3771 port->cnts.offsets + 4136 port->cnts.offsets +
3772 port->cnts.num_q_counters); 4137 port->cnts.num_q_counters);
4138
4139 mlx5_ib_put_native_port_mdev(dev, port_num);
3773 if (ret) 4140 if (ret)
3774 return ret; 4141 return ret;
3775 num_counters += port->cnts.num_cong_counters;
3776 } 4142 }
3777 4143
4144done:
3778 return num_counters; 4145 return num_counters;
3779} 4146}
3780 4147
@@ -3936,36 +4303,250 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
3936 return mlx5_get_vector_affinity(dev->mdev, comp_vector); 4303 return mlx5_get_vector_affinity(dev->mdev, comp_vector);
3937} 4304}
3938 4305
3939static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 4306/* The mlx5_ib_multiport_mutex should be held when calling this function */
4307static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
4308 struct mlx5_ib_multiport_info *mpi)
3940{ 4309{
3941 struct mlx5_ib_dev *dev; 4310 u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
3942 enum rdma_link_layer ll; 4311 struct mlx5_ib_port *port = &ibdev->port[port_num];
3943 int port_type_cap; 4312 int comps;
3944 const char *name;
3945 int err; 4313 int err;
3946 int i; 4314 int i;
3947 4315
3948 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4316 mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
3949 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
3950 4317
3951 printk_once(KERN_INFO "%s", mlx5_version); 4318 spin_lock(&port->mp.mpi_lock);
4319 if (!mpi->ibdev) {
4320 spin_unlock(&port->mp.mpi_lock);
4321 return;
4322 }
4323 mpi->ibdev = NULL;
3952 4324
3953 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); 4325 spin_unlock(&port->mp.mpi_lock);
3954 if (!dev) 4326 mlx5_remove_netdev_notifier(ibdev, port_num);
3955 return NULL; 4327 spin_lock(&port->mp.mpi_lock);
3956 4328
3957 dev->mdev = mdev; 4329 comps = mpi->mdev_refcnt;
4330 if (comps) {
4331 mpi->unaffiliate = true;
4332 init_completion(&mpi->unref_comp);
4333 spin_unlock(&port->mp.mpi_lock);
4334
4335 for (i = 0; i < comps; i++)
4336 wait_for_completion(&mpi->unref_comp);
4337
4338 spin_lock(&port->mp.mpi_lock);
4339 mpi->unaffiliate = false;
4340 }
4341
4342 port->mp.mpi = NULL;
4343
4344 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
4345
4346 spin_unlock(&port->mp.mpi_lock);
4347
4348 err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
4349
4350 mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
 4351 /* Only log an error; the pointers still need to be cleaned
 4352 * up and the mpi added back to the list.
 4353 */
4354 if (err)
4355 mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
4356 port_num + 1);
4357
4358 ibdev->roce[port_num].last_port_state = IB_PORT_DOWN;
4359}
4360
4361/* The mlx5_ib_multiport_mutex should be held when calling this function */
4362static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
4363 struct mlx5_ib_multiport_info *mpi)
4364{
4365 u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
4366 int err;
4367
4368 spin_lock(&ibdev->port[port_num].mp.mpi_lock);
4369 if (ibdev->port[port_num].mp.mpi) {
4370 mlx5_ib_warn(ibdev, "port %d already affiliated.\n",
4371 port_num + 1);
4372 spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
4373 return false;
4374 }
4375
4376 ibdev->port[port_num].mp.mpi = mpi;
4377 mpi->ibdev = ibdev;
4378 spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
4379
4380 err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
4381 if (err)
4382 goto unbind;
4383
4384 err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
4385 if (err)
4386 goto unbind;
4387
4388 err = mlx5_add_netdev_notifier(ibdev, port_num);
4389 if (err) {
4390 mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
4391 port_num + 1);
4392 goto unbind;
4393 }
4394
4395 err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
4396 if (err)
4397 goto unbind;
4398
4399 return true;
4400
4401unbind:
4402 mlx5_ib_unbind_slave_port(ibdev, mpi);
4403 return false;
4404}
4405
4406static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
4407{
4408 int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4409 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
4410 port_num + 1);
4411 struct mlx5_ib_multiport_info *mpi;
4412 int err;
4413 int i;
4414
4415 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
4416 return 0;
4417
4418 err = mlx5_query_nic_vport_system_image_guid(dev->mdev,
4419 &dev->sys_image_guid);
4420 if (err)
4421 return err;
3958 4422
3959 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), 4423 err = mlx5_nic_vport_enable_roce(dev->mdev);
4424 if (err)
4425 return err;
4426
4427 mutex_lock(&mlx5_ib_multiport_mutex);
4428 for (i = 0; i < dev->num_ports; i++) {
4429 bool bound = false;
4430
4431 /* build a stub multiport info struct for the native port. */
4432 if (i == port_num) {
4433 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
4434 if (!mpi) {
4435 mutex_unlock(&mlx5_ib_multiport_mutex);
4436 mlx5_nic_vport_disable_roce(dev->mdev);
4437 return -ENOMEM;
4438 }
4439
4440 mpi->is_master = true;
4441 mpi->mdev = dev->mdev;
4442 mpi->sys_image_guid = dev->sys_image_guid;
4443 dev->port[i].mp.mpi = mpi;
4444 mpi->ibdev = dev;
4445 mpi = NULL;
4446 continue;
4447 }
4448
4449 list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
4450 list) {
4451 if (dev->sys_image_guid == mpi->sys_image_guid &&
4452 (mlx5_core_native_port_num(mpi->mdev) - 1) == i) {
4453 bound = mlx5_ib_bind_slave_port(dev, mpi);
4454 }
4455
4456 if (bound) {
4457 dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n");
4458 mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
4459 list_del(&mpi->list);
4460 break;
4461 }
4462 }
4463 if (!bound) {
4464 get_port_caps(dev, i + 1);
4465 mlx5_ib_dbg(dev, "no free port found for port %d\n",
4466 i + 1);
4467 }
4468 }
4469
4470 list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
4471 mutex_unlock(&mlx5_ib_multiport_mutex);
4472 return err;
4473}
4474
4475static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
4476{
4477 int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4478 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
4479 port_num + 1);
4480 int i;
4481
4482 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
4483 return;
4484
4485 mutex_lock(&mlx5_ib_multiport_mutex);
4486 for (i = 0; i < dev->num_ports; i++) {
4487 if (dev->port[i].mp.mpi) {
4488 /* Destroy the native port stub */
4489 if (i == port_num) {
4490 kfree(dev->port[i].mp.mpi);
4491 dev->port[i].mp.mpi = NULL;
4492 } else {
4493 mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
4494 mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
4495 }
4496 }
4497 }
4498
4499 mlx5_ib_dbg(dev, "removing from devlist\n");
4500 list_del(&dev->ib_dev_list);
4501 mutex_unlock(&mlx5_ib_multiport_mutex);
4502
4503 mlx5_nic_vport_disable_roce(dev->mdev);
4504}
4505
4506static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
4507{
4508 mlx5_ib_cleanup_multiport_master(dev);
4509#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
4510 cleanup_srcu_struct(&dev->mr_srcu);
4511#endif
4512 kfree(dev->port);
4513}
4514
4515static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
4516{
4517 struct mlx5_core_dev *mdev = dev->mdev;
4518 const char *name;
4519 int err;
4520 int i;
4521
4522 dev->port = kcalloc(dev->num_ports, sizeof(*dev->port),
3960 GFP_KERNEL); 4523 GFP_KERNEL);
3961 if (!dev->port) 4524 if (!dev->port)
3962 goto err_dealloc; 4525 return -ENOMEM;
4526
4527 for (i = 0; i < dev->num_ports; i++) {
4528 spin_lock_init(&dev->port[i].mp.mpi_lock);
4529 rwlock_init(&dev->roce[i].netdev_lock);
4530 }
3963 4531
3964 rwlock_init(&dev->roce.netdev_lock); 4532 err = mlx5_ib_init_multiport_master(dev);
3965 err = get_port_caps(dev);
3966 if (err) 4533 if (err)
3967 goto err_free_port; 4534 goto err_free_port;
3968 4535
4536 if (!mlx5_core_mp_enabled(mdev)) {
4537 int i;
4538
4539 for (i = 1; i <= dev->num_ports; i++) {
4540 err = get_port_caps(dev, i);
4541 if (err)
4542 break;
4543 }
4544 } else {
4545 err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
4546 }
4547 if (err)
4548 goto err_mp;
4549
3969 if (mlx5_use_mad_ifc(dev)) 4550 if (mlx5_use_mad_ifc(dev))
3970 get_ext_port_caps(dev); 4551 get_ext_port_caps(dev);
3971 4552
@@ -3978,12 +4559,37 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
3978 dev->ib_dev.owner = THIS_MODULE; 4559 dev->ib_dev.owner = THIS_MODULE;
3979 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 4560 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
3980 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; 4561 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
3981 dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); 4562 dev->ib_dev.phys_port_cnt = dev->num_ports;
3982 dev->ib_dev.phys_port_cnt = dev->num_ports;
3983 dev->ib_dev.num_comp_vectors = 4563 dev->ib_dev.num_comp_vectors =
3984 dev->mdev->priv.eq_table.num_comp_vectors; 4564 dev->mdev->priv.eq_table.num_comp_vectors;
3985 dev->ib_dev.dev.parent = &mdev->pdev->dev; 4565 dev->ib_dev.dev.parent = &mdev->pdev->dev;
3986 4566
4567 mutex_init(&dev->flow_db.lock);
4568 mutex_init(&dev->cap_mask_mutex);
4569 INIT_LIST_HEAD(&dev->qp_list);
4570 spin_lock_init(&dev->reset_flow_resource_lock);
4571
4572#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
4573 err = init_srcu_struct(&dev->mr_srcu);
4574 if (err)
4575 goto err_free_port;
4576#endif
4577
4578 return 0;
4579err_mp:
4580 mlx5_ib_cleanup_multiport_master(dev);
4581
4582err_free_port:
4583 kfree(dev->port);
4584
4585 return -ENOMEM;
4586}
4587
4588static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
4589{
4590 struct mlx5_core_dev *mdev = dev->mdev;
4591 int err;
4592
3987 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; 4593 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
3988 dev->ib_dev.uverbs_cmd_mask = 4594 dev->ib_dev.uverbs_cmd_mask =
3989 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 4595 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -4022,8 +4628,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
4022 dev->ib_dev.query_device = mlx5_ib_query_device; 4628 dev->ib_dev.query_device = mlx5_ib_query_device;
4023 dev->ib_dev.query_port = mlx5_ib_query_port; 4629 dev->ib_dev.query_port = mlx5_ib_query_port;
4024 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; 4630 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
4025 if (ll == IB_LINK_LAYER_ETHERNET)
4026 dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
4027 dev->ib_dev.query_gid = mlx5_ib_query_gid; 4631 dev->ib_dev.query_gid = mlx5_ib_query_gid;
4028 dev->ib_dev.add_gid = mlx5_ib_add_gid; 4632 dev->ib_dev.add_gid = mlx5_ib_add_gid;
4029 dev->ib_dev.del_gid = mlx5_ib_del_gid; 4633 dev->ib_dev.del_gid = mlx5_ib_del_gid;
@@ -4080,8 +4684,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
4080 4684
4081 dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; 4685 dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
4082 4686
4083 mlx5_ib_internal_fill_odp_caps(dev);
4084
4085 dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); 4687 dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
4086 4688
4087 if (MLX5_CAP_GEN(mdev, imaicl)) { 4689 if (MLX5_CAP_GEN(mdev, imaicl)) {
@@ -4092,11 +4694,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
4092 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 4694 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
4093 } 4695 }
4094 4696
4095 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
4096 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
4097 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
4098 }
4099
4100 if (MLX5_CAP_GEN(mdev, xrc)) { 4697 if (MLX5_CAP_GEN(mdev, xrc)) {
4101 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; 4698 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
4102 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; 4699 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -4111,8 +4708,39 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
4111 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 4708 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
4112 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 4709 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
4113 4710
4114 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == 4711 err = init_node_data(dev);
4115 IB_LINK_LAYER_ETHERNET) { 4712 if (err)
4713 return err;
4714
4715 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
4716 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
4717 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
4718 mutex_init(&dev->lb_mutex);
4719
4720 return 0;
4721}
4722
4723static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
4724{
4725 struct mlx5_core_dev *mdev = dev->mdev;
4726 enum rdma_link_layer ll;
4727 int port_type_cap;
4728 u8 port_num;
4729 int err;
4730 int i;
4731
4732 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4733 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
4734 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
4735
4736 if (ll == IB_LINK_LAYER_ETHERNET) {
4737 for (i = 0; i < dev->num_ports; i++) {
4738 dev->roce[i].dev = dev;
4739 dev->roce[i].native_port_num = i + 1;
4740 dev->roce[i].last_port_state = IB_PORT_DOWN;
4741 }
4742
4743 dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
4116 dev->ib_dev.create_wq = mlx5_ib_create_wq; 4744 dev->ib_dev.create_wq = mlx5_ib_create_wq;
4117 dev->ib_dev.modify_wq = mlx5_ib_modify_wq; 4745 dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
4118 dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; 4746 dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
@@ -4124,143 +4752,329 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
4124 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 4752 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
4125 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 4753 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
4126 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 4754 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
4755 err = mlx5_enable_eth(dev, port_num);
4756 if (err)
4757 return err;
4127 } 4758 }
4128 err = init_node_data(dev);
4129 if (err)
4130 goto err_free_port;
4131 4759
4132 mutex_init(&dev->flow_db.lock); 4760 return 0;
4133 mutex_init(&dev->cap_mask_mutex); 4761}
4134 INIT_LIST_HEAD(&dev->qp_list); 4762
4135 spin_lock_init(&dev->reset_flow_resource_lock); 4763static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
4764{
4765 struct mlx5_core_dev *mdev = dev->mdev;
4766 enum rdma_link_layer ll;
4767 int port_type_cap;
4768 u8 port_num;
4769
4770 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
4771 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
4772 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
4136 4773
4137 if (ll == IB_LINK_LAYER_ETHERNET) { 4774 if (ll == IB_LINK_LAYER_ETHERNET) {
4138 err = mlx5_enable_eth(dev); 4775 mlx5_disable_eth(dev);
4139 if (err) 4776 mlx5_remove_netdev_notifier(dev, port_num);
4140 goto err_free_port;
4141 dev->roce.last_port_state = IB_PORT_DOWN;
4142 } 4777 }
4778}
4143 4779
4144 err = create_dev_resources(&dev->devr); 4780static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
4145 if (err) 4781{
4146 goto err_disable_eth; 4782 return create_dev_resources(&dev->devr);
4783}
4147 4784
4148 err = mlx5_ib_odp_init_one(dev); 4785static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
4149 if (err) 4786{
4150 goto err_rsrc; 4787 destroy_dev_resources(&dev->devr);
4788}
4151 4789
4790static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
4791{
4792 mlx5_ib_internal_fill_odp_caps(dev);
4793
4794 return mlx5_ib_odp_init_one(dev);
4795}
4796
4797static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
4798{
4152 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { 4799 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
4153 err = mlx5_ib_alloc_counters(dev); 4800 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
4154 if (err) 4801 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
4155 goto err_odp; 4802
4803 return mlx5_ib_alloc_counters(dev);
4156 } 4804 }
4157 4805
4158 err = mlx5_ib_init_cong_debugfs(dev); 4806 return 0;
4159 if (err) 4807}
4160 goto err_cnt; 4808
4809static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
4810{
4811 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
4812 mlx5_ib_dealloc_counters(dev);
4813}
4161 4814
4815static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
4816{
4817 return mlx5_ib_init_cong_debugfs(dev,
4818 mlx5_core_native_port_num(dev->mdev) - 1);
4819}
4820
4821static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
4822{
4823 mlx5_ib_cleanup_cong_debugfs(dev,
4824 mlx5_core_native_port_num(dev->mdev) - 1);
4825}
4826
4827static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
4828{
4162 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); 4829 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
4163 if (IS_ERR(dev->mdev->priv.uar)) 4830 if (!dev->mdev->priv.uar)
4164 goto err_cong; 4831 return -ENOMEM;
4832 return 0;
4833}
4834
4835static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
4836{
4837 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
4838}
4839
4840static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
4841{
4842 int err;
4165 4843
4166 err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); 4844 err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
4167 if (err) 4845 if (err)
4168 goto err_uar_page; 4846 return err;
4169 4847
4170 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); 4848 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
4171 if (err) 4849 if (err)
4172 goto err_bfreg; 4850 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
4173 4851
4174 err = ib_register_device(&dev->ib_dev, NULL); 4852 return err;
4175 if (err) 4853}
4176 goto err_fp_bfreg;
4177 4854
4178 err = create_umr_res(dev); 4855static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
4179 if (err) 4856{
4180 goto err_dev; 4857 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
4858 mlx5_free_bfreg(dev->mdev, &dev->bfreg);
4859}
4860
4861static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
4862{
4863 return ib_register_device(&dev->ib_dev, NULL);
4864}
4865
4866static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
4867{
4868 ib_unregister_device(&dev->ib_dev);
4869}
4181 4870
4871static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev)
4872{
4873 return create_umr_res(dev);
4874}
4875
4876static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev)
4877{
4878 destroy_umrc_res(dev);
4879}
4880
4881static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
4882{
4182 init_delay_drop(dev); 4883 init_delay_drop(dev);
4183 4884
4885 return 0;
4886}
4887
4888static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
4889{
4890 cancel_delay_drop(dev);
4891}
4892
4893static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
4894{
4895 int err;
4896 int i;
4897
4184 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { 4898 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
4185 err = device_create_file(&dev->ib_dev.dev, 4899 err = device_create_file(&dev->ib_dev.dev,
4186 mlx5_class_attributes[i]); 4900 mlx5_class_attributes[i]);
4187 if (err) 4901 if (err)
4188 goto err_delay_drop; 4902 return err;
4189 } 4903 }
4190 4904
4191 if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && 4905 return 0;
4192 (MLX5_CAP_GEN(mdev, disable_local_lb_uc) || 4906}
4193 MLX5_CAP_GEN(mdev, disable_local_lb_mc))) 4907
4194 mutex_init(&dev->lb_mutex); 4908static void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
4909 const struct mlx5_ib_profile *profile,
4910 int stage)
4911{
4912 /* Number of stages to cleanup */
4913 while (stage) {
4914 stage--;
4915 if (profile->stage[stage].cleanup)
4916 profile->stage[stage].cleanup(dev);
4917 }
4918
4919 ib_dealloc_device((struct ib_device *)dev);
4920}
4921
4922static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
4923
4924static void *__mlx5_ib_add(struct mlx5_core_dev *mdev,
4925 const struct mlx5_ib_profile *profile)
4926{
4927 struct mlx5_ib_dev *dev;
4928 int err;
4929 int i;
4930
4931 printk_once(KERN_INFO "%s", mlx5_version);
4195 4932
4933 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
4934 if (!dev)
4935 return NULL;
4936
4937 dev->mdev = mdev;
4938 dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
4939 MLX5_CAP_GEN(mdev, num_vhca_ports));
4940
4941 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
4942 if (profile->stage[i].init) {
4943 err = profile->stage[i].init(dev);
4944 if (err)
4945 goto err_out;
4946 }
4947 }
4948
4949 dev->profile = profile;
4196 dev->ib_active = true; 4950 dev->ib_active = true;
4197 4951
4198 return dev; 4952 return dev;
4199 4953
4200err_delay_drop: 4954err_out:
4201 cancel_delay_drop(dev); 4955 __mlx5_ib_remove(dev, profile, i);
4202 destroy_umrc_res(dev);
4203 4956
4204err_dev: 4957 return NULL;
4205 ib_unregister_device(&dev->ib_dev); 4958}
4206 4959
4207err_fp_bfreg: 4960static const struct mlx5_ib_profile pf_profile = {
4208 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 4961 STAGE_CREATE(MLX5_IB_STAGE_INIT,
4962 mlx5_ib_stage_init_init,
4963 mlx5_ib_stage_init_cleanup),
4964 STAGE_CREATE(MLX5_IB_STAGE_CAPS,
4965 mlx5_ib_stage_caps_init,
4966 NULL),
4967 STAGE_CREATE(MLX5_IB_STAGE_ROCE,
4968 mlx5_ib_stage_roce_init,
4969 mlx5_ib_stage_roce_cleanup),
4970 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
4971 mlx5_ib_stage_dev_res_init,
4972 mlx5_ib_stage_dev_res_cleanup),
4973 STAGE_CREATE(MLX5_IB_STAGE_ODP,
4974 mlx5_ib_stage_odp_init,
4975 NULL),
4976 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
4977 mlx5_ib_stage_counters_init,
4978 mlx5_ib_stage_counters_cleanup),
4979 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
4980 mlx5_ib_stage_cong_debugfs_init,
4981 mlx5_ib_stage_cong_debugfs_cleanup),
4982 STAGE_CREATE(MLX5_IB_STAGE_UAR,
4983 mlx5_ib_stage_uar_init,
4984 mlx5_ib_stage_uar_cleanup),
4985 STAGE_CREATE(MLX5_IB_STAGE_BFREG,
4986 mlx5_ib_stage_bfrag_init,
4987 mlx5_ib_stage_bfrag_cleanup),
4988 STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
4989 mlx5_ib_stage_ib_reg_init,
4990 mlx5_ib_stage_ib_reg_cleanup),
4991 STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES,
4992 mlx5_ib_stage_umr_res_init,
4993 mlx5_ib_stage_umr_res_cleanup),
4994 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
4995 mlx5_ib_stage_delay_drop_init,
4996 mlx5_ib_stage_delay_drop_cleanup),
4997 STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
4998 mlx5_ib_stage_class_attr_init,
4999 NULL),
5000};
4209 5001
4210err_bfreg: 5002static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
4211 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 5003{
5004 struct mlx5_ib_multiport_info *mpi;
5005 struct mlx5_ib_dev *dev;
5006 bool bound = false;
5007 int err;
4212 5008
4213err_uar_page: 5009 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
4214 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); 5010 if (!mpi)
5011 return NULL;
4215 5012
4216err_cong: 5013 mpi->mdev = mdev;
4217 mlx5_ib_cleanup_cong_debugfs(dev);
4218err_cnt:
4219 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
4220 mlx5_ib_dealloc_counters(dev);
4221 5014
4222err_odp: 5015 err = mlx5_query_nic_vport_system_image_guid(mdev,
4223 mlx5_ib_odp_remove_one(dev); 5016 &mpi->sys_image_guid);
5017 if (err) {
5018 kfree(mpi);
5019 return NULL;
5020 }
4224 5021
4225err_rsrc: 5022 mutex_lock(&mlx5_ib_multiport_mutex);
4226 destroy_dev_resources(&dev->devr); 5023 list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) {
5024 if (dev->sys_image_guid == mpi->sys_image_guid)
5025 bound = mlx5_ib_bind_slave_port(dev, mpi);
4227 5026
4228err_disable_eth: 5027 if (bound) {
4229 if (ll == IB_LINK_LAYER_ETHERNET) { 5028 rdma_roce_rescan_device(&dev->ib_dev);
4230 mlx5_disable_eth(dev); 5029 break;
4231 mlx5_remove_netdev_notifier(dev); 5030 }
4232 } 5031 }
4233 5032
4234err_free_port: 5033 if (!bound) {
4235 kfree(dev->port); 5034 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
5035 dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
5036 } else {
5037 mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
5038 }
5039 mutex_unlock(&mlx5_ib_multiport_mutex);
4236 5040
4237err_dealloc: 5041 return mpi;
4238 ib_dealloc_device((struct ib_device *)dev); 5042}
4239 5043
4240 return NULL; 5044static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
5045{
5046 enum rdma_link_layer ll;
5047 int port_type_cap;
5048
5049 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
5050 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
5051
5052 if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) {
5053 u8 port_num = mlx5_core_native_port_num(mdev) - 1;
5054
5055 return mlx5_ib_add_slave_port(mdev, port_num);
5056 }
5057
5058 return __mlx5_ib_add(mdev, &pf_profile);
4241} 5059}
4242 5060
4243static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) 5061static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
4244{ 5062{
4245 struct mlx5_ib_dev *dev = context; 5063 struct mlx5_ib_multiport_info *mpi;
4246 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); 5064 struct mlx5_ib_dev *dev;
4247 5065
4248 cancel_delay_drop(dev); 5066 if (mlx5_core_is_mp_slave(mdev)) {
4249 mlx5_remove_netdev_notifier(dev); 5067 mpi = context;
4250 ib_unregister_device(&dev->ib_dev); 5068 mutex_lock(&mlx5_ib_multiport_mutex);
4251 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 5069 if (mpi->ibdev)
4252 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 5070 mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
4253 mlx5_put_uars_page(dev->mdev, mdev->priv.uar); 5071 list_del(&mpi->list);
4254 mlx5_ib_cleanup_cong_debugfs(dev); 5072 mutex_unlock(&mlx5_ib_multiport_mutex);
4255 if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) 5073 return;
4256 mlx5_ib_dealloc_counters(dev); 5074 }
4257 destroy_umrc_res(dev); 5075
4258 mlx5_ib_odp_remove_one(dev); 5076 dev = context;
4259 destroy_dev_resources(&dev->devr); 5077 __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
4260 if (ll == IB_LINK_LAYER_ETHERNET)
4261 mlx5_disable_eth(dev);
4262 kfree(dev->port);
4263 ib_dealloc_device(&dev->ib_dev);
4264} 5078}
4265 5079
4266static struct mlx5_interface mlx5_ib_interface = { 5080static struct mlx5_interface mlx5_ib_interface = {
@@ -4277,6 +5091,10 @@ static int __init mlx5_ib_init(void)
4277{ 5091{
4278 int err; 5092 int err;
4279 5093
5094 mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
5095 if (!mlx5_ib_event_wq)
5096 return -ENOMEM;
5097
4280 mlx5_ib_odp_init(); 5098 mlx5_ib_odp_init();
4281 5099
4282 err = mlx5_register_interface(&mlx5_ib_interface); 5100 err = mlx5_register_interface(&mlx5_ib_interface);
@@ -4287,6 +5105,7 @@ static int __init mlx5_ib_init(void)
4287static void __exit mlx5_ib_cleanup(void) 5105static void __exit mlx5_ib_cleanup(void)
4288{ 5106{
4289 mlx5_unregister_interface(&mlx5_ib_interface); 5107 mlx5_unregister_interface(&mlx5_ib_interface);
5108 destroy_workqueue(mlx5_ib_event_wq);
4290} 5109}
4291 5110
4292module_init(mlx5_ib_init); 5111module_init(mlx5_ib_init);
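The largest change in this file replaces the monolithic mlx5_ib_add()/mlx5_ib_remove() pair with a table-driven profile: each stage contributes an init callback and an optional cleanup callback via STAGE_CREATE, __mlx5_ib_add() walks the table forward, and __mlx5_ib_remove() unwinds in reverse from the last completed stage. A standalone userspace sketch of that pattern follows; every demo_* name is illustrative and not part of the driver.

/* Standalone sketch of the staged init/cleanup pattern. */
#include <stdio.h>

struct demo_dev {
	int res_a;
	int res_b;
};

struct demo_stage {
	int (*init)(struct demo_dev *dev);
	void (*cleanup)(struct demo_dev *dev);	/* may be NULL */
};

static int init_a(struct demo_dev *dev) { dev->res_a = 1; return 0; }
static void cleanup_a(struct demo_dev *dev) { dev->res_a = 0; }
static int init_b(struct demo_dev *dev) { dev->res_b = 2; return 0; }
static void cleanup_b(struct demo_dev *dev) { dev->res_b = 0; }

static const struct demo_stage demo_profile[] = {
	{ init_a, cleanup_a },
	{ init_b, cleanup_b },
};

#define DEMO_STAGE_MAX ((int)(sizeof(demo_profile) / sizeof(demo_profile[0])))

/* Unwind stages [0, stage) in reverse order, like __mlx5_ib_remove(). */
static void demo_remove(struct demo_dev *dev, int stage)
{
	while (stage--)
		if (demo_profile[stage].cleanup)
			demo_profile[stage].cleanup(dev);
}

/* Walk the table forward, rolling back on the first failure, like __mlx5_ib_add(). */
static int demo_add(struct demo_dev *dev)
{
	int i, err;

	for (i = 0; i < DEMO_STAGE_MAX; i++) {
		if (!demo_profile[i].init)
			continue;
		err = demo_profile[i].init(dev);
		if (err) {
			demo_remove(dev, i);
			return err;
		}
	}
	return 0;
}

int main(void)
{
	struct demo_dev dev = { 0, 0 };

	if (!demo_add(&dev))
		printf("initialized: res_a=%d res_b=%d\n", dev.res_a, dev.res_b);
	demo_remove(&dev, DEMO_STAGE_MAX);
	return 0;
}

One design point worth noting: because the cleanup table is the single source of truth, the error path of __mlx5_ib_add() and the regular remove path share the same unwind code, which removes the long goto ladder that the old mlx5_ib_add() needed.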
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2c5f3533bbc9..139385129973 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -70,15 +70,6 @@ enum {
70 MLX5_IB_MMAP_CMD_MASK = 0xff, 70 MLX5_IB_MMAP_CMD_MASK = 0xff,
71}; 71};
72 72
73enum mlx5_ib_mmap_cmd {
74 MLX5_IB_MMAP_REGULAR_PAGE = 0,
75 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
76 MLX5_IB_MMAP_WC_PAGE = 2,
77 MLX5_IB_MMAP_NC_PAGE = 3,
78 /* 5 is chosen in order to be compatible with old versions of libmlx5 */
79 MLX5_IB_MMAP_CORE_CLOCK = 5,
80};
81
82enum { 73enum {
83 MLX5_RES_SCAT_DATA32_CQE = 0x1, 74 MLX5_RES_SCAT_DATA32_CQE = 0x1,
84 MLX5_RES_SCAT_DATA64_CQE = 0x2, 75 MLX5_RES_SCAT_DATA64_CQE = 0x2,
@@ -112,6 +103,11 @@ enum {
112 MLX5_TM_MAX_SGE = 1, 103 MLX5_TM_MAX_SGE = 1,
113}; 104};
114 105
106enum {
107 MLX5_IB_INVALID_UAR_INDEX = BIT(31),
108 MLX5_IB_INVALID_BFREG = BIT(31),
109};
110
115struct mlx5_ib_vma_private_data { 111struct mlx5_ib_vma_private_data {
116 struct list_head list; 112 struct list_head list;
117 struct vm_area_struct *vma; 113 struct vm_area_struct *vma;
@@ -200,6 +196,8 @@ struct mlx5_ib_flow_db {
200 * creates the actual hardware QP. 196 * creates the actual hardware QP.
201 */ 197 */
202#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 198#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
199#define MLX5_IB_QPT_DCI IB_QPT_RESERVED3
200#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
203#define MLX5_IB_WR_UMR IB_WR_RESERVED1 201#define MLX5_IB_WR_UMR IB_WR_RESERVED1
204 202
205#define MLX5_IB_UMR_OCTOWORD 16 203#define MLX5_IB_UMR_OCTOWORD 16
@@ -360,12 +358,18 @@ struct mlx5_bf {
360 struct mlx5_sq_bfreg *bfreg; 358 struct mlx5_sq_bfreg *bfreg;
361}; 359};
362 360
361struct mlx5_ib_dct {
362 struct mlx5_core_dct mdct;
363 u32 *in;
364};
365
363struct mlx5_ib_qp { 366struct mlx5_ib_qp {
364 struct ib_qp ibqp; 367 struct ib_qp ibqp;
365 union { 368 union {
366 struct mlx5_ib_qp_trans trans_qp; 369 struct mlx5_ib_qp_trans trans_qp;
367 struct mlx5_ib_raw_packet_qp raw_packet_qp; 370 struct mlx5_ib_raw_packet_qp raw_packet_qp;
368 struct mlx5_ib_rss_qp rss_qp; 371 struct mlx5_ib_rss_qp rss_qp;
372 struct mlx5_ib_dct dct;
369 }; 373 };
370 struct mlx5_buf buf; 374 struct mlx5_buf buf;
371 375
@@ -404,6 +408,8 @@ struct mlx5_ib_qp {
404 u32 rate_limit; 408 u32 rate_limit;
405 u32 underlay_qpn; 409 u32 underlay_qpn;
406 bool tunnel_offload_en; 410 bool tunnel_offload_en;
411 /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
412 enum ib_qp_type qp_sub_type;
407}; 413};
408 414
409struct mlx5_ib_cq_buf { 415struct mlx5_ib_cq_buf {
@@ -636,10 +642,21 @@ struct mlx5_ib_counters {
636 u32 num_q_counters; 642 u32 num_q_counters;
637 u32 num_cong_counters; 643 u32 num_cong_counters;
638 u16 set_id; 644 u16 set_id;
645 bool set_id_valid;
646};
647
648struct mlx5_ib_multiport_info;
649
650struct mlx5_ib_multiport {
651 struct mlx5_ib_multiport_info *mpi;
652 /* To be held when accessing the multiport info */
653 spinlock_t mpi_lock;
639}; 654};
640 655
641struct mlx5_ib_port { 656struct mlx5_ib_port {
642 struct mlx5_ib_counters cnts; 657 struct mlx5_ib_counters cnts;
658 struct mlx5_ib_multiport mp;
659 struct mlx5_ib_dbg_cc_params *dbg_cc_params;
643}; 660};
644 661
645struct mlx5_roce { 662struct mlx5_roce {
@@ -651,12 +668,15 @@ struct mlx5_roce {
651 struct notifier_block nb; 668 struct notifier_block nb;
652 atomic_t next_port; 669 atomic_t next_port;
653 enum ib_port_state last_port_state; 670 enum ib_port_state last_port_state;
671 struct mlx5_ib_dev *dev;
672 u8 native_port_num;
654}; 673};
655 674
656struct mlx5_ib_dbg_param { 675struct mlx5_ib_dbg_param {
657 int offset; 676 int offset;
658 struct mlx5_ib_dev *dev; 677 struct mlx5_ib_dev *dev;
659 struct dentry *dentry; 678 struct dentry *dentry;
679 u8 port_num;
660}; 680};
661 681
662enum mlx5_ib_dbg_cc_types { 682enum mlx5_ib_dbg_cc_types {
@@ -709,10 +729,50 @@ struct mlx5_ib_delay_drop {
709 struct mlx5_ib_dbg_delay_drop *dbg; 729 struct mlx5_ib_dbg_delay_drop *dbg;
710}; 730};
711 731
732enum mlx5_ib_stages {
733 MLX5_IB_STAGE_INIT,
734 MLX5_IB_STAGE_CAPS,
735 MLX5_IB_STAGE_ROCE,
736 MLX5_IB_STAGE_DEVICE_RESOURCES,
737 MLX5_IB_STAGE_ODP,
738 MLX5_IB_STAGE_COUNTERS,
739 MLX5_IB_STAGE_CONG_DEBUGFS,
740 MLX5_IB_STAGE_UAR,
741 MLX5_IB_STAGE_BFREG,
742 MLX5_IB_STAGE_IB_REG,
743 MLX5_IB_STAGE_UMR_RESOURCES,
744 MLX5_IB_STAGE_DELAY_DROP,
745 MLX5_IB_STAGE_CLASS_ATTR,
746 MLX5_IB_STAGE_MAX,
747};
748
749struct mlx5_ib_stage {
750 int (*init)(struct mlx5_ib_dev *dev);
751 void (*cleanup)(struct mlx5_ib_dev *dev);
752};
753
754#define STAGE_CREATE(_stage, _init, _cleanup) \
755 .stage[_stage] = {.init = _init, .cleanup = _cleanup}
756
757struct mlx5_ib_profile {
758 struct mlx5_ib_stage stage[MLX5_IB_STAGE_MAX];
759};
760
761struct mlx5_ib_multiport_info {
762 struct list_head list;
763 struct mlx5_ib_dev *ibdev;
764 struct mlx5_core_dev *mdev;
765 struct completion unref_comp;
766 u64 sys_image_guid;
767 u32 mdev_refcnt;
768 bool is_master;
769 bool unaffiliate;
770};
771
712struct mlx5_ib_dev { 772struct mlx5_ib_dev {
713 struct ib_device ib_dev; 773 struct ib_device ib_dev;
714 struct mlx5_core_dev *mdev; 774 struct mlx5_core_dev *mdev;
715 struct mlx5_roce roce; 775 struct mlx5_roce roce[MLX5_MAX_PORTS];
716 int num_ports; 776 int num_ports;
717 /* serialize update of capability mask 777 /* serialize update of capability mask
718 */ 778 */
@@ -746,12 +806,14 @@ struct mlx5_ib_dev {
746 struct mlx5_sq_bfreg bfreg; 806 struct mlx5_sq_bfreg bfreg;
747 struct mlx5_sq_bfreg fp_bfreg; 807 struct mlx5_sq_bfreg fp_bfreg;
748 struct mlx5_ib_delay_drop delay_drop; 808 struct mlx5_ib_delay_drop delay_drop;
749 struct mlx5_ib_dbg_cc_params *dbg_cc_params; 809 const struct mlx5_ib_profile *profile;
750 810
751 /* protect the user_td */ 811 /* protect the user_td */
752 struct mutex lb_mutex; 812 struct mutex lb_mutex;
753 u32 user_td; 813 u32 user_td;
754 u8 umr_fence; 814 u8 umr_fence;
815 struct list_head ib_dev_list;
816 u64 sys_image_guid;
755}; 817};
756 818
757static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) 819static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -956,13 +1018,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
956 struct ib_rwq_ind_table_init_attr *init_attr, 1018 struct ib_rwq_ind_table_init_attr *init_attr,
957 struct ib_udata *udata); 1019 struct ib_udata *udata);
958int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); 1020int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
1021bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
1022
959 1023
960#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1024#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
961void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); 1025void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
962void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, 1026void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
963 struct mlx5_pagefault *pfault); 1027 struct mlx5_pagefault *pfault);
964int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); 1028int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
965void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
966int __init mlx5_ib_odp_init(void); 1029int __init mlx5_ib_odp_init(void);
967void mlx5_ib_odp_cleanup(void); 1030void mlx5_ib_odp_cleanup(void);
968void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 1031void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
@@ -977,7 +1040,6 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
977} 1040}
978 1041
979static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } 1042static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
980static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
981static inline int mlx5_ib_odp_init(void) { return 0; } 1043static inline int mlx5_ib_odp_init(void) { return 0; }
982static inline void mlx5_ib_odp_cleanup(void) {} 1044static inline void mlx5_ib_odp_cleanup(void) {}
983static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} 1045static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
@@ -1001,8 +1063,8 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
1001int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, 1063int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
1002 int index, enum ib_gid_type *gid_type); 1064 int index, enum ib_gid_type *gid_type);
1003 1065
1004void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev); 1066void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
1005int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev); 1067int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
1006 1068
1007/* GSI QP helper functions */ 1069/* GSI QP helper functions */
1008struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, 1070struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
@@ -1021,6 +1083,15 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
1021 1083
1022int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); 1084int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
1023 1085
1086void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
1087 int bfregn);
1088struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi);
1089struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
1090 u8 ib_port_num,
1091 u8 *native_port_num);
1092void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
1093 u8 port_num);
1094
1024static inline void init_query_mad(struct ib_smp *mad) 1095static inline void init_query_mad(struct ib_smp *mad)
1025{ 1096{
1026 mad->base_version = 1; 1097 mad->base_version = 1;
@@ -1052,8 +1123,8 @@ static inline u32 check_cq_create_flags(u32 flags)
1052 * It returns non-zero value for unsupported CQ 1123 * It returns non-zero value for unsupported CQ
1053 * create flags, otherwise it returns zero. 1124 * create flags, otherwise it returns zero.
1054 */ 1125 */
1055 return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | 1126 return (flags & ~(IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN |
1056 IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); 1127 IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
1057} 1128}
1058 1129
1059static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, 1130static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
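
check_cq_create_flags() now validates against the CQ flag names exported through the uverbs uAPI headers; the test itself is the usual "reject any bit outside the supported set" complement mask. A standalone sketch of the pattern, with illustrative flag values rather than the kernel's real ones:

/* Standalone sketch of the "reject unknown flag bits" check; the flag
 * values here are illustrative only, not the kernel's definitions. */
#include <stdio.h>
#include <stdint.h>

#define CQ_FLAG_IGNORE_OVERRUN        (1u << 0)   /* assumed value */
#define CQ_FLAG_TIMESTAMP_COMPLETION  (1u << 1)   /* assumed value */
#define CQ_SUPPORTED_FLAGS (CQ_FLAG_IGNORE_OVERRUN | CQ_FLAG_TIMESTAMP_COMPLETION)

static uint32_t check_cq_create_flags(uint32_t flags)
{
	/* non-zero result means at least one unsupported bit was set */
	return flags & ~CQ_SUPPORTED_FLAGS;
}

int main(void)
{
	printf("%u\n", check_cq_create_flags(CQ_FLAG_TIMESTAMP_COMPLETION)); /* 0: accepted */
	printf("%u\n", check_cq_create_flags(1u << 5));                      /* non-zero: rejected */
	return 0;
}
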
@@ -1113,10 +1184,10 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor
1113 MLX5_UARS_IN_PAGE : 1; 1184 MLX5_UARS_IN_PAGE : 1;
1114} 1185}
1115 1186
1116static inline int get_num_uars(struct mlx5_ib_dev *dev, 1187static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
1117 struct mlx5_bfreg_info *bfregi) 1188 struct mlx5_bfreg_info *bfregi)
1118{ 1189{
1119 return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_sys_pages; 1190 return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
1120} 1191}
1121 1192
1122#endif /* MLX5_IB_H */ 1193#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index d109fe8290a7..556e015678de 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1206,6 +1206,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1206 int err; 1206 int err;
1207 bool use_umr = true; 1207 bool use_umr = true;
1208 1208
1209 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1210 return ERR_PTR(-EINVAL);
1211
1209 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1212 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1210 start, virt_addr, length, access_flags); 1213 start, virt_addr, length, access_flags);
1211 1214
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index e2197bdda89c..f1a87a690a4c 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1207,10 +1207,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
1207{ 1207{
1208 int ret; 1208 int ret;
1209 1209
1210 ret = init_srcu_struct(&dev->mr_srcu);
1211 if (ret)
1212 return ret;
1213
1214 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { 1210 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
1215 ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); 1211 ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
1216 if (ret) { 1212 if (ret) {
@@ -1222,11 +1218,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
1222 return 0; 1218 return 0;
1223} 1219}
1224 1220
1225void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
1226{
1227 cleanup_srcu_struct(&dev->mr_srcu);
1228}
1229
1230int mlx5_ib_odp_init(void) 1221int mlx5_ib_odp_init(void)
1231{ 1222{
1232 mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) - 1223 mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index cffe5966aef9..39d24bf694a8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -493,7 +493,7 @@ enum {
493 493
494static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) 494static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
495{ 495{
496 return get_num_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR; 496 return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
497} 497}
498 498
499static int num_med_bfreg(struct mlx5_ib_dev *dev, 499static int num_med_bfreg(struct mlx5_ib_dev *dev,
@@ -581,7 +581,7 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
581 return bfregn; 581 return bfregn;
582} 582}
583 583
584static void free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn) 584void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
585{ 585{
586 mutex_lock(&bfregi->lock); 586 mutex_lock(&bfregi->lock);
587 bfregi->count[bfregn]--; 587 bfregi->count[bfregn]--;
@@ -613,6 +613,7 @@ static int to_mlx5_st(enum ib_qp_type type)
613 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; 613 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
614 case IB_QPT_SMI: return MLX5_QP_ST_QP0; 614 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
615 case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1; 615 case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
616 case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI;
616 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; 617 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
617 case IB_QPT_RAW_PACKET: 618 case IB_QPT_RAW_PACKET:
618 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; 619 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
@@ -627,7 +628,8 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
627 struct mlx5_ib_cq *recv_cq); 628 struct mlx5_ib_cq *recv_cq);
628 629
629static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, 630static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
630 struct mlx5_bfreg_info *bfregi, int bfregn) 631 struct mlx5_bfreg_info *bfregi, int bfregn,
632 bool dyn_bfreg)
631{ 633{
632 int bfregs_per_sys_page; 634 int bfregs_per_sys_page;
633 int index_of_sys_page; 635 int index_of_sys_page;
@@ -637,8 +639,16 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
637 MLX5_NON_FP_BFREGS_PER_UAR; 639 MLX5_NON_FP_BFREGS_PER_UAR;
638 index_of_sys_page = bfregn / bfregs_per_sys_page; 640 index_of_sys_page = bfregn / bfregs_per_sys_page;
639 641
640 offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR; 642 if (dyn_bfreg) {
643 index_of_sys_page += bfregi->num_static_sys_pages;
644 if (bfregn > bfregi->num_dyn_bfregs ||
645 bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
646 mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
647 return -EINVAL;
648 }
649 }
641 650
651 offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
642 return bfregi->sys_pages[index_of_sys_page] + offset; 652 return bfregi->sys_pages[index_of_sys_page] + offset;
643} 653}
644 654
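
With dynamic bfreg support, bfregn_to_uar_index() maps a bfreg number to a system page and an offset within that page, and dynamically allocated bfregs live in pages appended after the static ones, so their page index is shifted by num_static_sys_pages. A standalone sketch of that arithmetic (the structure fields and constants below are simplified stand-ins, not the driver's real definitions):

/* Sketch of the bfregn -> UAR index arithmetic from bfregn_to_uar_index(). */
#include <stdio.h>

#define BFREGS_PER_UAR 4                /* stands in for MLX5_NON_FP_BFREGS_PER_UAR */

struct bfreg_info {
	int uars_per_sys_page;          /* UARs backing one system page */
	int num_static_sys_pages;       /* pages set up at context allocation */
	int sys_pages[8];               /* UAR index of each system page */
};

static int bfregn_to_uar_index(const struct bfreg_info *bfregi, int bfregn, int dyn)
{
	int bfregs_per_sys_page = bfregi->uars_per_sys_page * BFREGS_PER_UAR;
	int page = bfregn / bfregs_per_sys_page;
	int offset;

	if (dyn)
		/* dynamic UAR pages are appended after the static ones */
		page += bfregi->num_static_sys_pages;

	offset = bfregn % bfregs_per_sys_page / BFREGS_PER_UAR;
	return bfregi->sys_pages[page] + offset;
}

int main(void)
{
	struct bfreg_info bi = {
		.uars_per_sys_page = 2,
		.num_static_sys_pages = 2,
		.sys_pages = { 10, 12, 20, 22 },
	};

	printf("static bfreg 5 -> UAR %d\n", bfregn_to_uar_index(&bi, 5, 0));  /* page 0, offset 1 -> 11 */
	printf("dynamic bfreg 0 -> UAR %d\n", bfregn_to_uar_index(&bi, 0, 1)); /* page 2 -> 20 */
	return 0;
}
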
@@ -764,7 +774,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
764 struct mlx5_ib_create_qp ucmd; 774 struct mlx5_ib_create_qp ucmd;
765 struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer; 775 struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
766 int page_shift = 0; 776 int page_shift = 0;
767 int uar_index; 777 int uar_index = 0;
768 int npages; 778 int npages;
769 u32 offset = 0; 779 u32 offset = 0;
770 int bfregn; 780 int bfregn;
@@ -780,12 +790,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
780 } 790 }
781 791
782 context = to_mucontext(pd->uobject->context); 792 context = to_mucontext(pd->uobject->context);
783 /* 793 if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
784 * TBD: should come from the verbs when we have the API 794 uar_index = bfregn_to_uar_index(dev, &context->bfregi,
785 */ 795 ucmd.bfreg_index, true);
786 if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) 796 if (uar_index < 0)
797 return uar_index;
798
799 bfregn = MLX5_IB_INVALID_BFREG;
800 } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
801 /*
802 * TBD: should come from the verbs when we have the API
803 */
787 /* In CROSS_CHANNEL CQ and QP must use the same UAR */ 804 /* In CROSS_CHANNEL CQ and QP must use the same UAR */
788 bfregn = MLX5_CROSS_CHANNEL_BFREG; 805 bfregn = MLX5_CROSS_CHANNEL_BFREG;
806 }
789 else { 807 else {
790 bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); 808 bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
791 if (bfregn < 0) { 809 if (bfregn < 0) {
@@ -804,8 +822,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
804 } 822 }
805 } 823 }
806 824
807 uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn);
808 mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); 825 mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
826 if (bfregn != MLX5_IB_INVALID_BFREG)
827 uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
828 false);
809 829
810 qp->rq.offset = 0; 830 qp->rq.offset = 0;
811 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); 831 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -845,7 +865,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
845 MLX5_SET(qpc, qpc, page_offset, offset); 865 MLX5_SET(qpc, qpc, page_offset, offset);
846 866
847 MLX5_SET(qpc, qpc, uar_page, uar_index); 867 MLX5_SET(qpc, qpc, uar_page, uar_index);
848 resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn); 868 if (bfregn != MLX5_IB_INVALID_BFREG)
869 resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
870 else
871 resp->bfreg_index = MLX5_IB_INVALID_BFREG;
849 qp->bfregn = bfregn; 872 qp->bfregn = bfregn;
850 873
851 err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); 874 err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
@@ -874,7 +897,8 @@ err_umem:
874 ib_umem_release(ubuffer->umem); 897 ib_umem_release(ubuffer->umem);
875 898
876err_bfreg: 899err_bfreg:
877 free_bfreg(dev, &context->bfregi, bfregn); 900 if (bfregn != MLX5_IB_INVALID_BFREG)
901 mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
878 return err; 902 return err;
879} 903}
880 904
@@ -887,7 +911,13 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
887 mlx5_ib_db_unmap_user(context, &qp->db); 911 mlx5_ib_db_unmap_user(context, &qp->db);
888 if (base->ubuffer.umem) 912 if (base->ubuffer.umem)
889 ib_umem_release(base->ubuffer.umem); 913 ib_umem_release(base->ubuffer.umem);
890 free_bfreg(dev, &context->bfregi, qp->bfregn); 914
915 /*
916 * Free only the BFREGs which are handled by the kernel.
917 * BFREGs of UARs allocated dynamically are handled by user.
918 */
919 if (qp->bfregn != MLX5_IB_INVALID_BFREG)
920 mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
891} 921}
892 922
893static int create_kernel_qp(struct mlx5_ib_dev *dev, 923static int create_kernel_qp(struct mlx5_ib_dev *dev,
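
In create_user_qp() the UAR now comes from one of three places: a dynamic bfreg index passed by userspace via MLX5_QP_FLAG_BFREG_INDEX, the fixed cross-channel bfreg, or a kernel-allocated bfreg. Only the last two are kernel-owned; a user-supplied dynamic bfreg is recorded as MLX5_IB_INVALID_BFREG so that both the error path and destroy_qp_user() skip freeing it. A minimal sketch of that selection and ownership rule (placeholder names and values, not the driver's):

#include <stdio.h>

#define INVALID_BFREG       (-1)  /* stands in for MLX5_IB_INVALID_BFREG */
#define CROSS_CHANNEL_BFREG   0
#define FLAG_BFREG_INDEX    0x1   /* userspace passed a dynamic bfreg index */
#define FLAG_CROSS_CHANNEL  0x2

static int alloc_bfreg(void)           { return 3; }   /* pretend allocator */
static void free_bfreg(int bfregn)     { printf("freeing bfreg %d\n", bfregn); }
static int uar_of(int bfregn, int dyn) { return dyn ? 100 + bfregn : 10 + bfregn; }

static int pick_uar(unsigned int flags, int user_index, int *bfregn)
{
	if (flags & FLAG_BFREG_INDEX) {
		/* dynamic UAR: owned and released by userspace */
		*bfregn = INVALID_BFREG;
		return uar_of(user_index, 1);
	}
	*bfregn = (flags & FLAG_CROSS_CHANNEL) ? CROSS_CHANNEL_BFREG : alloc_bfreg();
	return uar_of(*bfregn, 0);
}

static void destroy_qp(int bfregn)
{
	/* only kernel-managed bfregs are freed here */
	if (bfregn != INVALID_BFREG)
		free_bfreg(bfregn);
}

int main(void)
{
	int bfregn;

	printf("uar %d\n", pick_uar(FLAG_BFREG_INDEX, 2, &bfregn));
	destroy_qp(bfregn);                       /* nothing freed */
	printf("uar %d\n", pick_uar(0, 0, &bfregn));
	destroy_qp(bfregn);                       /* frees bfreg 3 */
	return 0;
}
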
@@ -1015,6 +1045,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1015static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) 1045static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
1016{ 1046{
1017 if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || 1047 if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
1048 (attr->qp_type == MLX5_IB_QPT_DCI) ||
1018 (attr->qp_type == IB_QPT_XRC_INI)) 1049 (attr->qp_type == IB_QPT_XRC_INI))
1019 return MLX5_SRQ_RQ; 1050 return MLX5_SRQ_RQ;
1020 else if (!qp->has_rq) 1051 else if (!qp->has_rq)
@@ -2086,20 +2117,108 @@ static const char *ib_qp_type_str(enum ib_qp_type type)
2086 return "IB_QPT_RAW_PACKET"; 2117 return "IB_QPT_RAW_PACKET";
2087 case MLX5_IB_QPT_REG_UMR: 2118 case MLX5_IB_QPT_REG_UMR:
2088 return "MLX5_IB_QPT_REG_UMR"; 2119 return "MLX5_IB_QPT_REG_UMR";
2120 case IB_QPT_DRIVER:
2121 return "IB_QPT_DRIVER";
2089 case IB_QPT_MAX: 2122 case IB_QPT_MAX:
2090 default: 2123 default:
2091 return "Invalid QP type"; 2124 return "Invalid QP type";
2092 } 2125 }
2093} 2126}
2094 2127
2128static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
2129 struct ib_qp_init_attr *attr,
2130 struct mlx5_ib_create_qp *ucmd)
2131{
2132 struct mlx5_ib_dev *dev;
2133 struct mlx5_ib_qp *qp;
2134 int err = 0;
2135 u32 uidx = MLX5_IB_DEFAULT_UIDX;
2136 void *dctc;
2137
2138 if (!attr->srq || !attr->recv_cq)
2139 return ERR_PTR(-EINVAL);
2140
2141 dev = to_mdev(pd->device);
2142
2143 err = get_qp_user_index(to_mucontext(pd->uobject->context),
2144 ucmd, sizeof(*ucmd), &uidx);
2145 if (err)
2146 return ERR_PTR(err);
2147
2148 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2149 if (!qp)
2150 return ERR_PTR(-ENOMEM);
2151
2152 qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
2153 if (!qp->dct.in) {
2154 err = -ENOMEM;
2155 goto err_free;
2156 }
2157
2158 dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
2159 qp->qp_sub_type = MLX5_IB_QPT_DCT;
2160 MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
2161 MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
2162 MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
2163 MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
2164 MLX5_SET(dctc, dctc, user_index, uidx);
2165
2166 qp->state = IB_QPS_RESET;
2167
2168 return &qp->ibqp;
2169err_free:
2170 kfree(qp);
2171 return ERR_PTR(err);
2172}
2173
2174static int set_mlx_qp_type(struct mlx5_ib_dev *dev,
2175 struct ib_qp_init_attr *init_attr,
2176 struct mlx5_ib_create_qp *ucmd,
2177 struct ib_udata *udata)
2178{
2179 enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI };
2180 int err;
2181
2182 if (!udata)
2183 return -EINVAL;
2184
2185 if (udata->inlen < sizeof(*ucmd)) {
2186 mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n");
2187 return -EINVAL;
2188 }
2189 err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd));
2190 if (err)
2191 return err;
2192
2193 if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) {
2194 init_attr->qp_type = MLX5_IB_QPT_DCI;
2195 } else {
2196 if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) {
2197 init_attr->qp_type = MLX5_IB_QPT_DCT;
2198 } else {
2199 mlx5_ib_dbg(dev, "Invalid QP flags\n");
2200 return -EINVAL;
2201 }
2202 }
2203
2204 if (!MLX5_CAP_GEN(dev->mdev, dct)) {
2205 mlx5_ib_dbg(dev, "DC transport is not supported\n");
2206 return -EOPNOTSUPP;
2207 }
2208
2209 return 0;
2210}
2211
2095struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, 2212struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
2096 struct ib_qp_init_attr *init_attr, 2213 struct ib_qp_init_attr *verbs_init_attr,
2097 struct ib_udata *udata) 2214 struct ib_udata *udata)
2098{ 2215{
2099 struct mlx5_ib_dev *dev; 2216 struct mlx5_ib_dev *dev;
2100 struct mlx5_ib_qp *qp; 2217 struct mlx5_ib_qp *qp;
2101 u16 xrcdn = 0; 2218 u16 xrcdn = 0;
2102 int err; 2219 int err;
2220 struct ib_qp_init_attr mlx_init_attr;
2221 struct ib_qp_init_attr *init_attr = verbs_init_attr;
2103 2222
2104 if (pd) { 2223 if (pd) {
2105 dev = to_mdev(pd->device); 2224 dev = to_mdev(pd->device);
@@ -2124,6 +2243,26 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
2124 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); 2243 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
2125 } 2244 }
2126 2245
2246 if (init_attr->qp_type == IB_QPT_DRIVER) {
2247 struct mlx5_ib_create_qp ucmd;
2248
2249 init_attr = &mlx_init_attr;
2250 memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr));
2251 err = set_mlx_qp_type(dev, init_attr, &ucmd, udata);
2252 if (err)
2253 return ERR_PTR(err);
2254
2255 if (init_attr->qp_type == MLX5_IB_QPT_DCI) {
2256 if (init_attr->cap.max_recv_wr ||
2257 init_attr->cap.max_recv_sge) {
2258 mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n");
2259 return ERR_PTR(-EINVAL);
2260 }
2261 } else {
2262 return mlx5_ib_create_dct(pd, init_attr, &ucmd);
2263 }
2264 }
2265
2127 switch (init_attr->qp_type) { 2266 switch (init_attr->qp_type) {
2128 case IB_QPT_XRC_TGT: 2267 case IB_QPT_XRC_TGT:
2129 case IB_QPT_XRC_INI: 2268 case IB_QPT_XRC_INI:
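
IB_QPT_DRIVER is a generic driver-defined QP type; set_mlx_qp_type() picks the mlx5 sub-type from the vendor flags copied out of udata, and comparing against the combined DCI|DCT mask means exactly one of the two flags may be set (both, or neither, is rejected). DCI then follows the normal QP path with a zero-sized receive queue enforced, while DCT is handed to the separate creation path above. A standalone sketch of that decode (flag values are illustrative):

#include <stdio.h>

#define QP_FLAG_TYPE_DCI 0x1   /* illustrative values */
#define QP_FLAG_TYPE_DCT 0x2
#define QP_TYPE_FLAGS    (QP_FLAG_TYPE_DCI | QP_FLAG_TYPE_DCT)

/* Returns 1 for DCI, 2 for DCT, -1 if neither or both flags are set. */
static int decode_driver_qp(unsigned int flags)
{
	if ((flags & QP_TYPE_FLAGS) == QP_FLAG_TYPE_DCI)
		return 1;
	if ((flags & QP_TYPE_FLAGS) == QP_FLAG_TYPE_DCT)
		return 2;
	return -1;
}

int main(void)
{
	printf("%d\n", decode_driver_qp(QP_FLAG_TYPE_DCI));                    /* 1 */
	printf("%d\n", decode_driver_qp(QP_FLAG_TYPE_DCT));                    /* 2 */
	printf("%d\n", decode_driver_qp(QP_FLAG_TYPE_DCI | QP_FLAG_TYPE_DCT)); /* -1 */
	return 0;
}
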
@@ -2145,6 +2284,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
2145 case IB_QPT_SMI: 2284 case IB_QPT_SMI:
2146 case MLX5_IB_QPT_HW_GSI: 2285 case MLX5_IB_QPT_HW_GSI:
2147 case MLX5_IB_QPT_REG_UMR: 2286 case MLX5_IB_QPT_REG_UMR:
2287 case MLX5_IB_QPT_DCI:
2148 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 2288 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
2149 if (!qp) 2289 if (!qp)
2150 return ERR_PTR(-ENOMEM); 2290 return ERR_PTR(-ENOMEM);
@@ -2185,9 +2325,31 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
2185 return ERR_PTR(-EINVAL); 2325 return ERR_PTR(-EINVAL);
2186 } 2326 }
2187 2327
2328 if (verbs_init_attr->qp_type == IB_QPT_DRIVER)
2329 qp->qp_sub_type = init_attr->qp_type;
2330
2188 return &qp->ibqp; 2331 return &qp->ibqp;
2189} 2332}
2190 2333
2334static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
2335{
2336 struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
2337
2338 if (mqp->state == IB_QPS_RTR) {
2339 int err;
2340
2341 err = mlx5_core_destroy_dct(dev->mdev, &mqp->dct.mdct);
2342 if (err) {
2343 mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
2344 return err;
2345 }
2346 }
2347
2348 kfree(mqp->dct.in);
2349 kfree(mqp);
2350 return 0;
2351}
2352
2191int mlx5_ib_destroy_qp(struct ib_qp *qp) 2353int mlx5_ib_destroy_qp(struct ib_qp *qp)
2192{ 2354{
2193 struct mlx5_ib_dev *dev = to_mdev(qp->device); 2355 struct mlx5_ib_dev *dev = to_mdev(qp->device);
@@ -2196,6 +2358,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
2196 if (unlikely(qp->qp_type == IB_QPT_GSI)) 2358 if (unlikely(qp->qp_type == IB_QPT_GSI))
2197 return mlx5_ib_gsi_destroy_qp(qp); 2359 return mlx5_ib_gsi_destroy_qp(qp);
2198 2360
2361 if (mqp->qp_sub_type == MLX5_IB_QPT_DCT)
2362 return mlx5_ib_destroy_dct(mqp);
2363
2199 destroy_qp_common(dev, mqp); 2364 destroy_qp_common(dev, mqp);
2200 2365
2201 kfree(mqp); 2366 kfree(mqp);
@@ -2763,7 +2928,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2763 if (!context) 2928 if (!context)
2764 return -ENOMEM; 2929 return -ENOMEM;
2765 2930
2766 err = to_mlx5_st(ibqp->qp_type); 2931 err = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
2932 qp->qp_sub_type : ibqp->qp_type);
2767 if (err < 0) { 2933 if (err < 0) {
2768 mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); 2934 mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
2769 goto out; 2935 goto out;
@@ -2796,8 +2962,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2796 (ibqp->qp_type == IB_QPT_XRC_INI) || 2962 (ibqp->qp_type == IB_QPT_XRC_INI) ||
2797 (ibqp->qp_type == IB_QPT_XRC_TGT)) { 2963 (ibqp->qp_type == IB_QPT_XRC_TGT)) {
2798 if (mlx5_lag_is_active(dev->mdev)) { 2964 if (mlx5_lag_is_active(dev->mdev)) {
2965 u8 p = mlx5_core_native_port_num(dev->mdev);
2799 tx_affinity = (unsigned int)atomic_add_return(1, 2966 tx_affinity = (unsigned int)atomic_add_return(1,
2800 &dev->roce.next_port) % 2967 &dev->roce[p].next_port) %
2801 MLX5_MAX_PORTS + 1; 2968 MLX5_MAX_PORTS + 1;
2802 context->flags |= cpu_to_be32(tx_affinity << 24); 2969 context->flags |= cpu_to_be32(tx_affinity << 24);
2803 } 2970 }
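
When LAG is active, transmit affinity is now rotated with a counter kept in the RoCE state of the native port rather than a single device-wide one. Since '%' binds tighter than '+', the expression yields port numbers in 1..MLX5_MAX_PORTS. A minimal sketch of the rotation, assuming MLX5_MAX_PORTS is 2:

#include <stdio.h>

#define MAX_PORTS 2   /* assumption: mirrors MLX5_MAX_PORTS */

static unsigned int next_port;  /* per-native-port counter in the driver */

static unsigned int pick_tx_affinity(void)
{
	/* '%' binds tighter than '+', so the result is 1..MAX_PORTS */
	return ++next_port % MAX_PORTS + 1;
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("%u ", pick_tx_affinity());  /* prints: 2 1 2 1 */
	printf("\n");
	return 0;
}
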
@@ -2922,7 +3089,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2922 3089
2923 mlx5_cur = to_mlx5_state(cur_state); 3090 mlx5_cur = to_mlx5_state(cur_state);
2924 mlx5_new = to_mlx5_state(new_state); 3091 mlx5_new = to_mlx5_state(new_state);
2925 mlx5_st = to_mlx5_st(ibqp->qp_type); 3092 mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
3093 qp->qp_sub_type : ibqp->qp_type);
2926 if (mlx5_st < 0) 3094 if (mlx5_st < 0)
2927 goto out; 3095 goto out;
2928 3096
@@ -2994,6 +3162,139 @@ out:
2994 return err; 3162 return err;
2995} 3163}
2996 3164
3165static inline bool is_valid_mask(int mask, int req, int opt)
3166{
3167 if ((mask & req) != req)
3168 return false;
3169
3170 if (mask & ~(req | opt))
3171 return false;
3172
3173 return true;
3174}
3175
3176/* check valid transition for driver QP types
3177 * for now the only QP type that this function supports is DCI
3178 */
3179static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
3180 enum ib_qp_attr_mask attr_mask)
3181{
3182 int req = IB_QP_STATE;
3183 int opt = 0;
3184
3185 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
3186 req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
3187 return is_valid_mask(attr_mask, req, opt);
3188 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
3189 opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
3190 return is_valid_mask(attr_mask, req, opt);
3191 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
3192 req |= IB_QP_PATH_MTU;
3193 opt = IB_QP_PKEY_INDEX;
3194 return is_valid_mask(attr_mask, req, opt);
3195 } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
3196 req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3197 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
3198 opt = IB_QP_MIN_RNR_TIMER;
3199 return is_valid_mask(attr_mask, req, opt);
3200 } else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
3201 opt = IB_QP_MIN_RNR_TIMER;
3202 return is_valid_mask(attr_mask, req, opt);
3203 } else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
3204 return is_valid_mask(attr_mask, req, opt);
3205 }
3206 return false;
3207}
3208
3209/* mlx5_ib_modify_dct: modify a DCT QP
3210 * valid transitions are:
3211 * RESET to INIT: must set access_flags, pkey_index and port
3212 * INIT to RTR : must set min_rnr_timer, tclass, flow_label,
3213 * mtu, gid_index and hop_limit
3214 * Other transitions and attributes are illegal
3215 */
3216static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3217 int attr_mask, struct ib_udata *udata)
3218{
3219 struct mlx5_ib_qp *qp = to_mqp(ibqp);
3220 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
3221 enum ib_qp_state cur_state, new_state;
3222 int err = 0;
3223 int required = IB_QP_STATE;
3224 void *dctc;
3225
3226 if (!(attr_mask & IB_QP_STATE))
3227 return -EINVAL;
3228
3229 cur_state = qp->state;
3230 new_state = attr->qp_state;
3231
3232 dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
3233 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
3234 required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
3235 if (!is_valid_mask(attr_mask, required, 0))
3236 return -EINVAL;
3237
3238 if (attr->port_num == 0 ||
3239 attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
3240 mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
3241 attr->port_num, dev->num_ports);
3242 return -EINVAL;
3243 }
3244 if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
3245 MLX5_SET(dctc, dctc, rre, 1);
3246 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
3247 MLX5_SET(dctc, dctc, rwe, 1);
3248 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
3249 if (!mlx5_ib_dc_atomic_is_supported(dev))
3250 return -EOPNOTSUPP;
3251 MLX5_SET(dctc, dctc, rae, 1);
3252 MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
3253 }
3254 MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
3255 MLX5_SET(dctc, dctc, port, attr->port_num);
3256 MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
3257
3258 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
3259 struct mlx5_ib_modify_qp_resp resp = {};
3260 u32 min_resp_len = offsetof(typeof(resp), dctn) +
3261 sizeof(resp.dctn);
3262
3263 if (udata->outlen < min_resp_len)
3264 return -EINVAL;
3265 resp.response_length = min_resp_len;
3266
3267 required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
3268 if (!is_valid_mask(attr_mask, required, 0))
3269 return -EINVAL;
3270 MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
3271 MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
3272 MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
3273 MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
3274 MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
3275 MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
3276
3277 err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in,
3278 MLX5_ST_SZ_BYTES(create_dct_in));
3279 if (err)
3280 return err;
3281 resp.dctn = qp->dct.mdct.mqp.qpn;
3282 err = ib_copy_to_udata(udata, &resp, resp.response_length);
3283 if (err) {
3284 mlx5_core_destroy_dct(dev->mdev, &qp->dct.mdct);
3285 return err;
3286 }
3287 } else {
3288 mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
3289 return -EINVAL;
3290 }
3291 if (err)
3292 qp->state = IB_QPS_ERR;
3293 else
3294 qp->state = new_state;
3295 return err;
3296}
3297
2997int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 3298int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2998 int attr_mask, struct ib_udata *udata) 3299 int attr_mask, struct ib_udata *udata)
2999{ 3300{
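
The DCI transition table above encodes each legal state change as a pair of (required, optional) attribute masks, and is_valid_mask() accepts an attr_mask only if it contains every required bit and nothing outside required|optional. For DCT, the firmware create_dct command is only issued on the INIT-to-RTR modify, with the DCT number returned to userspace in the modify response. A standalone sketch of the mask check (attribute bit values are illustrative, not the real IB_QP_* values):

#include <stdbool.h>
#include <stdio.h>

#define ATTR_STATE 0x1
#define ATTR_PKEY  0x2
#define ATTR_PORT  0x4
#define ATTR_MTU   0x8

static bool is_valid_mask(int mask, int req, int opt)
{
	if ((mask & req) != req)        /* a required attribute is missing */
		return false;
	if (mask & ~(req | opt))        /* an attribute outside the allowed set */
		return false;
	return true;
}

int main(void)
{
	/* RESET->INIT for a DCI QP: STATE, PKEY and PORT required, nothing optional */
	int req = ATTR_STATE | ATTR_PKEY | ATTR_PORT;

	printf("%d\n", is_valid_mask(req, req, 0));             /* 1: exactly the required set */
	printf("%d\n", is_valid_mask(ATTR_STATE, req, 0));      /* 0: PKEY/PORT missing */
	printf("%d\n", is_valid_mask(req | ATTR_MTU, req, 0));  /* 0: MTU not allowed here */
	return 0;
}
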
@@ -3011,8 +3312,14 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3011 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 3312 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
3012 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask); 3313 return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
3013 3314
3014 qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ? 3315 if (ibqp->qp_type == IB_QPT_DRIVER)
3015 IB_QPT_GSI : ibqp->qp_type; 3316 qp_type = qp->qp_sub_type;
3317 else
3318 qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
3319 IB_QPT_GSI : ibqp->qp_type;
3320
3321 if (qp_type == MLX5_IB_QPT_DCT)
3322 return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata);
3016 3323
3017 mutex_lock(&qp->mutex); 3324 mutex_lock(&qp->mutex);
3018 3325
@@ -3031,15 +3338,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3031 goto out; 3338 goto out;
3032 } 3339 }
3033 } else if (qp_type != MLX5_IB_QPT_REG_UMR && 3340 } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
3034 !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { 3341 qp_type != MLX5_IB_QPT_DCI &&
3342 !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
3035 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", 3343 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
3036 cur_state, new_state, ibqp->qp_type, attr_mask); 3344 cur_state, new_state, ibqp->qp_type, attr_mask);
3037 goto out; 3345 goto out;
3346 } else if (qp_type == MLX5_IB_QPT_DCI &&
3347 !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
3348 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
3349 cur_state, new_state, qp_type, attr_mask);
3350 goto out;
3038 } 3351 }
3039 3352
3040 if ((attr_mask & IB_QP_PORT) && 3353 if ((attr_mask & IB_QP_PORT) &&
3041 (attr->port_num == 0 || 3354 (attr->port_num == 0 ||
3042 attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) { 3355 attr->port_num > dev->num_ports)) {
3043 mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n", 3356 mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
3044 attr->port_num, dev->num_ports); 3357 attr->port_num, dev->num_ports);
3045 goto out; 3358 goto out;
@@ -4358,11 +4671,10 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
4358 struct rdma_ah_attr *ah_attr, 4671 struct rdma_ah_attr *ah_attr,
4359 struct mlx5_qp_path *path) 4672 struct mlx5_qp_path *path)
4360{ 4673{
4361 struct mlx5_core_dev *dev = ibdev->mdev;
4362 4674
4363 memset(ah_attr, 0, sizeof(*ah_attr)); 4675 memset(ah_attr, 0, sizeof(*ah_attr));
4364 4676
4365 if (!path->port || path->port > MLX5_CAP_GEN(dev, num_ports)) 4677 if (!path->port || path->port > ibdev->num_ports)
4366 return; 4678 return;
4367 4679
4368 ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port); 4680 ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
@@ -4577,6 +4889,71 @@ out:
4577 return err; 4889 return err;
4578} 4890}
4579 4891
4892static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
4893 struct ib_qp_attr *qp_attr, int qp_attr_mask,
4894 struct ib_qp_init_attr *qp_init_attr)
4895{
4896 struct mlx5_core_dct *dct = &mqp->dct.mdct;
4897 u32 *out;
4898 u32 access_flags = 0;
4899 int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
4900 void *dctc;
4901 int err;
4902 int supported_mask = IB_QP_STATE |
4903 IB_QP_ACCESS_FLAGS |
4904 IB_QP_PORT |
4905 IB_QP_MIN_RNR_TIMER |
4906 IB_QP_AV |
4907 IB_QP_PATH_MTU |
4908 IB_QP_PKEY_INDEX;
4909
4910 if (qp_attr_mask & ~supported_mask)
4911 return -EINVAL;
4912 if (mqp->state != IB_QPS_RTR)
4913 return -EINVAL;
4914
4915 out = kzalloc(outlen, GFP_KERNEL);
4916 if (!out)
4917 return -ENOMEM;
4918
4919 err = mlx5_core_dct_query(dev->mdev, dct, out, outlen);
4920 if (err)
4921 goto out;
4922
4923 dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);
4924
4925 if (qp_attr_mask & IB_QP_STATE)
4926 qp_attr->qp_state = IB_QPS_RTR;
4927
4928 if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
4929 if (MLX5_GET(dctc, dctc, rre))
4930 access_flags |= IB_ACCESS_REMOTE_READ;
4931 if (MLX5_GET(dctc, dctc, rwe))
4932 access_flags |= IB_ACCESS_REMOTE_WRITE;
4933 if (MLX5_GET(dctc, dctc, rae))
4934 access_flags |= IB_ACCESS_REMOTE_ATOMIC;
4935 qp_attr->qp_access_flags = access_flags;
4936 }
4937
4938 if (qp_attr_mask & IB_QP_PORT)
4939 qp_attr->port_num = MLX5_GET(dctc, dctc, port);
4940 if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
4941 qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
4942 if (qp_attr_mask & IB_QP_AV) {
4943 qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
4944 qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
4945 qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
4946 qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
4947 }
4948 if (qp_attr_mask & IB_QP_PATH_MTU)
4949 qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
4950 if (qp_attr_mask & IB_QP_PKEY_INDEX)
4951 qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);
4952out:
4953 kfree(out);
4954 return err;
4955}
4956
4580int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 4957int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4581 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) 4958 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
4582{ 4959{
@@ -4596,6 +4973,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
4596 memset(qp_init_attr, 0, sizeof(*qp_init_attr)); 4973 memset(qp_init_attr, 0, sizeof(*qp_init_attr));
4597 memset(qp_attr, 0, sizeof(*qp_attr)); 4974 memset(qp_attr, 0, sizeof(*qp_attr));
4598 4975
4976 if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT))
4977 return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
4978 qp_attr_mask, qp_init_attr);
4979
4599 mutex_lock(&qp->mutex); 4980 mutex_lock(&qp->mutex);
4600 4981
4601 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET || 4982 if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
@@ -4685,13 +5066,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
4685 int err; 5066 int err;
4686 5067
4687 err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); 5068 err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
4688 if (err) { 5069 if (err)
4689 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); 5070 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
4690 return err;
4691 }
4692 5071
4693 kfree(xrcd); 5072 kfree(xrcd);
4694
4695 return 0; 5073 return 0;
4696} 5074}
4697 5075
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index c6fe89d79248..2fe503e86c1d 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
472 goto out; 472 goto out;
473 } 473 }
474 474
475 ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL); 475 ret = get_user_pages_fast(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages);
476 if (ret < 0) 476 if (ret < 0)
477 goto out; 477 goto out;
478 478
@@ -623,13 +623,12 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
623 page = dev->db_tab->page + end; 623 page = dev->db_tab->page + end;
624 624
625alloc: 625alloc:
626 page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, 626 page->db_rec = dma_zalloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
627 &page->mapping, GFP_KERNEL); 627 &page->mapping, GFP_KERNEL);
628 if (!page->db_rec) { 628 if (!page->db_rec) {
629 ret = -ENOMEM; 629 ret = -ENOMEM;
630 goto out; 630 goto out;
631 } 631 }
632 memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
633 632
634 ret = mthca_MAP_ICM_page(dev, page->mapping, 633 ret = mthca_MAP_ICM_page(dev, page->mapping,
635 mthca_uarc_virt(dev, &dev->driver_uar, i)); 634 mthca_uarc_virt(dev, &dev->driver_uar, i));
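
This hunk, like the ocrdma and qedr hunks further down, replaces dma_alloc_coherent() plus memset() with dma_zalloc_coherent(), which zeroes the buffer as part of the allocation (at the time it simply passed __GFP_ZERO down to dma_alloc_coherent()). A userspace analogue of the same fold-the-zeroing-into-the-allocator conversion:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

static void *zalloc(size_t size)
{
	return calloc(1, size);           /* allocation and zeroing in one call */
}

int main(void)
{
	size_t size = 64;

	/* old pattern: allocate, then clear */
	unsigned char *a = malloc(size);
	if (a)
		memset(a, 0, size);

	/* new pattern: one call, already cleared */
	unsigned char *b = zalloc(size);

	printf("%d %d\n", a ? a[0] : -1, b ? b[0] : -1);  /* 0 0 */
	free(a);
	free(b);
	return 0;
}
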
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
deleted file mode 100644
index 5fe56e810739..000000000000
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#ifndef MTHCA_USER_H
35#define MTHCA_USER_H
36
37#include <linux/types.h>
38
39/*
40 * Increment this value if any changes that break userspace ABI
41 * compatibility are made.
42 */
43#define MTHCA_UVERBS_ABI_VERSION 1
44
45/*
46 * Make sure that all structs defined in this file remain laid out so
47 * that they pack the same way on 32-bit and 64-bit architectures (to
48 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
49 * In particular do not use pointer types -- pass pointers in __u64
50 * instead.
51 */
52
53struct mthca_alloc_ucontext_resp {
54 __u32 qp_tab_size;
55 __u32 uarc_size;
56};
57
58struct mthca_alloc_pd_resp {
59 __u32 pdn;
60 __u32 reserved;
61};
62
63struct mthca_reg_mr {
64/*
65 * Mark the memory region with a DMA attribute that causes
66 * in-flight DMA to be flushed when the region is written to:
67 */
68#define MTHCA_MR_DMASYNC 0x1
69 __u32 mr_attrs;
70 __u32 reserved;
71};
72
73struct mthca_create_cq {
74 __u32 lkey;
75 __u32 pdn;
76 __u64 arm_db_page;
77 __u64 set_db_page;
78 __u32 arm_db_index;
79 __u32 set_db_index;
80};
81
82struct mthca_create_cq_resp {
83 __u32 cqn;
84 __u32 reserved;
85};
86
87struct mthca_resize_cq {
88 __u32 lkey;
89 __u32 reserved;
90};
91
92struct mthca_create_srq {
93 __u32 lkey;
94 __u32 db_index;
95 __u64 db_page;
96};
97
98struct mthca_create_srq_resp {
99 __u32 srqn;
100 __u32 reserved;
101};
102
103struct mthca_create_qp {
104 __u32 lkey;
105 __u32 reserved;
106 __u64 sq_db_page;
107 __u64 rq_db_page;
108 __u32 sq_db_index;
109 __u32 rq_db_index;
110};
111
112#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index c56ca2a74df5..6cdfbf8c5674 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1365,7 +1365,7 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
1365static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, 1365static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
1366 struct nes_cm_node *cm_node) 1366 struct nes_cm_node *cm_node)
1367{ 1367{
1368 cm_node->accelerated = 1; 1368 cm_node->accelerated = true;
1369 1369
1370 if (cm_node->accept_pend) { 1370 if (cm_node->accept_pend) {
1371 BUG_ON(!cm_node->listener); 1371 BUG_ON(!cm_node->listener);
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index d827d03e3941..b9cc02b4e8d5 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -279,7 +279,6 @@ struct nes_cm_tcp_context {
279 u8 rcv_wscale; 279 u8 rcv_wscale;
280 280
281 struct nes_cm_tsa_context tsa_cntxt; 281 struct nes_cm_tsa_context tsa_cntxt;
282 struct timeval sent_ts;
283}; 282};
284 283
285 284
@@ -341,7 +340,7 @@ struct nes_cm_node {
341 u16 mpa_frame_size; 340 u16 mpa_frame_size;
342 struct iw_cm_id *cm_id; 341 struct iw_cm_id *cm_id;
343 struct list_head list; 342 struct list_head list;
344 int accelerated; 343 bool accelerated;
345 struct nes_cm_listener *listener; 344 struct nes_cm_listener *listener;
346 enum nes_cm_conn_type conn_type; 345 enum nes_cm_conn_type conn_type;
347 struct nes_vnic *nesvnic; 346 struct nes_vnic *nesvnic;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 0ba695a88b62..9904918589a4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -380,11 +380,10 @@ static int ocrdma_alloc_q(struct ocrdma_dev *dev,
380 q->len = len; 380 q->len = len;
381 q->entry_size = entry_size; 381 q->entry_size = entry_size;
382 q->size = len * entry_size; 382 q->size = len * entry_size;
383 q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size, 383 q->va = dma_zalloc_coherent(&dev->nic_info.pdev->dev, q->size,
384 &q->dma, GFP_KERNEL); 384 &q->dma, GFP_KERNEL);
385 if (!q->va) 385 if (!q->va)
386 return -ENOMEM; 386 return -ENOMEM;
387 memset(q->va, 0, q->size);
388 return 0; 387 return 0;
389} 388}
390 389
@@ -1819,12 +1818,11 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1819 return -ENOMEM; 1818 return -ENOMEM;
1820 ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ, 1819 ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
1821 OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); 1820 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
1822 cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL); 1821 cq->va = dma_zalloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
1823 if (!cq->va) { 1822 if (!cq->va) {
1824 status = -ENOMEM; 1823 status = -ENOMEM;
1825 goto mem_err; 1824 goto mem_err;
1826 } 1825 }
1827 memset(cq->va, 0, cq->len);
1828 page_size = cq->len / hw_pages; 1826 page_size = cq->len / hw_pages;
1829 cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) << 1827 cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
1830 OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT; 1828 OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
@@ -2212,10 +2210,9 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
2212 qp->sq.max_cnt = max_wqe_allocated; 2210 qp->sq.max_cnt = max_wqe_allocated;
2213 len = (hw_pages * hw_page_size); 2211 len = (hw_pages * hw_page_size);
2214 2212
2215 qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); 2213 qp->sq.va = dma_zalloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
2216 if (!qp->sq.va) 2214 if (!qp->sq.va)
2217 return -EINVAL; 2215 return -EINVAL;
2218 memset(qp->sq.va, 0, len);
2219 qp->sq.len = len; 2216 qp->sq.len = len;
2220 qp->sq.pa = pa; 2217 qp->sq.pa = pa;
2221 qp->sq.entry_size = dev->attr.wqe_size; 2218 qp->sq.entry_size = dev->attr.wqe_size;
@@ -2263,10 +2260,9 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
2263 qp->rq.max_cnt = max_rqe_allocated; 2260 qp->rq.max_cnt = max_rqe_allocated;
2264 len = (hw_pages * hw_page_size); 2261 len = (hw_pages * hw_page_size);
2265 2262
2266 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); 2263 qp->rq.va = dma_zalloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
2267 if (!qp->rq.va) 2264 if (!qp->rq.va)
2268 return -ENOMEM; 2265 return -ENOMEM;
2269 memset(qp->rq.va, 0, len);
2270 qp->rq.pa = pa; 2266 qp->rq.pa = pa;
2271 qp->rq.len = len; 2267 qp->rq.len = len;
2272 qp->rq.entry_size = dev->attr.rqe_size; 2268 qp->rq.entry_size = dev->attr.rqe_size;
@@ -2320,11 +2316,10 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
2320 if (dev->attr.ird == 0) 2316 if (dev->attr.ird == 0)
2321 return 0; 2317 return 0;
2322 2318
2323 qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len, 2319 qp->ird_q_va = dma_zalloc_coherent(&pdev->dev, ird_q_len, &pa,
2324 &pa, GFP_KERNEL); 2320 GFP_KERNEL);
2325 if (!qp->ird_q_va) 2321 if (!qp->ird_q_va)
2326 return -ENOMEM; 2322 return -ENOMEM;
2327 memset(qp->ird_q_va, 0, ird_q_len);
2328 ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages, 2323 ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
2329 pa, ird_page_size); 2324 pa, ird_page_size);
2330 for (; i < ird_q_len / dev->attr.rqe_size; i++) { 2325 for (; i < ird_q_len / dev->attr.rqe_size; i++) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index e528d7acb7f6..24d20a4aa262 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -73,15 +73,13 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
73 mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), 73 mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
74 sizeof(struct ocrdma_rdma_stats_resp)); 74 sizeof(struct ocrdma_rdma_stats_resp));
75 75
76 mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size, 76 mem->va = dma_zalloc_coherent(&dev->nic_info.pdev->dev, mem->size,
77 &mem->pa, GFP_KERNEL); 77 &mem->pa, GFP_KERNEL);
78 if (!mem->va) { 78 if (!mem->va) {
79 pr_err("%s: stats mbox allocation failed\n", __func__); 79 pr_err("%s: stats mbox allocation failed\n", __func__);
80 return false; 80 return false;
81 } 81 }
82 82
83 memset(mem->va, 0, mem->size);
84
85 /* Alloc debugfs mem */ 83 /* Alloc debugfs mem */
86 mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); 84 mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
87 if (!mem->debugfs_mem) 85 if (!mem->debugfs_mem)
@@ -834,7 +832,7 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
834 832
835 dev->reset_stats.type = OCRDMA_RESET_STATS; 833 dev->reset_stats.type = OCRDMA_RESET_STATS;
836 dev->reset_stats.dev = dev; 834 dev->reset_stats.dev = dev;
837 if (!debugfs_create_file("reset_stats", S_IRUSR, dev->dir, 835 if (!debugfs_create_file("reset_stats", 0200, dev->dir,
838 &dev->reset_stats, &ocrdma_dbg_ops)) 836 &dev->reset_stats, &ocrdma_dbg_ops))
839 goto err; 837 goto err;
840 838
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 7866fd8051f6..8009bdad4e5b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -461,7 +461,7 @@ retry:
461static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, 461static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
462 struct ocrdma_pd *pd) 462 struct ocrdma_pd *pd)
463{ 463{
464 return (uctx->cntxt_pd == pd ? true : false); 464 return (uctx->cntxt_pd == pd);
465} 465}
466 466
467static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, 467static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
@@ -550,13 +550,12 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
550 INIT_LIST_HEAD(&ctx->mm_head); 550 INIT_LIST_HEAD(&ctx->mm_head);
551 mutex_init(&ctx->mm_list_lock); 551 mutex_init(&ctx->mm_list_lock);
552 552
553 ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len, 553 ctx->ah_tbl.va = dma_zalloc_coherent(&pdev->dev, map_len,
554 &ctx->ah_tbl.pa, GFP_KERNEL); 554 &ctx->ah_tbl.pa, GFP_KERNEL);
555 if (!ctx->ah_tbl.va) { 555 if (!ctx->ah_tbl.va) {
556 kfree(ctx); 556 kfree(ctx);
557 return ERR_PTR(-ENOMEM); 557 return ERR_PTR(-ENOMEM);
558 } 558 }
559 memset(ctx->ah_tbl.va, 0, map_len);
560 ctx->ah_tbl.len = map_len; 559 ctx->ah_tbl.len = map_len;
561 560
562 memset(&resp, 0, sizeof(resp)); 561 memset(&resp, 0, sizeof(resp));
@@ -885,13 +884,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
885 return -ENOMEM; 884 return -ENOMEM;
886 885
887 for (i = 0; i < mr->num_pbls; i++) { 886 for (i = 0; i < mr->num_pbls; i++) {
888 va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL); 887 va = dma_zalloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
889 if (!va) { 888 if (!va) {
890 ocrdma_free_mr_pbl_tbl(dev, mr); 889 ocrdma_free_mr_pbl_tbl(dev, mr);
891 status = -ENOMEM; 890 status = -ENOMEM;
892 break; 891 break;
893 } 892 }
894 memset(va, 0, dma_len);
895 mr->pbl_table[i].va = va; 893 mr->pbl_table[i].va = va;
896 mr->pbl_table[i].pa = pa; 894 mr->pbl_table[i].pa = pa;
897 } 895 }
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b26aa88dab48..53f00dbf313f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -604,12 +604,11 @@ static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
604 return ERR_PTR(-ENOMEM); 604 return ERR_PTR(-ENOMEM);
605 605
606 for (i = 0; i < pbl_info->num_pbls; i++) { 606 for (i = 0; i < pbl_info->num_pbls; i++) {
607 va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, 607 va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
608 &pa, flags); 608 &pa, flags);
609 if (!va) 609 if (!va)
610 goto err; 610 goto err;
611 611
612 memset(va, 0, pbl_info->pbl_size);
613 pbl_table[i].va = va; 612 pbl_table[i].va = va;
614 pbl_table[i].pa = pa; 613 pbl_table[i].pa = pa;
615 } 614 }
@@ -3040,7 +3039,7 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3040 swqe->wqe_size = 2; 3039 swqe->wqe_size = 2;
3041 swqe2 = qed_chain_produce(&qp->sq.pbl); 3040 swqe2 = qed_chain_produce(&qp->sq.pbl);
3042 3041
3043 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data); 3042 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3044 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, 3043 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3045 wr, bad_wr); 3044 wr, bad_wr);
3046 swqe->length = cpu_to_le32(length); 3045 swqe->length = cpu_to_le32(length);
@@ -3471,9 +3470,9 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
3471 break; 3470 break;
3472 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: 3471 case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3473 if (qp->state != QED_ROCE_QP_STATE_ERR) 3472 if (qp->state != QED_ROCE_QP_STATE_ERR)
3474 DP_ERR(dev, 3473 DP_DEBUG(dev, QEDR_MSG_CQ,
3475 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", 3474 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3476 cq->icid, qp->icid); 3475 cq->icid, qp->icid);
3477 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, 3476 cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3478 IB_WC_WR_FLUSH_ERR, 1); 3477 IB_WC_WR_FLUSH_ERR, 1);
3479 break; 3478 break;
@@ -3591,7 +3590,7 @@ static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3591 wc->byte_len = le32_to_cpu(resp->length); 3590 wc->byte_len = le32_to_cpu(resp->length);
3592 3591
3593 if (resp->flags & QEDR_RESP_IMM) { 3592 if (resp->flags & QEDR_RESP_IMM) {
3594 wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key); 3593 wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3595 wc->wc_flags |= IB_WC_WITH_IMM; 3594 wc->wc_flags |= IB_WC_WITH_IMM;
3596 3595
3597 if (resp->flags & QEDR_RESP_RDMA) 3596 if (resp->flags & QEDR_RESP_RDMA)
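
In the verbs API, imm_data in ib_send_wr and ib_wc is a big-endian (__be32) value, while qedr's descriptors and CQEs carry it little-endian; these two hunks convert explicitly in both directions instead of assigning the raw 32-bit word. A standalone sketch of the round trip (the cpu_to_le32/le32_to_cpu stubs below assume a little-endian host):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* htonl/ntohl: cpu <-> big-endian */

static uint32_t cpu_to_le32(uint32_t v) { return v; }  /* identity on a little-endian host */
static uint32_t le32_to_cpu(uint32_t v) { return v; }

int main(void)
{
	uint32_t imm_be = htonl(0x11223344);      /* what the ULP hands to post_send */

	/* post_send path: be32_to_cpu() then cpu_to_le32() into the descriptor */
	uint32_t wire = cpu_to_le32(ntohl(imm_be));

	/* poll_cq path: le32_to_cpu() then cpu_to_be32() back into ib_wc */
	uint32_t imm_out = htonl(le32_to_cpu(wire));

	printf("match: %d\n", imm_out == imm_be); /* 1 */
	return 0;
}
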
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 092ed8103842..0235f76bbc72 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1428,8 +1428,6 @@ u64 qib_sps_ints(void);
1428 */ 1428 */
1429dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, 1429dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
1430 size_t, int); 1430 size_t, int);
1431const char *qib_get_unit_name(int unit);
1432const char *qib_get_card_name(struct rvt_dev_info *rdi);
1433struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi); 1431struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
1434 1432
1435/* 1433/*
@@ -1488,15 +1486,15 @@ extern struct mutex qib_mutex;
1488 1486
1489#define qib_dev_err(dd, fmt, ...) \ 1487#define qib_dev_err(dd, fmt, ...) \
1490 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ 1488 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
1491 qib_get_unit_name((dd)->unit), ##__VA_ARGS__) 1489 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
1492 1490
1493#define qib_dev_warn(dd, fmt, ...) \ 1491#define qib_dev_warn(dd, fmt, ...) \
1494 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ 1492 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
1495 qib_get_unit_name((dd)->unit), ##__VA_ARGS__) 1493 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
1496 1494
1497#define qib_dev_porterr(dd, port, fmt, ...) \ 1495#define qib_dev_porterr(dd, port, fmt, ...) \
1498 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ 1496 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
1499 qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ 1497 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (dd)->unit, (port), \
1500 ##__VA_ARGS__) 1498 ##__VA_ARGS__)
1501 1499
1502#define qib_devinfo(pcidev, fmt, ...) \ 1500#define qib_devinfo(pcidev, fmt, ...) \
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 33d3335385e8..3117cc5f2a9a 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -81,22 +81,6 @@ MODULE_DESCRIPTION("Intel IB driver");
81 81
82struct qlogic_ib_stats qib_stats; 82struct qlogic_ib_stats qib_stats;
83 83
84const char *qib_get_unit_name(int unit)
85{
86 static char iname[16];
87
88 snprintf(iname, sizeof(iname), "infinipath%u", unit);
89 return iname;
90}
91
92const char *qib_get_card_name(struct rvt_dev_info *rdi)
93{
94 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
95 struct qib_devdata *dd = container_of(ibdev,
96 struct qib_devdata, verbs_dev);
97 return qib_get_unit_name(dd->unit);
98}
99
100struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi) 84struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
101{ 85{
102 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi); 86 struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 33a2e74c8495..5838b3bf34b9 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -163,8 +163,7 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
163 if (bguid[6] == 0xff) { 163 if (bguid[6] == 0xff) {
164 if (bguid[5] == 0xff) { 164 if (bguid[5] == 0xff) {
165 qib_dev_err(dd, 165 qib_dev_err(dd,
166 "Can't set %s GUID from base, wraps to OUI!\n", 166 "Can't set GUID from base, wraps to OUI!\n");
167 qib_get_unit_name(t));
168 dd->base_guid = 0; 167 dd->base_guid = 0;
169 goto bail; 168 goto bail;
170 } 169 }
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b67df63bd64b..f7593b5e2b76 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -568,20 +568,16 @@ done:
568static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key) 568static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
569{ 569{
570 struct qib_pportdata *ppd = rcd->ppd; 570 struct qib_pportdata *ppd = rcd->ppd;
571 int i, any = 0, pidx = -1; 571 int i, pidx = -1;
572 bool any = false;
572 u16 lkey = key & 0x7FFF; 573 u16 lkey = key & 0x7FFF;
573 int ret;
574 574
575 if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF)) { 575 if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF))
576 /* nothing to do; this key always valid */ 576 /* nothing to do; this key always valid */
577 ret = 0; 577 return 0;
578 goto bail;
579 }
580 578
581 if (!lkey) { 579 if (!lkey)
582 ret = -EINVAL; 580 return -EINVAL;
583 goto bail;
584 }
585 581
586 /* 582 /*
587 * Set the full membership bit, because it has to be 583 * Set the full membership bit, because it has to be
@@ -594,18 +590,14 @@ static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
594 for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { 590 for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
595 if (!rcd->pkeys[i] && pidx == -1) 591 if (!rcd->pkeys[i] && pidx == -1)
596 pidx = i; 592 pidx = i;
597 if (rcd->pkeys[i] == key) { 593 if (rcd->pkeys[i] == key)
598 ret = -EEXIST; 594 return -EEXIST;
599 goto bail;
600 }
601 } 595 }
602 if (pidx == -1) { 596 if (pidx == -1)
603 ret = -EBUSY; 597 return -EBUSY;
604 goto bail; 598 for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
605 }
606 for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
607 if (!ppd->pkeys[i]) { 599 if (!ppd->pkeys[i]) {
608 any++; 600 any = true;
609 continue; 601 continue;
610 } 602 }
611 if (ppd->pkeys[i] == key) { 603 if (ppd->pkeys[i] == key) {
@@ -613,44 +605,34 @@ static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
613 605
614 if (atomic_inc_return(pkrefs) > 1) { 606 if (atomic_inc_return(pkrefs) > 1) {
615 rcd->pkeys[pidx] = key; 607 rcd->pkeys[pidx] = key;
616 ret = 0; 608 return 0;
617 goto bail;
618 } else {
619 /*
620 * lost race, decrement count, catch below
621 */
622 atomic_dec(pkrefs);
623 any++;
624 } 609 }
610 /*
611 * lost race, decrement count, catch below
612 */
613 atomic_dec(pkrefs);
614 any = true;
625 } 615 }
626 if ((ppd->pkeys[i] & 0x7FFF) == lkey) { 616 if ((ppd->pkeys[i] & 0x7FFF) == lkey)
627 /* 617 /*
628 * It makes no sense to have both the limited and 618 * It makes no sense to have both the limited and
629 * full membership PKEY set at the same time since 619 * full membership PKEY set at the same time since
630 * the unlimited one will disable the limited one. 620 * the unlimited one will disable the limited one.
631 */ 621 */
632 ret = -EEXIST; 622 return -EEXIST;
633 goto bail;
634 }
635 }
636 if (!any) {
637 ret = -EBUSY;
638 goto bail;
639 } 623 }
640 for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { 624 if (!any)
625 return -EBUSY;
626 for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
641 if (!ppd->pkeys[i] && 627 if (!ppd->pkeys[i] &&
642 atomic_inc_return(&ppd->pkeyrefs[i]) == 1) { 628 atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
643 rcd->pkeys[pidx] = key; 629 rcd->pkeys[pidx] = key;
644 ppd->pkeys[i] = key; 630 ppd->pkeys[i] = key;
645 (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); 631 (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
646 ret = 0; 632 return 0;
647 goto bail;
648 } 633 }
649 } 634 }
650 ret = -EBUSY; 635 return -EBUSY;
651
652bail:
653 return ret;
654} 636}
655 637
656/** 638/**
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 85dfbba427f6..3990f386aa32 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1119,6 +1119,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1119 "Could not allocate unit ID: error %d\n", -ret); 1119 "Could not allocate unit ID: error %d\n", -ret);
1120 goto bail; 1120 goto bail;
1121 } 1121 }
1122 rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s%d", "qib", dd->unit);
1123
1122 dd->int_counter = alloc_percpu(u64); 1124 dd->int_counter = alloc_percpu(u64);
1123 if (!dd->int_counter) { 1125 if (!dd->int_counter) {
1124 ret = -ENOMEM; 1126 ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
deleted file mode 100644
index 8fdf79f8d4e4..000000000000
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ /dev/null
@@ -1,235 +0,0 @@
1/*
2 * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include "qib.h"
35
36/**
37 * qib_alloc_lkey - allocate an lkey
38 * @mr: memory region that this lkey protects
39 * @dma_region: 0->normal key, 1->restricted DMA key
40 *
41 * Returns 0 if successful, otherwise returns -errno.
42 *
43 * Increments mr reference count as required.
44 *
45 * Sets the lkey field mr for non-dma regions.
46 *
47 */
48
49int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
50{
51 unsigned long flags;
52 u32 r;
53 u32 n;
54 int ret = 0;
55 struct qib_ibdev *dev = to_idev(mr->pd->device);
56 struct rvt_lkey_table *rkt = &dev->lk_table;
57
58 spin_lock_irqsave(&rkt->lock, flags);
59
60 /* special case for dma_mr lkey == 0 */
61 if (dma_region) {
62 struct rvt_mregion *tmr;
63
64 tmr = rcu_access_pointer(dev->dma_mr);
65 if (!tmr) {
66 qib_get_mr(mr);
67 rcu_assign_pointer(dev->dma_mr, mr);
68 mr->lkey_published = 1;
69 }
70 goto success;
71 }
72
73 /* Find the next available LKEY */
74 r = rkt->next;
75 n = r;
76 for (;;) {
77 if (rkt->table[r] == NULL)
78 break;
79 r = (r + 1) & (rkt->max - 1);
80 if (r == n)
81 goto bail;
82 }
83 rkt->next = (r + 1) & (rkt->max - 1);
84 /*
85 * Make sure lkey is never zero which is reserved to indicate an
86 * unrestricted LKEY.
87 */
88 rkt->gen++;
89 /*
90 * bits are capped in qib_verbs.c to insure enough bits
91 * for generation number
92 */
93 mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
94 ((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
95 << 8);
96 if (mr->lkey == 0) {
97 mr->lkey |= 1 << 8;
98 rkt->gen++;
99 }
100 qib_get_mr(mr);
101 rcu_assign_pointer(rkt->table[r], mr);
102 mr->lkey_published = 1;
103success:
104 spin_unlock_irqrestore(&rkt->lock, flags);
105out:
106 return ret;
107bail:
108 spin_unlock_irqrestore(&rkt->lock, flags);
109 ret = -ENOMEM;
110 goto out;
111}
112
113/**
114 * qib_free_lkey - free an lkey
115 * @mr: mr to free from tables
116 */
117void qib_free_lkey(struct rvt_mregion *mr)
118{
119 unsigned long flags;
120 u32 lkey = mr->lkey;
121 u32 r;
122 struct qib_ibdev *dev = to_idev(mr->pd->device);
123 struct rvt_lkey_table *rkt = &dev->lk_table;
124
125 spin_lock_irqsave(&rkt->lock, flags);
126 if (!mr->lkey_published)
127 goto out;
128 if (lkey == 0)
129 RCU_INIT_POINTER(dev->dma_mr, NULL);
130 else {
131 r = lkey >> (32 - ib_rvt_lkey_table_size);
132 RCU_INIT_POINTER(rkt->table[r], NULL);
133 }
134 qib_put_mr(mr);
135 mr->lkey_published = 0;
136out:
137 spin_unlock_irqrestore(&rkt->lock, flags);
138}
139
140/**
141 * qib_rkey_ok - check the IB virtual address, length, and RKEY
142 * @qp: qp for validation
143 * @sge: SGE state
144 * @len: length of data
145 * @vaddr: virtual address to place data
146 * @rkey: rkey to check
147 * @acc: access flags
148 *
149 * Return 1 if successful, otherwise 0.
150 *
151 * increments the reference count upon success
152 */
153int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
154 u32 len, u64 vaddr, u32 rkey, int acc)
155{
156 struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
157 struct rvt_mregion *mr;
158 unsigned n, m;
159 size_t off;
160
161 /* We use RKEY == zero for kernel virtual addresses */
162 rcu_read_lock();
163 if (rkey == 0) {
164 struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
165 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
166
167 if (pd->user)
168 goto bail;
169 mr = rcu_dereference(dev->dma_mr);
170 if (!mr)
171 goto bail;
172 if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
173 goto bail;
174 rcu_read_unlock();
175
176 sge->mr = mr;
177 sge->vaddr = (void *) vaddr;
178 sge->length = len;
179 sge->sge_length = len;
180 sge->m = 0;
181 sge->n = 0;
182 goto ok;
183 }
184
185 mr = rcu_dereference(
186 rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
187 if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
188 goto bail;
189
190 off = vaddr - mr->iova;
191 if (unlikely(vaddr < mr->iova || off + len > mr->length ||
192 (mr->access_flags & acc) == 0))
193 goto bail;
194 if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
195 goto bail;
196 rcu_read_unlock();
197
198 off += mr->offset;
199 if (mr->page_shift) {
200 /*
201 page sizes are uniform power of 2 so no loop is necessary
202 entries_spanned_by_off is the number of times the loop below
203 would have executed.
204 */
205 size_t entries_spanned_by_off;
206
207 entries_spanned_by_off = off >> mr->page_shift;
208 off -= (entries_spanned_by_off << mr->page_shift);
209 m = entries_spanned_by_off / RVT_SEGSZ;
210 n = entries_spanned_by_off % RVT_SEGSZ;
211 } else {
212 m = 0;
213 n = 0;
214 while (off >= mr->map[m]->segs[n].length) {
215 off -= mr->map[m]->segs[n].length;
216 n++;
217 if (n >= RVT_SEGSZ) {
218 m++;
219 n = 0;
220 }
221 }
222 }
223 sge->mr = mr;
224 sge->vaddr = mr->map[m]->segs[n].vaddr + off;
225 sge->length = mr->map[m]->segs[n].length - off;
226 sge->sge_length = len;
227 sge->m = m;
228 sge->n = n;
229ok:
230 return 1;
231bail:
232 rcu_read_unlock();
233 return 0;
234}
235
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 1a785c37ad0a..cfddff45413f 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -432,13 +432,13 @@ no_flow_control:
432 qp->s_state = OP(COMPARE_SWAP); 432 qp->s_state = OP(COMPARE_SWAP);
433 put_ib_ateth_swap(wqe->atomic_wr.swap, 433 put_ib_ateth_swap(wqe->atomic_wr.swap,
434 &ohdr->u.atomic_eth); 434 &ohdr->u.atomic_eth);
435 put_ib_ateth_swap(wqe->atomic_wr.compare_add, 435 put_ib_ateth_compare(wqe->atomic_wr.compare_add,
436 &ohdr->u.atomic_eth); 436 &ohdr->u.atomic_eth);
437 } else { 437 } else {
438 qp->s_state = OP(FETCH_ADD); 438 qp->s_state = OP(FETCH_ADD);
439 put_ib_ateth_swap(wqe->atomic_wr.compare_add, 439 put_ib_ateth_swap(wqe->atomic_wr.compare_add,
440 &ohdr->u.atomic_eth); 440 &ohdr->u.atomic_eth);
441 put_ib_ateth_swap(0, &ohdr->u.atomic_eth); 441 put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
442 } 442 }
443 put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, 443 put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
444 &ohdr->u.atomic_eth); 444 &ohdr->u.atomic_eth);
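The qib_rc.c hunk above fixes a copy-and-paste error in the RC atomic request builder: the compare operand of a CmpSwap (and the unused compare slot of a FetchAdd) was written with put_ib_ateth_swap(), so the AtomicETH compare field was never actually populated. A minimal kernel-context sketch of the intended encoding, assuming the put_ib_ateth_*() helpers from <rdma/ib_hdrs.h> simply store a 64-bit operand into the swap/add, compare and vaddr fields of the atomic extended transport header:

#include <rdma/ib_hdrs.h>

/* Illustrative only; encode_atomic_eth() is not a qib function. */
static void encode_atomic_eth(struct ib_atomic_eth *ateth, bool cmp_swap,
			      u64 swap_or_add, u64 compare, u64 remote_addr)
{
	if (cmp_swap) {
		/* CmpSwap: the wire format carries both swap and compare */
		put_ib_ateth_swap(swap_or_add, ateth);
		put_ib_ateth_compare(compare, ateth);
	} else {
		/* FetchAdd: the add operand travels in the swap/add field;
		 * the compare field is unused and stays zero */
		put_ib_ateth_swap(swap_or_add, ateth);
		put_ib_ateth_compare(0, ateth);
	}
	put_ib_ateth_vaddr(remote_addr, ateth);
}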
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index c55000501582..fabee760407e 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1571,7 +1571,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
1571 if (!ib_qib_sys_image_guid) 1571 if (!ib_qib_sys_image_guid)
1572 ib_qib_sys_image_guid = ppd->guid; 1572 ib_qib_sys_image_guid = ppd->guid;
1573 1573
1574 strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
1575 ibdev->owner = THIS_MODULE; 1574 ibdev->owner = THIS_MODULE;
1576 ibdev->node_guid = ppd->guid; 1575 ibdev->node_guid = ppd->guid;
1577 ibdev->phys_port_cnt = dd->num_pports; 1576 ibdev->phys_port_cnt = dd->num_pports;
@@ -1586,7 +1585,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
1586 * Fill in rvt info object. 1585 * Fill in rvt info object.
1587 */ 1586 */
1588 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; 1587 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
1589 dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
1590 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; 1588 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1591 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; 1589 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1592 dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; 1590 dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 685ef2293cb8..4210ca14014d 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -45,7 +45,6 @@
45#include "usnic_ib_verbs.h" 45#include "usnic_ib_verbs.h"
46#include "usnic_ib_sysfs.h" 46#include "usnic_ib_sysfs.h"
47#include "usnic_log.h" 47#include "usnic_log.h"
48#include "usnic_ib_sysfs.h"
49 48
50static ssize_t usnic_ib_show_board(struct device *device, 49static ssize_t usnic_ib_show_board(struct device *device,
51 struct device_attribute *attr, 50 struct device_attribute *attr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index aa2456a4f9bd..a688a5669168 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -47,7 +47,6 @@
47#include "usnic_log.h" 47#include "usnic_log.h"
48#include "usnic_uiom.h" 48#include "usnic_uiom.h"
49#include "usnic_transport.h" 49#include "usnic_transport.h"
50#include "usnic_ib_verbs.h"
51 50
52#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM 51#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
53 52
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 4f7bd3b6a315..44cb1cfba417 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -93,7 +93,7 @@ struct pvrdma_cq {
93 struct pvrdma_page_dir pdir; 93 struct pvrdma_page_dir pdir;
94 u32 cq_handle; 94 u32 cq_handle;
95 bool is_kernel; 95 bool is_kernel;
96 atomic_t refcnt; 96 refcount_t refcnt;
97 struct completion free; 97 struct completion free;
98}; 98};
99 99
@@ -196,7 +196,7 @@ struct pvrdma_qp {
196 u8 state; 196 u8 state;
197 bool is_kernel; 197 bool is_kernel;
198 struct mutex mutex; /* QP state mutex. */ 198 struct mutex mutex; /* QP state mutex. */
199 atomic_t refcnt; 199 refcount_t refcnt;
200 struct completion free; 200 struct completion free;
201}; 201};
202 202
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index e529622cefad..faa9478c14a6 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -132,8 +132,9 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
132 } 132 }
133 133
134 cq->ibcq.cqe = entries; 134 cq->ibcq.cqe = entries;
135 cq->is_kernel = !context;
135 136
136 if (context) { 137 if (!cq->is_kernel) {
137 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { 138 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
138 ret = -EFAULT; 139 ret = -EFAULT;
139 goto err_cq; 140 goto err_cq;
@@ -148,8 +149,6 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
148 149
149 npages = ib_umem_page_count(cq->umem); 150 npages = ib_umem_page_count(cq->umem);
150 } else { 151 } else {
151 cq->is_kernel = true;
152
153 /* One extra page for shared ring state */ 152 /* One extra page for shared ring state */
154 npages = 1 + (entries * sizeof(struct pvrdma_cqe) + 153 npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
155 PAGE_SIZE - 1) / PAGE_SIZE; 154 PAGE_SIZE - 1) / PAGE_SIZE;
@@ -178,7 +177,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
178 else 177 else
179 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0); 178 pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
180 179
181 atomic_set(&cq->refcnt, 1); 180 refcount_set(&cq->refcnt, 1);
182 init_completion(&cq->free); 181 init_completion(&cq->free);
183 spin_lock_init(&cq->cq_lock); 182 spin_lock_init(&cq->cq_lock);
184 183
@@ -202,7 +201,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
202 dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq; 201 dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
203 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 202 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
204 203
205 if (context) { 204 if (!cq->is_kernel) {
206 cq->uar = &(to_vucontext(context)->uar); 205 cq->uar = &(to_vucontext(context)->uar);
207 206
208 /* Copy udata back. */ 207 /* Copy udata back. */
@@ -219,7 +218,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
219err_page_dir: 218err_page_dir:
220 pvrdma_page_dir_cleanup(dev, &cq->pdir); 219 pvrdma_page_dir_cleanup(dev, &cq->pdir);
221err_umem: 220err_umem:
222 if (context) 221 if (!cq->is_kernel)
223 ib_umem_release(cq->umem); 222 ib_umem_release(cq->umem);
224err_cq: 223err_cq:
225 atomic_dec(&dev->num_cqs); 224 atomic_dec(&dev->num_cqs);
@@ -230,7 +229,7 @@ err_cq:
230 229
231static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) 230static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
232{ 231{
233 if (atomic_dec_and_test(&cq->refcnt)) 232 if (refcount_dec_and_test(&cq->refcnt))
234 complete(&cq->free); 233 complete(&cq->free);
235 wait_for_completion(&cq->free); 234 wait_for_completion(&cq->free);
236 235
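The pvrdma lifetime changes in this and the following files replace a bare atomic_t reference count with refcount_t, which saturates and warns on underflow and overflow, while keeping the existing "drop the initial reference, then wait for the last user" teardown. A minimal kernel-context sketch of that pattern, with illustrative names rather than the driver's own:

#include <linux/refcount.h>
#include <linux/completion.h>

struct tracked_obj {
	refcount_t refcnt;
	struct completion free;
};

static void tracked_obj_init(struct tracked_obj *obj)
{
	refcount_set(&obj->refcnt, 1);		/* creator holds the initial ref */
	init_completion(&obj->free);
}

static void tracked_obj_put(struct tracked_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcnt))
		complete(&obj->free);
}

static void tracked_obj_destroy(struct tracked_obj *obj)
{
	tracked_obj_put(obj);			/* drop the creator's reference */
	wait_for_completion(&obj->free);	/* wait for in-flight users */
}

Event and interrupt paths take a temporary reference with refcount_inc() and release it through the same put helper, which is exactly what the pvrdma_*_event() and interrupt handlers in the next file do.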
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index e92681878c93..d650a9fcde24 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -243,13 +243,13 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
243 mutex_init(&dev->port_mutex); 243 mutex_init(&dev->port_mutex);
244 spin_lock_init(&dev->desc_lock); 244 spin_lock_init(&dev->desc_lock);
245 245
246 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *), 246 dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(struct pvrdma_cq *),
247 GFP_KERNEL); 247 GFP_KERNEL);
248 if (!dev->cq_tbl) 248 if (!dev->cq_tbl)
249 return ret; 249 return ret;
250 spin_lock_init(&dev->cq_tbl_lock); 250 spin_lock_init(&dev->cq_tbl_lock);
251 251
252 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *), 252 dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(struct pvrdma_qp *),
253 GFP_KERNEL); 253 GFP_KERNEL);
254 if (!dev->qp_tbl) 254 if (!dev->qp_tbl)
255 goto err_cq_free; 255 goto err_cq_free;
@@ -333,7 +333,7 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
333 spin_lock_irqsave(&dev->qp_tbl_lock, flags); 333 spin_lock_irqsave(&dev->qp_tbl_lock, flags);
334 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp]; 334 qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
335 if (qp) 335 if (qp)
336 atomic_inc(&qp->refcnt); 336 refcount_inc(&qp->refcnt);
337 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); 337 spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
338 338
339 if (qp && qp->ibqp.event_handler) { 339 if (qp && qp->ibqp.event_handler) {
@@ -346,7 +346,7 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
346 ibqp->event_handler(&e, ibqp->qp_context); 346 ibqp->event_handler(&e, ibqp->qp_context);
347 } 347 }
348 if (qp) { 348 if (qp) {
349 if (atomic_dec_and_test(&qp->refcnt)) 349 if (refcount_dec_and_test(&qp->refcnt))
350 complete(&qp->free); 350 complete(&qp->free);
351 } 351 }
352} 352}
@@ -359,7 +359,7 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
359 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 359 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
360 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq]; 360 cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
361 if (cq) 361 if (cq)
362 atomic_inc(&cq->refcnt); 362 refcount_inc(&cq->refcnt);
363 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 363 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
364 364
365 if (cq && cq->ibcq.event_handler) { 365 if (cq && cq->ibcq.event_handler) {
@@ -372,7 +372,7 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
372 ibcq->event_handler(&e, ibcq->cq_context); 372 ibcq->event_handler(&e, ibcq->cq_context);
373 } 373 }
374 if (cq) { 374 if (cq) {
375 if (atomic_dec_and_test(&cq->refcnt)) 375 if (refcount_dec_and_test(&cq->refcnt))
376 complete(&cq->free); 376 complete(&cq->free);
377 } 377 }
378} 378}
@@ -531,13 +531,13 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
531 spin_lock_irqsave(&dev->cq_tbl_lock, flags); 531 spin_lock_irqsave(&dev->cq_tbl_lock, flags);
532 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; 532 cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
533 if (cq) 533 if (cq)
534 atomic_inc(&cq->refcnt); 534 refcount_inc(&cq->refcnt);
535 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); 535 spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
536 536
537 if (cq && cq->ibcq.comp_handler) 537 if (cq && cq->ibcq.comp_handler)
538 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); 538 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
539 if (cq) { 539 if (cq) {
540 if (atomic_dec_and_test(&cq->refcnt)) 540 if (refcount_dec_and_test(&cq->refcnt))
541 complete(&cq->free); 541 complete(&cq->free);
542 } 542 }
543 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots); 543 pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
@@ -882,8 +882,8 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
882 dev_info(&pdev->dev, "device version %d, driver version %d\n", 882 dev_info(&pdev->dev, "device version %d, driver version %d\n",
883 dev->dsr_version, PVRDMA_VERSION); 883 dev->dsr_version, PVRDMA_VERSION);
884 884
885 dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr), 885 dev->dsr = dma_zalloc_coherent(&pdev->dev, sizeof(*dev->dsr),
886 &dev->dsrbase, GFP_KERNEL); 886 &dev->dsrbase, GFP_KERNEL);
887 if (!dev->dsr) { 887 if (!dev->dsr) {
888 dev_err(&pdev->dev, "failed to allocate shared region\n"); 888 dev_err(&pdev->dev, "failed to allocate shared region\n");
889 ret = -ENOMEM; 889 ret = -ENOMEM;
@@ -891,7 +891,6 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
891 } 891 }
892 892
893 /* Setup the shared region */ 893 /* Setup the shared region */
894 memset(dev->dsr, 0, sizeof(*dev->dsr));
895 dev->dsr->driver_version = PVRDMA_VERSION; 894 dev->dsr->driver_version = PVRDMA_VERSION;
896 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ? 895 dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
897 PVRDMA_GOS_BITS_32 : 896 PVRDMA_GOS_BITS_32 :
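Two small cleanups in pvrdma_main.c: the cq/qp tables are now sized with sizeof(struct pvrdma_cq *) and sizeof(struct pvrdma_qp *) rather than sizeof(void *), which changes nothing at runtime but documents what the tables hold, and the device shared region is obtained already zeroed instead of being cleared by hand. A hedged sketch of the allocation change, where dev, size and dma stand in for the driver's PCI device, descriptor size and DMA handle:

#include <linux/dma-mapping.h>
#include <linux/string.h>

static void *alloc_shared_region(struct device *dev, size_t size,
				 dma_addr_t *dma)
{
	/*
	 * Old pattern:
	 *	p = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
	 *	if (p)
	 *		memset(p, 0, size);
	 *
	 * New pattern: one call returns zeroed coherent memory
	 * (dma_zalloc_coherent() was still a distinct helper in this kernel).
	 */
	return dma_zalloc_coherent(dev, size, dma, GFP_KERNEL);
}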
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 8519f3212e52..fa96fa4fb829 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -119,10 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
119 union pvrdma_cmd_resp rsp; 119 union pvrdma_cmd_resp rsp;
120 struct pvrdma_cmd_create_mr *cmd = &req.create_mr; 120 struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
121 struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp; 121 struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
122 int nchunks;
123 int ret; 122 int ret;
124 int entry;
125 struct scatterlist *sg;
126 123
127 if (length == 0 || length > dev->dsr->caps.max_mr_size) { 124 if (length == 0 || length > dev->dsr->caps.max_mr_size) {
128 dev_warn(&dev->pdev->dev, "invalid mem region length\n"); 125 dev_warn(&dev->pdev->dev, "invalid mem region length\n");
@@ -137,13 +134,9 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
137 return ERR_CAST(umem); 134 return ERR_CAST(umem);
138 } 135 }
139 136
140 nchunks = 0; 137 if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
141 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry)
142 nchunks += sg_dma_len(sg) >> PAGE_SHIFT;
143
144 if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) {
145 dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", 138 dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
146 nchunks); 139 umem->npages);
147 ret = -EINVAL; 140 ret = -EINVAL;
148 goto err_umem; 141 goto err_umem;
149 } 142 }
@@ -158,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
158 mr->mmr.size = length; 151 mr->mmr.size = length;
159 mr->umem = umem; 152 mr->umem = umem;
160 153
161 ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false); 154 ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
162 if (ret) { 155 if (ret) {
163 dev_warn(&dev->pdev->dev, 156 dev_warn(&dev->pdev->dev,
164 "could not allocate page directory\n"); 157 "could not allocate page directory\n");
@@ -175,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
175 cmd->length = length; 168 cmd->length = length;
176 cmd->pd_handle = to_vpd(pd)->pd_handle; 169 cmd->pd_handle = to_vpd(pd)->pd_handle;
177 cmd->access_flags = access_flags; 170 cmd->access_flags = access_flags;
178 cmd->nchunks = nchunks; 171 cmd->nchunks = umem->npages;
179 cmd->pdir_dma = mr->pdir.dir_dma; 172 cmd->pdir_dma = mr->pdir.dir_dma;
180 173
181 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP); 174 ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 4059308e1454..7bf518bdbf21 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -245,12 +245,13 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
245 spin_lock_init(&qp->sq.lock); 245 spin_lock_init(&qp->sq.lock);
246 spin_lock_init(&qp->rq.lock); 246 spin_lock_init(&qp->rq.lock);
247 mutex_init(&qp->mutex); 247 mutex_init(&qp->mutex);
248 atomic_set(&qp->refcnt, 1); 248 refcount_set(&qp->refcnt, 1);
249 init_completion(&qp->free); 249 init_completion(&qp->free);
250 250
251 qp->state = IB_QPS_RESET; 251 qp->state = IB_QPS_RESET;
252 qp->is_kernel = !(pd->uobject && udata);
252 253
253 if (pd->uobject && udata) { 254 if (!qp->is_kernel) {
254 dev_dbg(&dev->pdev->dev, 255 dev_dbg(&dev->pdev->dev,
255 "create queuepair from user space\n"); 256 "create queuepair from user space\n");
256 257
@@ -291,8 +292,6 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
291 qp->npages_recv = 0; 292 qp->npages_recv = 0;
292 qp->npages = qp->npages_send + qp->npages_recv; 293 qp->npages = qp->npages_send + qp->npages_recv;
293 } else { 294 } else {
294 qp->is_kernel = true;
295
296 ret = pvrdma_set_sq_size(to_vdev(pd->device), 295 ret = pvrdma_set_sq_size(to_vdev(pd->device),
297 &init_attr->cap, qp); 296 &init_attr->cap, qp);
298 if (ret) 297 if (ret)
@@ -394,7 +393,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
394err_pdir: 393err_pdir:
395 pvrdma_page_dir_cleanup(dev, &qp->pdir); 394 pvrdma_page_dir_cleanup(dev, &qp->pdir);
396err_umem: 395err_umem:
397 if (pd->uobject && udata) { 396 if (!qp->is_kernel) {
398 if (qp->rumem) 397 if (qp->rumem)
399 ib_umem_release(qp->rumem); 398 ib_umem_release(qp->rumem);
400 if (qp->sumem) 399 if (qp->sumem)
@@ -428,7 +427,7 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
428 427
429 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); 428 pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
430 429
431 if (atomic_dec_and_test(&qp->refcnt)) 430 if (refcount_dec_and_test(&qp->refcnt))
432 complete(&qp->free); 431 complete(&qp->free);
433 wait_for_completion(&qp->free); 432 wait_for_completion(&qp->free);
434 433
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 97d71e49c092..fb52b669bfce 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -56,7 +56,7 @@
56 * rvt_cq_enter - add a new entry to the completion queue 56 * rvt_cq_enter - add a new entry to the completion queue
57 * @cq: completion queue 57 * @cq: completion queue
58 * @entry: work completion entry to add 58 * @entry: work completion entry to add
59 * @sig: true if @entry is solicited 59 * @solicited: true if @entry is solicited
60 * 60 *
61 * This may be called with qp->s_lock held. 61 * This may be called with qp->s_lock held.
62 */ 62 */
@@ -101,8 +101,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
101 wc->uqueue[head].opcode = entry->opcode; 101 wc->uqueue[head].opcode = entry->opcode;
102 wc->uqueue[head].vendor_err = entry->vendor_err; 102 wc->uqueue[head].vendor_err = entry->vendor_err;
103 wc->uqueue[head].byte_len = entry->byte_len; 103 wc->uqueue[head].byte_len = entry->byte_len;
104 wc->uqueue[head].ex.imm_data = 104 wc->uqueue[head].ex.imm_data = entry->ex.imm_data;
105 (__u32 __force)entry->ex.imm_data;
106 wc->uqueue[head].qp_num = entry->qp->qp_num; 105 wc->uqueue[head].qp_num = entry->qp->qp_num;
107 wc->uqueue[head].src_qp = entry->src_qp; 106 wc->uqueue[head].src_qp = entry->src_qp;
108 wc->uqueue[head].wc_flags = entry->wc_flags; 107 wc->uqueue[head].wc_flags = entry->wc_flags;
@@ -198,7 +197,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
198 return ERR_PTR(-EINVAL); 197 return ERR_PTR(-EINVAL);
199 198
200 /* Allocate the completion queue structure. */ 199 /* Allocate the completion queue structure. */
201 cq = kzalloc(sizeof(*cq), GFP_KERNEL); 200 cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
202 if (!cq) 201 if (!cq)
203 return ERR_PTR(-ENOMEM); 202 return ERR_PTR(-ENOMEM);
204 203
@@ -214,7 +213,9 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
214 sz += sizeof(struct ib_uverbs_wc) * (entries + 1); 213 sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
215 else 214 else
216 sz += sizeof(struct ib_wc) * (entries + 1); 215 sz += sizeof(struct ib_wc) * (entries + 1);
217 wc = vmalloc_user(sz); 216 wc = udata ?
217 vmalloc_user(sz) :
218 vzalloc_node(sz, rdi->dparms.node);
218 if (!wc) { 219 if (!wc) {
219 ret = ERR_PTR(-ENOMEM); 220 ret = ERR_PTR(-ENOMEM);
220 goto bail_cq; 221 goto bail_cq;
@@ -369,7 +370,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
369 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); 370 sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
370 else 371 else
371 sz += sizeof(struct ib_wc) * (cqe + 1); 372 sz += sizeof(struct ib_wc) * (cqe + 1);
372 wc = vmalloc_user(sz); 373 wc = udata ?
374 vmalloc_user(sz) :
375 vzalloc_node(sz, rdi->dparms.node);
373 if (!wc) 376 if (!wc)
374 return -ENOMEM; 377 return -ENOMEM;
375 378
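rvt_create_cq() and rvt_resize_cq() now choose the allocator by destination: a CQ created for user space still gets its completion ring from vmalloc_user(), which returns zeroed memory that may be mmap()ed into the process, while kernel-owned CQs (and the cq structure itself via kzalloc_node()) are placed on the device's NUMA node with vzalloc_node(). A minimal sketch of the selection, with node standing in for rdi->dparms.node:

#include <linux/vmalloc.h>
#include <rdma/ib_verbs.h>

/* udata != NULL means the object is created on behalf of user space. */
static void *alloc_cq_ring(size_t sz, struct ib_udata *udata, int node)
{
	return udata ? vmalloc_user(sz)		/* zeroed, mmap()-able */
		     : vzalloc_node(sz, node);	/* zeroed, NUMA-local  */
}

The same udata test drives the rvt_create_srq()/rvt_modify_srq() changes further down.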
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index b3a38c5e4cad..dd11c6fcd060 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -272,7 +272,7 @@ bail:
272/** 272/**
273 * rvt_attach_mcast - attach a qp to a multicast group 273 * rvt_attach_mcast - attach a qp to a multicast group
274 * @ibqp: Infiniband qp 274 * @ibqp: Infiniband qp
275 * @igd: multicast guid 275 * @gid: multicast guid
276 * @lid: multicast lid 276 * @lid: multicast lid
277 * 277 *
278 * Return: 0 on success 278 * Return: 0 on success
@@ -335,7 +335,7 @@ bail_mcast:
335/** 335/**
336 * rvt_detach_mcast - remove a qp from a multicast group 336 * rvt_detach_mcast - remove a qp from a multicast group
337 * @ibqp: Infiniband qp 337 * @ibqp: Infiniband qp
338 * @igd: multicast guid 338 * @gid: multicast guid
339 * @lid: multicast lid 339 * @lid: multicast lid
340 * 340 *
341 * Return: 0 on success 341 * Return: 0 on success
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 42713511b53b..1b2e5362a3ff 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -768,7 +768,7 @@ bail:
768 768
769/** 769/**
770 * rvt_map_phys_fmr - set up a fast memory region 770 * rvt_map_phys_fmr - set up a fast memory region
771 * @ibmfr: the fast memory region to set up 771 * @ibfmr: the fast memory region to set up
772 * @page_list: the list of pages to associate with the fast memory region 772 * @page_list: the list of pages to associate with the fast memory region
773 * @list_len: the number of pages to associate with the fast memory region 773 * @list_len: the number of pages to associate with the fast memory region
774 * @iova: the virtual address of the start of the fast memory region 774 * @iova: the virtual address of the start of the fast memory region
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index eae84c216e2f..c82e6bb3d77c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -269,7 +269,7 @@ no_qp_table:
269 269
270/** 270/**
271 * free_all_qps - check for QPs still in use 271 * free_all_qps - check for QPs still in use
272 * @qpt: the QP table to empty 272 * @rdi: rvt device info structure
273 * 273 *
274 * There should not be any QPs still in use. 274 * There should not be any QPs still in use.
275 * Free memory for table. 275 * Free memory for table.
@@ -335,9 +335,9 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
335/** 335/**
336 * alloc_qpn - Allocate the next available qpn or zero/one for QP type 336 * alloc_qpn - Allocate the next available qpn or zero/one for QP type
337 * IB_QPT_SMI/IB_QPT_GSI 337 * IB_QPT_SMI/IB_QPT_GSI
338 *@rdi: rvt device info structure 338 * @rdi: rvt device info structure
339 *@qpt: queue pair number table pointer 339 * @qpt: queue pair number table pointer
340 *@port_num: IB port number, 1 based, comes from core 340 * @port_num: IB port number, 1 based, comes from core
341 * 341 *
342 * Return: The queue pair number 342 * Return: The queue pair number
343 */ 343 */
@@ -1650,9 +1650,9 @@ static inline int rvt_qp_valid_operation(
1650 1650
1651/** 1651/**
1652 * rvt_qp_is_avail - determine queue capacity 1652 * rvt_qp_is_avail - determine queue capacity
1653 * @qp - the qp 1653 * @qp: the qp
1654 * @rdi - the rdmavt device 1654 * @rdi: the rdmavt device
1655 * @reserved_op - is reserved operation 1655 * @reserved_op: is reserved operation
1656 * 1656 *
1657 * This assumes the s_hlock is held but the s_last 1657 * This assumes the s_hlock is held but the s_last
1658 * qp variable is uncontrolled. 1658 * qp variable is uncontrolled.
@@ -2074,6 +2074,7 @@ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
2074 lockdep_assert_held(&qp->s_lock); 2074 lockdep_assert_held(&qp->s_lock);
2075 qp->s_flags |= RVT_S_WAIT_RNR; 2075 qp->s_flags |= RVT_S_WAIT_RNR;
2076 to = rvt_aeth_to_usec(aeth); 2076 to = rvt_aeth_to_usec(aeth);
2077 trace_rvt_rnrnak_add(qp, to);
2077 hrtimer_start(&qp->s_rnr_timer, 2078 hrtimer_start(&qp->s_rnr_timer,
2078 ns_to_ktime(1000 * to), HRTIMER_MODE_REL); 2079 ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
2079} 2080}
@@ -2103,17 +2104,14 @@ EXPORT_SYMBOL(rvt_stop_rc_timers);
2103 * stop an rnr timer and return if the timer 2104 * stop an rnr timer and return if the timer
2104 * had been pending. 2105 * had been pending.
2105 */ 2106 */
2106static int rvt_stop_rnr_timer(struct rvt_qp *qp) 2107static void rvt_stop_rnr_timer(struct rvt_qp *qp)
2107{ 2108{
2108 int rval = 0;
2109
2110 lockdep_assert_held(&qp->s_lock); 2109 lockdep_assert_held(&qp->s_lock);
2111 /* Remove QP from rnr timer */ 2110 /* Remove QP from rnr timer */
2112 if (qp->s_flags & RVT_S_WAIT_RNR) { 2111 if (qp->s_flags & RVT_S_WAIT_RNR) {
2113 qp->s_flags &= ~RVT_S_WAIT_RNR; 2112 qp->s_flags &= ~RVT_S_WAIT_RNR;
2114 rval = hrtimer_try_to_cancel(&qp->s_rnr_timer); 2113 trace_rvt_rnrnak_stop(qp, 0);
2115 } 2114 }
2116 return rval;
2117} 2115}
2118 2116
2119/** 2117/**
@@ -2166,6 +2164,7 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2166 2164
2167 spin_lock_irqsave(&qp->s_lock, flags); 2165 spin_lock_irqsave(&qp->s_lock, flags);
2168 rvt_stop_rnr_timer(qp); 2166 rvt_stop_rnr_timer(qp);
2167 trace_rvt_rnrnak_timeout(qp, 0);
2169 rdi->driver_f.schedule_send(qp); 2168 rdi->driver_f.schedule_send(qp);
2170 spin_unlock_irqrestore(&qp->s_lock, flags); 2169 spin_unlock_irqrestore(&qp->s_lock, flags);
2171 return HRTIMER_NORESTART; 2170 return HRTIMER_NORESTART;
@@ -2174,8 +2173,8 @@ EXPORT_SYMBOL(rvt_rc_rnr_retry);
2174 2173
2175/** 2174/**
2176 * rvt_qp_iter_init - initial for QP iteration 2175 * rvt_qp_iter_init - initial for QP iteration
2177 * @rdi - rvt devinfo 2176 * @rdi: rvt devinfo
2178 * @v - u64 value 2177 * @v: u64 value
2179 * 2178 *
2180 * This returns an iterator suitable for iterating QPs 2179 * This returns an iterator suitable for iterating QPs
2181 * in the system. 2180 * in the system.
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index f7c48e9023de..3707952b4364 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -90,7 +90,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
90 srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) 90 srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr)
91 return ERR_PTR(-EINVAL); 91 return ERR_PTR(-EINVAL);
92 92
93 srq = kmalloc(sizeof(*srq), GFP_KERNEL); 93 srq = kzalloc_node(sizeof(*srq), GFP_KERNEL, dev->dparms.node);
94 if (!srq) 94 if (!srq)
95 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 96
@@ -101,7 +101,10 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
101 srq->rq.max_sge = srq_init_attr->attr.max_sge; 101 srq->rq.max_sge = srq_init_attr->attr.max_sge;
102 sz = sizeof(struct ib_sge) * srq->rq.max_sge + 102 sz = sizeof(struct ib_sge) * srq->rq.max_sge +
103 sizeof(struct rvt_rwqe); 103 sizeof(struct rvt_rwqe);
104 srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); 104 srq->rq.wq = udata ?
105 vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) :
106 vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz,
107 dev->dparms.node);
105 if (!srq->rq.wq) { 108 if (!srq->rq.wq) {
106 ret = ERR_PTR(-ENOMEM); 109 ret = ERR_PTR(-ENOMEM);
107 goto bail_srq; 110 goto bail_srq;
@@ -129,16 +132,12 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
129 ret = ERR_PTR(err); 132 ret = ERR_PTR(err);
130 goto bail_ip; 133 goto bail_ip;
131 } 134 }
132 } else {
133 srq->ip = NULL;
134 } 135 }
135 136
136 /* 137 /*
137 * ib_create_srq() will initialize srq->ibsrq. 138 * ib_create_srq() will initialize srq->ibsrq.
138 */ 139 */
139 spin_lock_init(&srq->rq.lock); 140 spin_lock_init(&srq->rq.lock);
140 srq->rq.wq->head = 0;
141 srq->rq.wq->tail = 0;
142 srq->limit = srq_init_attr->attr.srq_limit; 141 srq->limit = srq_init_attr->attr.srq_limit;
143 142
144 spin_lock(&dev->n_srqs_lock); 143 spin_lock(&dev->n_srqs_lock);
@@ -200,7 +199,10 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
200 sz = sizeof(struct rvt_rwqe) + 199 sz = sizeof(struct rvt_rwqe) +
201 srq->rq.max_sge * sizeof(struct ib_sge); 200 srq->rq.max_sge * sizeof(struct ib_sge);
202 size = attr->max_wr + 1; 201 size = attr->max_wr + 1;
203 wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); 202 wq = udata ?
203 vmalloc_user(sizeof(struct rvt_rwq) + size * sz) :
204 vzalloc_node(sizeof(struct rvt_rwq) + size * sz,
205 dev->dparms.node);
204 if (!wq) 206 if (!wq)
205 return -ENOMEM; 207 return -ENOMEM;
206 208
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index bb4b1e710f22..36ddbd291ee0 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,8 +45,8 @@
45 * 45 *
46 */ 46 */
47 47
48#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) 48#define RDI_DEV_ENTRY(rdi) __string(dev, rvt_get_ibdev_name(rdi))
49#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) 49#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rvt_get_ibdev_name(rdi))
50 50
51#include "trace_rvt.h" 51#include "trace_rvt.h"
52#include "trace_qp.h" 52#include "trace_qp.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
index 4c77a3119bda..efc9d814b032 100644
--- a/drivers/infiniband/sw/rdmavt/trace_qp.h
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -85,6 +85,48 @@ DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
85 TP_PROTO(struct rvt_qp *qp, u32 bucket), 85 TP_PROTO(struct rvt_qp *qp, u32 bucket),
86 TP_ARGS(qp, bucket)); 86 TP_ARGS(qp, bucket));
87 87
88DECLARE_EVENT_CLASS(
89 rvt_rnrnak_template,
90 TP_PROTO(struct rvt_qp *qp, u32 to),
91 TP_ARGS(qp, to),
92 TP_STRUCT__entry(
93 RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
94 __field(u32, qpn)
95 __field(void *, hrtimer)
96 __field(u32, s_flags)
97 __field(u32, to)
98 ),
99 TP_fast_assign(
100 RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
101 __entry->qpn = qp->ibqp.qp_num;
102 __entry->hrtimer = &qp->s_rnr_timer;
103 __entry->s_flags = qp->s_flags;
104 __entry->to = to;
105 ),
106 TP_printk(
107 "[%s] qpn 0x%x hrtimer 0x%p s_flags 0x%x timeout %u us",
108 __get_str(dev),
109 __entry->qpn,
110 __entry->hrtimer,
111 __entry->s_flags,
112 __entry->to
113 )
114);
115
116DEFINE_EVENT(
117 rvt_rnrnak_template, rvt_rnrnak_add,
118 TP_PROTO(struct rvt_qp *qp, u32 to),
119 TP_ARGS(qp, to));
120
121DEFINE_EVENT(
122 rvt_rnrnak_template, rvt_rnrnak_timeout,
123 TP_PROTO(struct rvt_qp *qp, u32 to),
124 TP_ARGS(qp, to));
125
126DEFINE_EVENT(
127 rvt_rnrnak_template, rvt_rnrnak_stop,
128 TP_PROTO(struct rvt_qp *qp, u32 to),
129 TP_ARGS(qp, to));
88 130
89#endif /* __RVT_TRACE_QP_H */ 131#endif /* __RVT_TRACE_QP_H */
90 132
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 64bdd442078a..a4553b2b3696 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -224,7 +224,8 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
224 * rvt_query_pkey - Return a pkey from the table at a given index 224 * rvt_query_pkey - Return a pkey from the table at a given index
225 * @ibdev: Verbs IB dev 225 * @ibdev: Verbs IB dev
226 * @port_num: Port number, 1 based from ib core 226 * @port_num: Port number, 1 based from ib core
227 * @intex: Index into pkey table 227 * @index: Index into pkey table
228 * @pkey: returned pkey from the port pkey table
228 * 229 *
229 * Return: 0 on failure pkey otherwise 230 * Return: 0 on failure pkey otherwise
230 */ 231 */
@@ -255,7 +256,7 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
255 * rvt_query_gid - Return a gid from the table 256 * rvt_query_gid - Return a gid from the table
256 * @ibdev: Verbs IB dev 257 * @ibdev: Verbs IB dev
257 * @port_num: Port number, 1 based from ib core 258 * @port_num: Port number, 1 based from ib core
258 * @index: = Index in table 259 * @guid_index: Index in table
259 * @gid: Gid to return 260 * @gid: Gid to return
260 * 261 *
261 * Return: 0 on success 262 * Return: 0 on success
@@ -297,8 +298,8 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
297 298
298/** 299/**
299 * rvt_alloc_ucontext - Allocate a user context 300 * rvt_alloc_ucontext - Allocate a user context
300 * @ibdev: Vers IB dev 301 * @ibdev: Verbs IB dev
301 * @data: User data allocated 302 * @udata: User data allocated
302 */ 303 */
303static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev, 304static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
304 struct ib_udata *udata) 305 struct ib_udata *udata)
@@ -413,7 +414,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
413 * required for rdmavt to function. 414 * required for rdmavt to function.
414 */ 415 */
415 if ((!rdi->driver_f.port_callback) || 416 if ((!rdi->driver_f.port_callback) ||
416 (!rdi->driver_f.get_card_name) ||
417 (!rdi->driver_f.get_pci_dev)) 417 (!rdi->driver_f.get_pci_dev))
418 return -EINVAL; 418 return -EINVAL;
419 break; 419 break;
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index f363505312be..8823b2e7aac6 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -63,19 +63,19 @@
63 63
64#define rvt_pr_info(rdi, fmt, ...) \ 64#define rvt_pr_info(rdi, fmt, ...) \
65 __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ 65 __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \
66 rdi->driver_f.get_card_name(rdi), \ 66 rvt_get_ibdev_name(rdi), \
67 fmt, \ 67 fmt, \
68 ##__VA_ARGS__) 68 ##__VA_ARGS__)
69 69
70#define rvt_pr_warn(rdi, fmt, ...) \ 70#define rvt_pr_warn(rdi, fmt, ...) \
71 __rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \ 71 __rvt_pr_warn(rdi->driver_f.get_pci_dev(rdi), \
72 rdi->driver_f.get_card_name(rdi), \ 72 rvt_get_ibdev_name(rdi), \
73 fmt, \ 73 fmt, \
74 ##__VA_ARGS__) 74 ##__VA_ARGS__)
75 75
76#define rvt_pr_err(rdi, fmt, ...) \ 76#define rvt_pr_err(rdi, fmt, ...) \
77 __rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \ 77 __rvt_pr_err(rdi->driver_f.get_pci_dev(rdi), \
78 rdi->driver_f.get_card_name(rdi), \ 78 rvt_get_ibdev_name(rdi), \
79 fmt, \ 79 fmt, \
80 ##__VA_ARGS__) 80 ##__VA_ARGS__)
81 81
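With rvt_set_ibdev_name() naming the device at allocation time (see the qib_init.c hunk above), rdmavt no longer needs a per-driver get_card_name() callback: the rvt_pr_*() macros and the trace headers read the name straight from the ib_device embedded in rvt_dev_info, and check_support() in vt.c drops the callback from its required list. The replacement helper presumably lives as an inline in include/rdma/rdma_vt.h; its body is assumed here, but it amounts to:

#include <rdma/rdma_vt.h>

/* Assumed shape of the helper: rvt_dev_info embeds a struct ib_device,
 * so the registered name is available without a driver hook. */
static inline const char *rvt_get_ibdev_name_sketch(const struct rvt_dev_info *rdi)
{
	return rdi->ibdev.name;
}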
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 320bffc980d8..bad4a576d7cf 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -1,8 +1,8 @@
1config RDMA_RXE 1config RDMA_RXE
2 tristate "Software RDMA over Ethernet (RoCE) driver" 2 tristate "Software RDMA over Ethernet (RoCE) driver"
3 depends on INET && PCI && INFINIBAND 3 depends on INET && PCI && INFINIBAND
4 depends on NET_UDP_TUNNEL 4 select NET_UDP_TUNNEL
5 depends on CRYPTO_CRC32 5 select CRYPTO_CRC32
6 select DMA_VIRT_OPS 6 select DMA_VIRT_OPS
7 ---help--- 7 ---help---
8 This driver implements the InfiniBand RDMA transport over 8 This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 8c3d30b3092d..b7debb6f2eac 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -77,12 +77,6 @@ void rxe_release(struct kref *kref)
77 ib_dealloc_device(&rxe->ib_dev); 77 ib_dealloc_device(&rxe->ib_dev);
78} 78}
79 79
80void rxe_dev_put(struct rxe_dev *rxe)
81{
82 kref_put(&rxe->ref_cnt, rxe_release);
83}
84EXPORT_SYMBOL_GPL(rxe_dev_put);
85
86/* initialize rxe device parameters */ 80/* initialize rxe device parameters */
87static int rxe_init_device_param(struct rxe_dev *rxe) 81static int rxe_init_device_param(struct rxe_dev *rxe)
88{ 82{
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 6447d736d5a4..7d232611303f 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -57,6 +57,7 @@
57#include "rxe_hdr.h" 57#include "rxe_hdr.h"
58#include "rxe_param.h" 58#include "rxe_param.h"
59#include "rxe_verbs.h" 59#include "rxe_verbs.h"
60#include "rxe_loc.h"
60 61
61#define RXE_UVERBS_ABI_VERSION (1) 62#define RXE_UVERBS_ABI_VERSION (1)
62 63
@@ -95,7 +96,10 @@ void rxe_remove_all(void);
95 96
96int rxe_rcv(struct sk_buff *skb); 97int rxe_rcv(struct sk_buff *skb);
97 98
98void rxe_dev_put(struct rxe_dev *rxe); 99static inline void rxe_dev_put(struct rxe_dev *rxe)
100{
101 kref_put(&rxe->ref_cnt, rxe_release);
102}
99struct rxe_dev *net_to_rxe(struct net_device *ndev); 103struct rxe_dev *net_to_rxe(struct net_device *ndev);
100struct rxe_dev *get_rxe_by_name(const char *name); 104struct rxe_dev *get_rxe_by_name(const char *name);
101 105
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index d7472a442a2c..96c3a6c5c4b5 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -237,7 +237,6 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
237 237
238void rxe_release(struct kref *kref); 238void rxe_release(struct kref *kref);
239 239
240void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify);
241int rxe_completer(void *arg); 240int rxe_completer(void *arg);
242int rxe_requester(void *arg); 241int rxe_requester(void *arg);
243int rxe_responder(void *arg); 242int rxe_responder(void *arg);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 59dee10bebcb..159246b03867 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -82,7 +82,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
82} 82}
83 83
84 84
85struct rxe_recv_sockets recv_sockets; 85static struct rxe_recv_sockets recv_sockets;
86 86
87struct device *rxe_dma_device(struct rxe_dev *rxe) 87struct device *rxe_dma_device(struct rxe_dev *rxe)
88{ 88{
@@ -452,31 +452,26 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
452 452
453int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) 453int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
454{ 454{
455 struct sk_buff *nskb;
456 struct rxe_av *av; 455 struct rxe_av *av;
457 int err; 456 int err;
458 457
459 av = rxe_get_av(pkt); 458 av = rxe_get_av(pkt);
460 459
461 nskb = skb_clone(skb, GFP_ATOMIC); 460 skb->destructor = rxe_skb_tx_dtor;
462 if (!nskb) 461 skb->sk = pkt->qp->sk->sk;
463 return -ENOMEM;
464
465 nskb->destructor = rxe_skb_tx_dtor;
466 nskb->sk = pkt->qp->sk->sk;
467 462
468 rxe_add_ref(pkt->qp); 463 rxe_add_ref(pkt->qp);
469 atomic_inc(&pkt->qp->skb_out); 464 atomic_inc(&pkt->qp->skb_out);
470 465
471 if (av->network_type == RDMA_NETWORK_IPV4) { 466 if (av->network_type == RDMA_NETWORK_IPV4) {
472 err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); 467 err = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
473 } else if (av->network_type == RDMA_NETWORK_IPV6) { 468 } else if (av->network_type == RDMA_NETWORK_IPV6) {
474 err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb); 469 err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
475 } else { 470 } else {
476 pr_err("Unknown layer 3 protocol: %d\n", av->network_type); 471 pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
477 atomic_dec(&pkt->qp->skb_out); 472 atomic_dec(&pkt->qp->skb_out);
478 rxe_drop_ref(pkt->qp); 473 rxe_drop_ref(pkt->qp);
479 kfree_skb(nskb); 474 kfree_skb(skb);
480 return -EINVAL; 475 return -EINVAL;
481 } 476 }
482 477
@@ -485,7 +480,6 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
485 return -EAGAIN; 480 return -EAGAIN;
486 } 481 }
487 482
488 kfree_skb(skb);
489 return 0; 483 return 0;
490} 484}
491 485
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 1c06b3bfe1b6..728d8c71b36a 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,7 +43,6 @@ struct rxe_recv_sockets {
43 struct socket *sk6; 43 struct socket *sk6;
44}; 44};
45 45
46extern struct rxe_recv_sockets recv_sockets;
47extern struct notifier_block rxe_net_notifier; 46extern struct notifier_block rxe_net_notifier;
48void rxe_release_udp_tunnel(struct socket *sk); 47void rxe_release_udp_tunnel(struct socket *sk);
49 48
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 4469592b839d..137d6c0c49d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -824,9 +824,9 @@ void rxe_qp_destroy(struct rxe_qp *qp)
824} 824}
825 825
826/* called when the last reference to the qp is dropped */ 826/* called when the last reference to the qp is dropped */
827void rxe_qp_cleanup(struct rxe_pool_entry *arg) 827static void rxe_qp_do_cleanup(struct work_struct *work)
828{ 828{
829 struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); 829 struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
830 830
831 rxe_drop_all_mcast_groups(qp); 831 rxe_drop_all_mcast_groups(qp);
832 832
@@ -859,3 +859,11 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg)
859 kernel_sock_shutdown(qp->sk, SHUT_RDWR); 859 kernel_sock_shutdown(qp->sk, SHUT_RDWR);
860 sock_release(qp->sk); 860 sock_release(qp->sk);
861} 861}
862
863/* called when the last reference to the qp is dropped */
864void rxe_qp_cleanup(struct rxe_pool_entry *arg)
865{
866 struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
867
868 execute_in_process_context(rxe_qp_do_cleanup, &qp->cleanup_work);
869}
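rxe_qp_cleanup() runs when the last reference to the QP is dropped, which may happen outside process context, yet the teardown sleeps (socket shutdown and release). The rework moves the body into rxe_qp_do_cleanup() and dispatches it through execute_in_process_context(), which runs the function directly unless the caller is in interrupt context, in which case it queues the embedded struct execute_work on the system workqueue. A minimal sketch of the pattern, with made-up names:

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_obj {
	struct execute_work cleanup_work;
	/* ... resources whose teardown may sleep ... */
};

static void my_obj_do_cleanup(struct work_struct *work)
{
	struct my_obj *obj = container_of(work, struct my_obj,
					  cleanup_work.work);

	/* sleeping teardown is safe here: we run in process context */
	(void)obj;
}

/* last-reference callback, possibly reached from interrupt context */
static void my_obj_release(struct my_obj *obj)
{
	execute_in_process_context(my_obj_do_cleanup, &obj->cleanup_work);
}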
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index fb8c83e055e1..4c3f899241d4 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -336,7 +336,6 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
336{ 336{
337 union ib_gid dgid; 337 union ib_gid dgid;
338 union ib_gid *pdgid; 338 union ib_gid *pdgid;
339 u16 index;
340 339
341 if (skb->protocol == htons(ETH_P_IP)) { 340 if (skb->protocol == htons(ETH_P_IP)) {
342 ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, 341 ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
@@ -348,7 +347,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
348 347
349 return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, 348 return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
350 IB_GID_TYPE_ROCE_UDP_ENCAP, 349 IB_GID_TYPE_ROCE_UDP_ENCAP,
351 1, rxe->ndev, &index); 350 1, rxe->ndev, NULL);
352} 351}
353 352
354/* rxe_rcv is called from the interface driver */ 353/* rxe_rcv is called from the interface driver */
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 26a7f923045b..7bdaf71b8221 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -594,15 +594,8 @@ int rxe_requester(void *arg)
594 rxe_add_ref(qp); 594 rxe_add_ref(qp);
595 595
596next_wqe: 596next_wqe:
597 if (unlikely(!qp->valid)) { 597 if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
598 rxe_drain_req_pkts(qp, true);
599 goto exit; 598 goto exit;
600 }
601
602 if (unlikely(qp->req.state == QP_STATE_ERROR)) {
603 rxe_drain_req_pkts(qp, true);
604 goto exit;
605 }
606 599
607 if (unlikely(qp->req.state == QP_STATE_RESET)) { 600 if (unlikely(qp->req.state == QP_STATE_RESET)) {
608 qp->req.wqe_index = consumer_index(qp->sq.queue); 601 qp->req.wqe_index = consumer_index(qp->sq.queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 4240866a5331..d37bb9b97569 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -863,8 +863,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
863 863
864 if (pkt->mask & RXE_IMMDT_MASK) { 864 if (pkt->mask & RXE_IMMDT_MASK) {
865 uwc->wc_flags |= IB_WC_WITH_IMM; 865 uwc->wc_flags |= IB_WC_WITH_IMM;
866 uwc->ex.imm_data = 866 uwc->ex.imm_data = immdt_imm(pkt);
867 (__u32 __force)immdt_imm(pkt);
868 } 867 }
869 868
870 if (pkt->mask & RXE_IETH_MASK) { 869 if (pkt->mask & RXE_IETH_MASK) {
@@ -1210,7 +1209,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
1210 } 1209 }
1211} 1210}
1212 1211
1213void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) 1212static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
1214{ 1213{
1215 struct sk_buff *skb; 1214 struct sk_buff *skb;
1216 1215
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index d03002b9d84d..7210a784abb4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -814,6 +814,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
814 (queue_count(qp->sq.queue) > 1); 814 (queue_count(qp->sq.queue) > 1);
815 815
816 rxe_run_task(&qp->req.task, must_sched); 816 rxe_run_task(&qp->req.task, must_sched);
817 if (unlikely(qp->req.state == QP_STATE_ERROR))
818 rxe_run_task(&qp->comp.task, 1);
817 819
818 return err; 820 return err;
819} 821}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 0c2dbe45c729..1019f5e7dbdd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -35,6 +35,7 @@
35#define RXE_VERBS_H 35#define RXE_VERBS_H
36 36
37#include <linux/interrupt.h> 37#include <linux/interrupt.h>
38#include <linux/workqueue.h>
38#include <rdma/rdma_user_rxe.h> 39#include <rdma/rdma_user_rxe.h>
39#include "rxe_pool.h" 40#include "rxe_pool.h"
40#include "rxe_task.h" 41#include "rxe_task.h"
@@ -281,6 +282,8 @@ struct rxe_qp {
281 struct timer_list rnr_nak_timer; 282 struct timer_list rnr_nak_timer;
282 283
283 spinlock_t state_lock; /* guard requester and completer */ 284 spinlock_t state_lock; /* guard requester and completer */
285
286 struct execute_work cleanup_work;
284}; 287};
285 288
286enum rxe_mem_state { 289enum rxe_mem_state {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 71ea9e26666c..962fbcb57dc7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -766,12 +766,14 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
766 skb_orphan(skb); 766 skb_orphan(skb);
767 skb_dst_drop(skb); 767 skb_dst_drop(skb);
768 768
769 if (netif_queue_stopped(dev)) 769 if (netif_queue_stopped(dev)) {
770 if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | 770 rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
771 IB_CQ_REPORT_MISSED_EVENTS)) { 771 IB_CQ_REPORT_MISSED_EVENTS);
772 if (unlikely(rc < 0))
772 ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n"); 773 ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
774 else if (rc)
773 napi_schedule(&priv->send_napi); 775 napi_schedule(&priv->send_napi);
774 } 776 }
775 777
776 rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); 778 rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
777 if (unlikely(rc)) { 779 if (unlikely(rc)) {
@@ -876,7 +878,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
876 878
877 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); 879 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
878 if (IS_ERR(priv->cm.id)) { 880 if (IS_ERR(priv->cm.id)) {
879 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name); 881 pr_warn("%s: failed to create CM ID\n", priv->ca->name);
880 ret = PTR_ERR(priv->cm.id); 882 ret = PTR_ERR(priv->cm.id);
881 goto err_cm; 883 goto err_cm;
882 } 884 }
@@ -884,8 +886,8 @@ int ipoib_cm_dev_open(struct net_device *dev)
884 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), 886 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
885 0); 887 0);
886 if (ret) { 888 if (ret) {
887 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name, 889 pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
888 IPOIB_CM_IETF_ID | priv->qp->qp_num); 890 IPOIB_CM_IETF_ID | priv->qp->qp_num);
889 goto err_listen; 891 goto err_listen;
890 } 892 }
891 893
@@ -1562,7 +1564,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
1562 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1564 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
1563 if (IS_ERR(priv->cm.srq)) { 1565 if (IS_ERR(priv->cm.srq)) {
1564 if (PTR_ERR(priv->cm.srq) != -ENOSYS) 1566 if (PTR_ERR(priv->cm.srq) != -ENOSYS)
1565 printk(KERN_WARNING "%s: failed to allocate SRQ, error %ld\n", 1567 pr_warn("%s: failed to allocate SRQ, error %ld\n",
1566 priv->ca->name, PTR_ERR(priv->cm.srq)); 1568 priv->ca->name, PTR_ERR(priv->cm.srq));
1567 priv->cm.srq = NULL; 1569 priv->cm.srq = NULL;
1568 return; 1570 return;
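The ipoib_cm_send() change (and the matching one in ipoib_send() in the next file) stops treating every non-zero return from ib_req_notify_cq() as a failure. With IB_CQ_REPORT_MISSED_EVENTS the call returns a negative errno on real errors but a positive value when completions were already pending as the CQ was re-armed; the latter only means the send NAPI context should be kicked. A minimal sketch of the pattern, with the CQ and NAPI instance passed in as stand-ins for the driver's priv->send_cq and priv->send_napi:

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

static void rearm_send_cq(struct ib_cq *send_cq, struct napi_struct *send_napi)
{
	int rc = ib_req_notify_cq(send_cq,
				  IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);

	if (unlikely(rc < 0))
		pr_warn("request notify on send CQ failed\n");	/* real error */
	else if (rc)
		napi_schedule(send_napi);	/* missed completions: poll now */
}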
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index e6151a29c412..10384ea50bed 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -644,7 +644,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
644 644
645 if (netif_queue_stopped(dev)) 645 if (netif_queue_stopped(dev))
646 if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | 646 if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
647 IB_CQ_REPORT_MISSED_EVENTS)) 647 IB_CQ_REPORT_MISSED_EVENTS) < 0)
648 ipoib_warn(priv, "request notify on send CQ failed\n"); 648 ipoib_warn(priv, "request notify on send CQ failed\n");
649 649
650 rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), 650 rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
@@ -1085,8 +1085,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
1085 1085
1086 netif_addr_unlock_bh(priv->dev); 1086 netif_addr_unlock_bh(priv->dev);
1087 1087
1088 err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, 1088 err = ib_find_gid(priv->ca, &search_gid, priv->dev, &port, &index);
1089 priv->dev, &port, &index);
1090 1089
1091 netif_addr_lock_bh(priv->dev); 1090 netif_addr_lock_bh(priv->dev);
1092 1091
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8880351df179..5930c7d9a8fb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -768,13 +768,30 @@ static void path_rec_completion(int status,
768 if (!status) { 768 if (!status) {
769 struct rdma_ah_attr av; 769 struct rdma_ah_attr av;
770 770
771 if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) 771 if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
772 pathrec, &av))
772 ah = ipoib_create_ah(dev, priv->pd, &av); 773 ah = ipoib_create_ah(dev, priv->pd, &av);
773 } 774 }
774 775
775 spin_lock_irqsave(&priv->lock, flags); 776 spin_lock_irqsave(&priv->lock, flags);
776 777
777 if (!IS_ERR_OR_NULL(ah)) { 778 if (!IS_ERR_OR_NULL(ah)) {
779 /*
780 * pathrec.dgid is used as the database key from the LLADDR,
781 * it must remain unchanged even if the SA returns a different
782 * GID to use in the AH.
783 */
784 if (memcmp(pathrec->dgid.raw, path->pathrec.dgid.raw,
785 sizeof(union ib_gid))) {
786 ipoib_dbg(
787 priv,
788 "%s got PathRec for gid %pI6 while asked for %pI6\n",
789 dev->name, pathrec->dgid.raw,
790 path->pathrec.dgid.raw);
791 memcpy(pathrec->dgid.raw, path->pathrec.dgid.raw,
792 sizeof(union ib_gid));
793 }
794
778 path->pathrec = *pathrec; 795 path->pathrec = *pathrec;
779 796
780 old_ah = path->ah; 797 old_ah = path->ah;
@@ -840,6 +857,23 @@ static void path_rec_completion(int status,
840 } 857 }
841} 858}
842 859
860static void init_path_rec(struct ipoib_dev_priv *priv, struct ipoib_path *path,
861 void *gid)
862{
863 path->dev = priv->dev;
864
865 if (rdma_cap_opa_ah(priv->ca, priv->port))
866 path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
867 else
868 path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
869
870 memcpy(path->pathrec.dgid.raw, gid, sizeof(union ib_gid));
871 path->pathrec.sgid = priv->local_gid;
872 path->pathrec.pkey = cpu_to_be16(priv->pkey);
873 path->pathrec.numb_path = 1;
874 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
875}
876
843static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) 877static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
844{ 878{
845 struct ipoib_dev_priv *priv = ipoib_priv(dev); 879 struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -852,21 +886,11 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
852 if (!path) 886 if (!path)
853 return NULL; 887 return NULL;
854 888
855 path->dev = dev;
856
857 skb_queue_head_init(&path->queue); 889 skb_queue_head_init(&path->queue);
858 890
859 INIT_LIST_HEAD(&path->neigh_list); 891 INIT_LIST_HEAD(&path->neigh_list);
860 892
861 if (rdma_cap_opa_ah(priv->ca, priv->port)) 893 init_path_rec(priv, path, gid);
862 path->pathrec.rec_type = SA_PATH_REC_TYPE_OPA;
863 else
864 path->pathrec.rec_type = SA_PATH_REC_TYPE_IB;
865 memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
866 path->pathrec.sgid = priv->local_gid;
867 path->pathrec.pkey = cpu_to_be16(priv->pkey);
868 path->pathrec.numb_path = 1;
869 path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
870 894
871 return path; 895 return path;
872} 896}
@@ -1005,6 +1029,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
1005 1029
1006 spin_lock_irqsave(&priv->lock, flags); 1030 spin_lock_irqsave(&priv->lock, flags);
1007 1031
1032 /* no broadcast means that all paths are (going to be) not valid */
1033 if (!priv->broadcast)
1034 goto drop_and_unlock;
1035
1008 path = __path_find(dev, phdr->hwaddr + 4); 1036 path = __path_find(dev, phdr->hwaddr + 4);
1009 if (!path || !path->valid) { 1037 if (!path || !path->valid) {
1010 int new_path = 0; 1038 int new_path = 0;
@@ -1014,6 +1042,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
1014 new_path = 1; 1042 new_path = 1;
1015 } 1043 }
1016 if (path) { 1044 if (path) {
1045 if (!new_path)
1046 /* make sure there is no changes in the existing path record */
1047 init_path_rec(priv, path, phdr->hwaddr + 4);
1048
1017 if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 1049 if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
1018 push_pseudo_header(skb, phdr->hwaddr); 1050 push_pseudo_header(skb, phdr->hwaddr);
1019 __skb_queue_tail(&path->queue, skb); 1051 __skb_queue_tail(&path->queue, skb);
@@ -1030,8 +1062,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
1030 } else 1062 } else
1031 __path_add(dev, path); 1063 __path_add(dev, path);
1032 } else { 1064 } else {
1033 ++dev->stats.tx_dropped; 1065 goto drop_and_unlock;
1034 dev_kfree_skb_any(skb);
1035 } 1066 }
1036 1067
1037 spin_unlock_irqrestore(&priv->lock, flags); 1068 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1051,11 +1082,16 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
1051 push_pseudo_header(skb, phdr->hwaddr); 1082 push_pseudo_header(skb, phdr->hwaddr);
1052 __skb_queue_tail(&path->queue, skb); 1083 __skb_queue_tail(&path->queue, skb);
1053 } else { 1084 } else {
1054 ++dev->stats.tx_dropped; 1085 goto drop_and_unlock;
1055 dev_kfree_skb_any(skb);
1056 } 1086 }
1057 1087
1058 spin_unlock_irqrestore(&priv->lock, flags); 1088 spin_unlock_irqrestore(&priv->lock, flags);
1089 return;
1090
1091drop_and_unlock:
1092 ++dev->stats.tx_dropped;
1093 dev_kfree_skb_any(skb);
1094 spin_unlock_irqrestore(&priv->lock, flags);
1059} 1095}
1060 1096
1061static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) 1097static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -1674,8 +1710,8 @@ static int ipoib_dev_init_default(struct net_device *dev)
1674 1710
1675 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); 1711 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
1676 if (!priv->tx_ring) { 1712 if (!priv->tx_ring) {
1677 printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n", 1713 pr_warn("%s: failed to allocate TX ring (%d entries)\n",
1678 priv->ca->name, ipoib_sendq_size); 1714 priv->ca->name, ipoib_sendq_size);
1679 goto out_rx_ring_cleanup; 1715 goto out_rx_ring_cleanup;
1680 } 1716 }
1681 1717
@@ -2207,16 +2243,17 @@ static struct net_device *ipoib_add_port(const char *format,
2207 int result = -ENOMEM; 2243 int result = -ENOMEM;
2208 2244
2209 priv = ipoib_intf_alloc(hca, port, format); 2245 priv = ipoib_intf_alloc(hca, port, format);
2210 if (!priv) 2246 if (!priv) {
2247 pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
2211 goto alloc_mem_failed; 2248 goto alloc_mem_failed;
2249 }
2212 2250
2213 SET_NETDEV_DEV(priv->dev, hca->dev.parent); 2251 SET_NETDEV_DEV(priv->dev, hca->dev.parent);
2214 priv->dev->dev_id = port - 1; 2252 priv->dev->dev_id = port - 1;
2215 2253
2216 result = ib_query_port(hca, port, &attr); 2254 result = ib_query_port(hca, port, &attr);
2217 if (result) { 2255 if (result) {
2218 printk(KERN_WARNING "%s: ib_query_port %d failed\n", 2256 pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
2219 hca->name, port);
2220 goto device_init_failed; 2257 goto device_init_failed;
2221 } 2258 }
2222 2259
@@ -2231,8 +2268,8 @@ static struct net_device *ipoib_add_port(const char *format,
2231 2268
2232 result = ib_query_pkey(hca, port, 0, &priv->pkey); 2269 result = ib_query_pkey(hca, port, 0, &priv->pkey);
2233 if (result) { 2270 if (result) {
2234 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 2271 pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
2235 hca->name, port, result); 2272 hca->name, port, result);
2236 goto device_init_failed; 2273 goto device_init_failed;
2237 } 2274 }
2238 2275
@@ -2249,8 +2286,8 @@ static struct net_device *ipoib_add_port(const char *format,
2249 2286
2250 result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); 2287 result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
2251 if (result) { 2288 if (result) {
2252 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 2289 pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
2253 hca->name, port, result); 2290 hca->name, port, result);
2254 goto device_init_failed; 2291 goto device_init_failed;
2255 } 2292 }
2256 2293
@@ -2260,8 +2297,8 @@ static struct net_device *ipoib_add_port(const char *format,
2260 2297
2261 result = ipoib_dev_init(priv->dev, hca, port); 2298 result = ipoib_dev_init(priv->dev, hca, port);
2262 if (result) { 2299 if (result) {
2263 printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", 2300 pr_warn("%s: failed to initialize port %d (ret = %d)\n",
2264 hca->name, port, result); 2301 hca->name, port, result);
2265 goto device_init_failed; 2302 goto device_init_failed;
2266 } 2303 }
2267 2304
@@ -2271,8 +2308,8 @@ static struct net_device *ipoib_add_port(const char *format,
2271 2308
2272 result = register_netdev(priv->dev); 2309 result = register_netdev(priv->dev);
2273 if (result) { 2310 if (result) {
2274 printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n", 2311 pr_warn("%s: couldn't register ipoib port %d; error %d\n",
2275 hca->name, port, result); 2312 hca->name, port, result);
2276 goto register_failed; 2313 goto register_failed;
2277 } 2314 }
2278 2315
@@ -2337,8 +2374,7 @@ static void ipoib_add_one(struct ib_device *device)
2337 } 2374 }
2338 2375
2339 if (!count) { 2376 if (!count) {
2340 pr_err("Failed to init port, removing it\n"); 2377 kfree(dev_list);
2341 ipoib_remove_one(device, dev_list);
2342 return; 2378 return;
2343 } 2379 }
2344 2380
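
The unicast_arp_send() changes above fold two duplicated "count the drop, free the skb, unlock" sequences into one drop_and_unlock label. A compact userspace sketch of that single-exit cleanup idiom, with stand-in names and a pthread mutex in place of the spinlock:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long tx_dropped;

static void xmit(void *skb, int path_ok, int queue_full)
{
    pthread_mutex_lock(&lock);

    if (!path_ok)
        goto drop_and_unlock;
    if (queue_full)
        goto drop_and_unlock;

    /* ...hand the buffer to the send path here... */
    pthread_mutex_unlock(&lock);
    return;

drop_and_unlock:
    tx_dropped++;
    free(skb);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    xmit(malloc(16), 0, 0);   /* no usable path: dropped under the lock */
    printf("tx_dropped = %lu\n", tx_dropped);
    return 0;
}
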
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index a1ed25422b72..984a88096f39 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -178,7 +178,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
178 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, 178 priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL,
179 priv, &cq_attr); 179 priv, &cq_attr);
180 if (IS_ERR(priv->recv_cq)) { 180 if (IS_ERR(priv->recv_cq)) {
181 printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name); 181 pr_warn("%s: failed to create receive CQ\n", ca->name);
182 goto out_cm_dev_cleanup; 182 goto out_cm_dev_cleanup;
183 } 183 }
184 184
@@ -187,7 +187,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
187 priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, 187 priv->send_cq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL,
188 priv, &cq_attr); 188 priv, &cq_attr);
189 if (IS_ERR(priv->send_cq)) { 189 if (IS_ERR(priv->send_cq)) {
190 printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name); 190 pr_warn("%s: failed to create send CQ\n", ca->name);
191 goto out_free_recv_cq; 191 goto out_free_recv_cq;
192 } 192 }
193 193
@@ -208,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
208 208
209 priv->qp = ib_create_qp(priv->pd, &init_attr); 209 priv->qp = ib_create_qp(priv->pd, &init_attr);
210 if (IS_ERR(priv->qp)) { 210 if (IS_ERR(priv->qp)) {
211 printk(KERN_WARNING "%s: failed to create QP\n", ca->name); 211 pr_warn("%s: failed to create QP\n", ca->name);
212 goto out_free_send_cq; 212 goto out_free_send_cq;
213 } 213 }
214 214
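
Many hunks in this series simply switch printk(KERN_WARNING ...) to pr_warn(...). Roughly speaking (simplified here; the real macro also applies the per-file pr_fmt() prefix), pr_warn() is a convenience wrapper that bakes in the warning log level, as in this userspace imitation:

#include <stdarg.h>
#include <stdio.h>

#define KERN_WARNING "<4>"   /* log-level prefix shown as a plain string here */
#define pr_warn(fmt, ...) printk(KERN_WARNING fmt, ##__VA_ARGS__)

static int printk(const char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = vfprintf(stderr, fmt, ap);
    va_end(ap);
    return ret;
}

int main(void)
{
    pr_warn("%s: failed to create send CQ\n", "mlx5_0");
    return 0;
}
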
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2a07692007bd..df49c4eb67f7 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -142,8 +142,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
142 hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz); 142 hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
143 } 143 }
144 144
145 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " 145 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X VA:%#llX + unsol:%d\n",
146 "VA:%#llX + unsol:%d\n",
147 task->itt, mem_reg->rkey, 146 task->itt, mem_reg->rkey,
148 (unsigned long long)mem_reg->sge.addr, unsol_sz); 147 (unsigned long long)mem_reg->sge.addr, unsol_sz);
149 } 148 }
@@ -436,7 +435,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
436{ 435{
437 struct iser_conn *iser_conn = conn->dd_data; 436 struct iser_conn *iser_conn = conn->dd_data;
438 struct iscsi_iser_task *iser_task = task->dd_data; 437 struct iscsi_iser_task *iser_task = task->dd_data;
439 struct iser_tx_desc *tx_desc = NULL; 438 struct iser_tx_desc *tx_desc;
440 struct iser_mem_reg *mem_reg; 439 struct iser_mem_reg *mem_reg;
441 unsigned long buf_offset; 440 unsigned long buf_offset;
442 unsigned long data_seg_len; 441 unsigned long data_seg_len;
@@ -452,10 +451,8 @@ int iser_send_data_out(struct iscsi_conn *conn,
452 __func__,(int)itt,(int)data_seg_len,(int)buf_offset); 451 __func__,(int)itt,(int)data_seg_len,(int)buf_offset);
453 452
454 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC); 453 tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
455 if (tx_desc == NULL) { 454 if (!tx_desc)
456 iser_err("Failed to alloc desc for post dataout\n");
457 return -ENOMEM; 455 return -ENOMEM;
458 }
459 456
460 tx_desc->type = ISCSI_TX_DATAOUT; 457 tx_desc->type = ISCSI_TX_DATAOUT;
461 tx_desc->cqe.done = iser_dataout_comp; 458 tx_desc->cqe.done = iser_dataout_comp;
@@ -475,8 +472,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
475 tx_desc->num_sge = 2; 472 tx_desc->num_sge = 2;
476 473
477 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) { 474 if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
478 iser_err("Offset:%ld & DSL:%ld in Data-Out " 475 iser_err("Offset:%ld & DSL:%ld in Data-Out inconsistent with total len:%ld, itt:%d\n",
479 "inconsistent with total len:%ld, itt:%d\n",
480 buf_offset, data_seg_len, 476 buf_offset, data_seg_len,
481 iser_task->data[ISER_DIR_OUT].data_len, itt); 477 iser_task->data[ISER_DIR_OUT].data_len, itt);
482 err = -EINVAL; 478 err = -EINVAL;
@@ -614,8 +610,8 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
614 iser_conn, rkey); 610 iser_conn, rkey);
615 611
616 if (unlikely(!iser_conn->snd_w_inv)) { 612 if (unlikely(!iser_conn->snd_w_inv)) {
617 iser_err("conn %p: unexpected remote invalidation, " 613 iser_err("conn %p: unexpected remote invalidation, terminating connection\n",
618 "terminating connection\n", iser_conn); 614 iser_conn);
619 return -EPROTO; 615 return -EPROTO;
620 } 616 }
621 617
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 1b02283ce20e..fff40b097947 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -2124,6 +2124,9 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
2124 u32 rkey, offset; 2124 u32 rkey, offset;
2125 int ret; 2125 int ret;
2126 2126
2127 if (cmd->ctx_init_done)
2128 goto rdma_ctx_post;
2129
2127 if (dir == DMA_FROM_DEVICE) { 2130 if (dir == DMA_FROM_DEVICE) {
2128 addr = cmd->write_va; 2131 addr = cmd->write_va;
2129 rkey = cmd->write_stag; 2132 rkey = cmd->write_stag;
@@ -2151,11 +2154,15 @@ isert_rdma_rw_ctx_post(struct isert_cmd *cmd, struct isert_conn *conn,
2151 se_cmd->t_data_sg, se_cmd->t_data_nents, 2154 se_cmd->t_data_sg, se_cmd->t_data_nents,
2152 offset, addr, rkey, dir); 2155 offset, addr, rkey, dir);
2153 } 2156 }
2157
2154 if (ret < 0) { 2158 if (ret < 0) {
2155 isert_err("Cmd: %p failed to prepare RDMA res\n", cmd); 2159 isert_err("Cmd: %p failed to prepare RDMA res\n", cmd);
2156 return ret; 2160 return ret;
2157 } 2161 }
2158 2162
2163 cmd->ctx_init_done = true;
2164
2165rdma_ctx_post:
2159 ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr); 2166 ret = rdma_rw_ctx_post(&cmd->rw, conn->qp, port_num, cqe, chain_wr);
2160 if (ret < 0) 2167 if (ret < 0)
2161 isert_err("Cmd: %p failed to post RDMA res\n", cmd); 2168 isert_err("Cmd: %p failed to post RDMA res\n", cmd);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index d6fd248320ae..3b296bac4f60 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -126,6 +126,7 @@ struct isert_cmd {
126 struct rdma_rw_ctx rw; 126 struct rdma_rw_ctx rw;
127 struct work_struct comp_work; 127 struct work_struct comp_work;
128 struct scatterlist sg; 128 struct scatterlist sg;
129 bool ctx_init_done;
129}; 130};
130 131
131static inline struct isert_cmd *tx_desc_to_cmd(struct iser_tx_desc *desc) 132static inline struct isert_cmd *tx_desc_to_cmd(struct iser_tx_desc *desc)
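
The isert change above introduces a ctx_init_done flag so isert_rdma_rw_ctx_post() sets up the R/W context only once, and later invocations jump straight to posting it. A small sketch of that initialize-once-then-repost guard, using stand-in types and helpers rather than the isert ones:

#include <stdbool.h>
#include <stdio.h>

struct cmd {
    bool ctx_init_done;
    int ctx;
};

static int ctx_init(struct cmd *cmd)
{
    cmd->ctx = 42;   /* pretend this maps SG lists, registers memory, etc. */
    return 0;
}

static int ctx_post(struct cmd *cmd)
{
    printf("posting ctx %d\n", cmd->ctx);
    return 0;
}

static int rdma_rw_post(struct cmd *cmd)
{
    int ret;

    if (cmd->ctx_init_done)
        goto post;

    ret = ctx_init(cmd);
    if (ret < 0)
        return ret;
    cmd->ctx_init_done = true;
post:
    return ctx_post(cmd);
}

int main(void)
{
    struct cmd cmd = { 0 };

    rdma_rw_post(&cmd);   /* initializes the context, then posts it */
    rdma_rw_post(&cmd);   /* reuses the existing context */
    return 0;
}
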
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 4b615c1451e7..15711dcc6f58 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -710,7 +710,7 @@ vema_get_port(struct opa_vnic_ctrl_port *cport, u8 port_num)
710 710
711/** 711/**
712 * opa_vnic_vema_send_trap -- This function sends a trap to the EM 712 * opa_vnic_vema_send_trap -- This function sends a trap to the EM
713 * @cport: pointer to vnic control port 713 * @adapter: pointer to vnic adapter
714 * @data: pointer to trap data filled by calling function 714 * @data: pointer to trap data filled by calling function
715 * @lid: issuers lid (encap_slid from vesw_port_info) 715 * @lid: issuers lid (encap_slid from vesw_port_info)
716 * 716 *
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 972d4b3c5223..b48843833d69 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -41,6 +41,7 @@
41#include <linux/random.h> 41#include <linux/random.h>
42#include <linux/jiffies.h> 42#include <linux/jiffies.h>
43#include <linux/lockdep.h> 43#include <linux/lockdep.h>
44#include <linux/inet.h>
44#include <rdma/ib_cache.h> 45#include <rdma/ib_cache.h>
45 46
46#include <linux/atomic.h> 47#include <linux/atomic.h>
@@ -144,7 +145,9 @@ static void srp_remove_one(struct ib_device *device, void *client_data);
144static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 145static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 146static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
146 const char *opname); 147 const char *opname);
147static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 148static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
150 struct rdma_cm_event *event);
148 151
149static struct scsi_transport_template *ib_srp_transport_template; 152static struct scsi_transport_template *ib_srp_transport_template;
150static struct workqueue_struct *srp_remove_wq; 153static struct workqueue_struct *srp_remove_wq;
@@ -265,8 +268,8 @@ static void srp_qp_event(struct ib_event *event, void *context)
265 ib_event_msg(event->event), event->event); 268 ib_event_msg(event->event), event->event);
266} 269}
267 270
268static int srp_init_qp(struct srp_target_port *target, 271static int srp_init_ib_qp(struct srp_target_port *target,
269 struct ib_qp *qp) 272 struct ib_qp *qp)
270{ 273{
271 struct ib_qp_attr *attr; 274 struct ib_qp_attr *attr;
272 int ret; 275 int ret;
@@ -277,7 +280,7 @@ static int srp_init_qp(struct srp_target_port *target,
277 280
278 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 281 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 target->srp_host->port, 282 target->srp_host->port,
280 be16_to_cpu(target->pkey), 283 be16_to_cpu(target->ib_cm.pkey),
281 &attr->pkey_index); 284 &attr->pkey_index);
282 if (ret) 285 if (ret)
283 goto out; 286 goto out;
@@ -298,32 +301,110 @@ out:
298 return ret; 301 return ret;
299} 302}
300 303
301static int srp_new_cm_id(struct srp_rdma_ch *ch) 304static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
302{ 305{
303 struct srp_target_port *target = ch->target; 306 struct srp_target_port *target = ch->target;
304 struct ib_cm_id *new_cm_id; 307 struct ib_cm_id *new_cm_id;
305 308
306 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 309 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
307 srp_cm_handler, ch); 310 srp_ib_cm_handler, ch);
308 if (IS_ERR(new_cm_id)) 311 if (IS_ERR(new_cm_id))
309 return PTR_ERR(new_cm_id); 312 return PTR_ERR(new_cm_id);
310 313
311 if (ch->cm_id) 314 if (ch->ib_cm.cm_id)
312 ib_destroy_cm_id(ch->cm_id); 315 ib_destroy_cm_id(ch->ib_cm.cm_id);
313 ch->cm_id = new_cm_id; 316 ch->ib_cm.cm_id = new_cm_id;
314 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 317 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
315 target->srp_host->port)) 318 target->srp_host->port))
316 ch->path.rec_type = SA_PATH_REC_TYPE_OPA; 319 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
317 else 320 else
318 ch->path.rec_type = SA_PATH_REC_TYPE_IB; 321 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
319 ch->path.sgid = target->sgid; 322 ch->ib_cm.path.sgid = target->sgid;
320 ch->path.dgid = target->orig_dgid; 323 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
321 ch->path.pkey = target->pkey; 324 ch->ib_cm.path.pkey = target->ib_cm.pkey;
322 ch->path.service_id = target->service_id; 325 ch->ib_cm.path.service_id = target->ib_cm.service_id;
323 326
324 return 0; 327 return 0;
325} 328}
326 329
330static const char *inet_ntop(const void *sa, char *dst, unsigned int size)
331{
332 switch (((struct sockaddr *)sa)->sa_family) {
333 case AF_INET:
334 snprintf(dst, size, "%pI4",
335 &((struct sockaddr_in *)sa)->sin_addr);
336 break;
337 case AF_INET6:
338 snprintf(dst, size, "%pI6",
339 &((struct sockaddr_in6 *)sa)->sin6_addr);
340 break;
341 default:
342 snprintf(dst, size, "???");
343 break;
344 }
345 return dst;
346}
347
348static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
349{
350 struct srp_target_port *target = ch->target;
351 struct rdma_cm_id *new_cm_id;
352 char src_addr[64], dst_addr[64];
353 int ret;
354
355 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
356 RDMA_PS_TCP, IB_QPT_RC);
357 if (IS_ERR(new_cm_id)) {
358 ret = PTR_ERR(new_cm_id);
359 new_cm_id = NULL;
360 goto out;
361 }
362
363 init_completion(&ch->done);
364 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
365 (struct sockaddr *)&target->rdma_cm.src : NULL,
366 (struct sockaddr *)&target->rdma_cm.dst,
367 SRP_PATH_REC_TIMEOUT_MS);
368 if (ret) {
369 pr_err("No route available from %s to %s (%d)\n",
370 target->rdma_cm.src_specified ?
371 inet_ntop(&target->rdma_cm.src, src_addr,
372 sizeof(src_addr)) : "(any)",
373 inet_ntop(&target->rdma_cm.dst, dst_addr,
374 sizeof(dst_addr)),
375 ret);
376 goto out;
377 }
378 ret = wait_for_completion_interruptible(&ch->done);
379 if (ret < 0)
380 goto out;
381
382 ret = ch->status;
383 if (ret) {
384 pr_err("Resolving address %s failed (%d)\n",
385 inet_ntop(&target->rdma_cm.dst, dst_addr,
386 sizeof(dst_addr)),
387 ret);
388 goto out;
389 }
390
391 swap(ch->rdma_cm.cm_id, new_cm_id);
392
393out:
394 if (new_cm_id)
395 rdma_destroy_id(new_cm_id);
396
397 return ret;
398}
399
400static int srp_new_cm_id(struct srp_rdma_ch *ch)
401{
402 struct srp_target_port *target = ch->target;
403
404 return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
405 srp_new_ib_cm_id(ch);
406}
407
327static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 408static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
328{ 409{
329 struct srp_device *dev = target->srp_host->srp_dev; 410 struct srp_device *dev = target->srp_host->srp_dev;
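
srp_new_rdma_cm_id() above starts an asynchronous rdma_resolve_addr() and then sleeps on ch->done until the CM event handler records a status and signals the completion. The plain-pthread approximation below (not the kernel completion API) shows the submit-then-wait shape of that pattern:

#include <pthread.h>
#include <stdio.h>

struct completion {
    pthread_mutex_t lock;
    pthread_cond_t cond;
    int done;
};

static struct completion done_ev = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};
static int status;

static void complete(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    c->done = 1;
    pthread_cond_signal(&c->cond);
    pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
    pthread_mutex_lock(&c->lock);
    while (!c->done)
        pthread_cond_wait(&c->cond, &c->lock);
    pthread_mutex_unlock(&c->lock);
}

static void *event_handler(void *arg)
{
    /* ...address resolved (or failed): record status, wake the waiter... */
    status = 0;
    complete(&done_ev);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, event_handler, NULL); /* "resolution" kicked off */
    wait_for_completion(&done_ev);                 /* caller blocks here */
    printf("resolution finished, status %d\n", status);
    pthread_join(t, NULL);
    return 0;
}
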
@@ -521,16 +602,25 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
521 init_attr->send_cq = send_cq; 602 init_attr->send_cq = send_cq;
522 init_attr->recv_cq = recv_cq; 603 init_attr->recv_cq = recv_cq;
523 604
524 qp = ib_create_qp(dev->pd, init_attr); 605 if (target->using_rdma_cm) {
525 if (IS_ERR(qp)) { 606 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
526 ret = PTR_ERR(qp); 607 qp = ch->rdma_cm.cm_id->qp;
608 } else {
609 qp = ib_create_qp(dev->pd, init_attr);
610 if (!IS_ERR(qp)) {
611 ret = srp_init_ib_qp(target, qp);
612 if (ret)
613 ib_destroy_qp(qp);
614 } else {
615 ret = PTR_ERR(qp);
616 }
617 }
618 if (ret) {
619 pr_err("QP creation failed for dev %s: %d\n",
620 dev_name(&dev->dev->dev), ret);
527 goto err_send_cq; 621 goto err_send_cq;
528 } 622 }
529 623
530 ret = srp_init_qp(target, qp);
531 if (ret)
532 goto err_qp;
533
534 if (dev->use_fast_reg) { 624 if (dev->use_fast_reg) {
535 fr_pool = srp_alloc_fr_pool(target); 625 fr_pool = srp_alloc_fr_pool(target);
536 if (IS_ERR(fr_pool)) { 626 if (IS_ERR(fr_pool)) {
@@ -574,7 +664,10 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
574 return 0; 664 return 0;
575 665
576err_qp: 666err_qp:
577 ib_destroy_qp(qp); 667 if (target->using_rdma_cm)
668 rdma_destroy_qp(ch->rdma_cm.cm_id);
669 else
670 ib_destroy_qp(qp);
578 671
579err_send_cq: 672err_send_cq:
580 ib_free_cq(send_cq); 673 ib_free_cq(send_cq);
@@ -600,9 +693,16 @@ static void srp_free_ch_ib(struct srp_target_port *target,
600 if (!ch->target) 693 if (!ch->target)
601 return; 694 return;
602 695
603 if (ch->cm_id) { 696 if (target->using_rdma_cm) {
604 ib_destroy_cm_id(ch->cm_id); 697 if (ch->rdma_cm.cm_id) {
605 ch->cm_id = NULL; 698 rdma_destroy_id(ch->rdma_cm.cm_id);
699 ch->rdma_cm.cm_id = NULL;
700 }
701 } else {
702 if (ch->ib_cm.cm_id) {
703 ib_destroy_cm_id(ch->ib_cm.cm_id);
704 ch->ib_cm.cm_id = NULL;
705 }
606 } 706 }
607 707
608 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */ 708 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
@@ -658,16 +758,16 @@ static void srp_path_rec_completion(int status,
658 shost_printk(KERN_ERR, target->scsi_host, 758 shost_printk(KERN_ERR, target->scsi_host,
659 PFX "Got failed path rec status %d\n", status); 759 PFX "Got failed path rec status %d\n", status);
660 else 760 else
661 ch->path = *pathrec; 761 ch->ib_cm.path = *pathrec;
662 complete(&ch->done); 762 complete(&ch->done);
663} 763}
664 764
665static int srp_lookup_path(struct srp_rdma_ch *ch) 765static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
666{ 766{
667 struct srp_target_port *target = ch->target; 767 struct srp_target_port *target = ch->target;
668 int ret = -ENODEV; 768 int ret = -ENODEV;
669 769
670 ch->path.numb_path = 1; 770 ch->ib_cm.path.numb_path = 1;
671 771
672 init_completion(&ch->done); 772 init_completion(&ch->done);
673 773
@@ -678,10 +778,10 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
678 if (!scsi_host_get(target->scsi_host)) 778 if (!scsi_host_get(target->scsi_host))
679 goto out; 779 goto out;
680 780
681 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client, 781 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
682 target->srp_host->srp_dev->dev, 782 target->srp_host->srp_dev->dev,
683 target->srp_host->port, 783 target->srp_host->port,
684 &ch->path, 784 &ch->ib_cm.path,
685 IB_SA_PATH_REC_SERVICE_ID | 785 IB_SA_PATH_REC_SERVICE_ID |
686 IB_SA_PATH_REC_DGID | 786 IB_SA_PATH_REC_DGID |
687 IB_SA_PATH_REC_SGID | 787 IB_SA_PATH_REC_SGID |
@@ -690,8 +790,8 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
690 SRP_PATH_REC_TIMEOUT_MS, 790 SRP_PATH_REC_TIMEOUT_MS,
691 GFP_KERNEL, 791 GFP_KERNEL,
692 srp_path_rec_completion, 792 srp_path_rec_completion,
693 ch, &ch->path_query); 793 ch, &ch->ib_cm.path_query);
694 ret = ch->path_query_id; 794 ret = ch->ib_cm.path_query_id;
695 if (ret < 0) 795 if (ret < 0)
696 goto put; 796 goto put;
697 797
@@ -702,7 +802,10 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
702 ret = ch->status; 802 ret = ch->status;
703 if (ret < 0) 803 if (ret < 0)
704 shost_printk(KERN_WARNING, target->scsi_host, 804 shost_printk(KERN_WARNING, target->scsi_host,
705 PFX "Path record query failed\n"); 805 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
806 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
807 be16_to_cpu(target->ib_cm.pkey),
808 be64_to_cpu(target->ib_cm.service_id));
706 809
707put: 810put:
708 scsi_host_put(target->scsi_host); 811 scsi_host_put(target->scsi_host);
@@ -711,6 +814,34 @@ out:
711 return ret; 814 return ret;
712} 815}
713 816
817static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
818{
819 struct srp_target_port *target = ch->target;
820 int ret;
821
822 init_completion(&ch->done);
823
824 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
825 if (ret)
826 return ret;
827
828 wait_for_completion_interruptible(&ch->done);
829
830 if (ch->status != 0)
831 shost_printk(KERN_WARNING, target->scsi_host,
832 PFX "Path resolution failed\n");
833
834 return ch->status;
835}
836
837static int srp_lookup_path(struct srp_rdma_ch *ch)
838{
839 struct srp_target_port *target = ch->target;
840
841 return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
842 srp_ib_lookup_path(ch);
843}
844
714static u8 srp_get_subnet_timeout(struct srp_host *host) 845static u8 srp_get_subnet_timeout(struct srp_host *host)
715{ 846{
716 struct ib_port_attr attr; 847 struct ib_port_attr attr;
@@ -732,48 +863,76 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
732{ 863{
733 struct srp_target_port *target = ch->target; 864 struct srp_target_port *target = ch->target;
734 struct { 865 struct {
735 struct ib_cm_req_param param; 866 struct rdma_conn_param rdma_param;
736 struct srp_login_req priv; 867 struct srp_login_req_rdma rdma_req;
868 struct ib_cm_req_param ib_param;
869 struct srp_login_req ib_req;
737 } *req = NULL; 870 } *req = NULL;
871 char *ipi, *tpi;
738 int status; 872 int status;
739 u8 subnet_timeout;
740
741 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
742 873
743 req = kzalloc(sizeof *req, GFP_KERNEL); 874 req = kzalloc(sizeof *req, GFP_KERNEL);
744 if (!req) 875 if (!req)
745 return -ENOMEM; 876 return -ENOMEM;
746 877
747 req->param.primary_path = &ch->path; 878 req->ib_param.flow_control = 1;
748 req->param.alternate_path = NULL; 879 req->ib_param.retry_count = target->tl_retry_count;
749 req->param.service_id = target->service_id;
750 req->param.qp_num = ch->qp->qp_num;
751 req->param.qp_type = ch->qp->qp_type;
752 req->param.private_data = &req->priv;
753 req->param.private_data_len = sizeof req->priv;
754 req->param.flow_control = 1;
755
756 get_random_bytes(&req->param.starting_psn, 4);
757 req->param.starting_psn &= 0xffffff;
758 880
759 /* 881 /*
760 * Pick some arbitrary defaults here; we could make these 882 * Pick some arbitrary defaults here; we could make these
761 * module parameters if anyone cared about setting them. 883 * module parameters if anyone cared about setting them.
762 */ 884 */
763 req->param.responder_resources = 4; 885 req->ib_param.responder_resources = 4;
764 req->param.remote_cm_response_timeout = subnet_timeout + 2; 886 req->ib_param.rnr_retry_count = 7;
765 req->param.local_cm_response_timeout = subnet_timeout + 2; 887 req->ib_param.max_cm_retries = 15;
766 req->param.retry_count = target->tl_retry_count; 888
767 req->param.rnr_retry_count = 7; 889 req->ib_req.opcode = SRP_LOGIN_REQ;
768 req->param.max_cm_retries = 15; 890 req->ib_req.tag = 0;
769 891 req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
770 req->priv.opcode = SRP_LOGIN_REQ; 892 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
771 req->priv.tag = 0;
772 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
773 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
774 SRP_BUF_FORMAT_INDIRECT); 893 SRP_BUF_FORMAT_INDIRECT);
775 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI : 894 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
776 SRP_MULTICHAN_SINGLE); 895 SRP_MULTICHAN_SINGLE);
896
897 if (target->using_rdma_cm) {
898 req->rdma_param.flow_control = req->ib_param.flow_control;
899 req->rdma_param.responder_resources =
900 req->ib_param.responder_resources;
901 req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
902 req->rdma_param.retry_count = req->ib_param.retry_count;
903 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
904 req->rdma_param.private_data = &req->rdma_req;
905 req->rdma_param.private_data_len = sizeof(req->rdma_req);
906
907 req->rdma_req.opcode = req->ib_req.opcode;
908 req->rdma_req.tag = req->ib_req.tag;
909 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
910 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
911 req->rdma_req.req_flags = req->ib_req.req_flags;
912
913 ipi = req->rdma_req.initiator_port_id;
914 tpi = req->rdma_req.target_port_id;
915 } else {
916 u8 subnet_timeout;
917
918 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
919
920 req->ib_param.primary_path = &ch->ib_cm.path;
921 req->ib_param.alternate_path = NULL;
922 req->ib_param.service_id = target->ib_cm.service_id;
923 get_random_bytes(&req->ib_param.starting_psn, 4);
924 req->ib_param.starting_psn &= 0xffffff;
925 req->ib_param.qp_num = ch->qp->qp_num;
926 req->ib_param.qp_type = ch->qp->qp_type;
927 req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
928 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
929 req->ib_param.private_data = &req->ib_req;
930 req->ib_param.private_data_len = sizeof(req->ib_req);
931
932 ipi = req->ib_req.initiator_port_id;
933 tpi = req->ib_req.target_port_id;
934 }
935
777 /* 936 /*
778 * In the published SRP specification (draft rev. 16a), the 937 * In the published SRP specification (draft rev. 16a), the
779 * port identifier format is 8 bytes of ID extension followed 938 * port identifier format is 8 bytes of ID extension followed
@@ -784,19 +943,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
784 * recognized by the I/O Class they report. 943 * recognized by the I/O Class they report.
785 */ 944 */
786 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 945 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
787 memcpy(req->priv.initiator_port_id, 946 memcpy(ipi, &target->sgid.global.interface_id, 8);
788 &target->sgid.global.interface_id, 8); 947 memcpy(ipi + 8, &target->initiator_ext, 8);
789 memcpy(req->priv.initiator_port_id + 8, 948 memcpy(tpi, &target->ioc_guid, 8);
790 &target->initiator_ext, 8); 949 memcpy(tpi + 8, &target->id_ext, 8);
791 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
792 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
793 } else { 950 } else {
794 memcpy(req->priv.initiator_port_id, 951 memcpy(ipi, &target->initiator_ext, 8);
795 &target->initiator_ext, 8); 952 memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
796 memcpy(req->priv.initiator_port_id + 8, 953 memcpy(tpi, &target->id_ext, 8);
797 &target->sgid.global.interface_id, 8); 954 memcpy(tpi + 8, &target->ioc_guid, 8);
798 memcpy(req->priv.target_port_id, &target->id_ext, 8);
799 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
800 } 955 }
801 956
802 /* 957 /*
@@ -809,12 +964,14 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
809 PFX "Topspin/Cisco initiator port ID workaround " 964 PFX "Topspin/Cisco initiator port ID workaround "
810 "activated for target GUID %016llx\n", 965 "activated for target GUID %016llx\n",
811 be64_to_cpu(target->ioc_guid)); 966 be64_to_cpu(target->ioc_guid));
812 memset(req->priv.initiator_port_id, 0, 8); 967 memset(ipi, 0, 8);
813 memcpy(req->priv.initiator_port_id + 8, 968 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
814 &target->srp_host->srp_dev->dev->node_guid, 8);
815 } 969 }
816 970
817 status = ib_send_cm_req(ch->cm_id, &req->param); 971 if (target->using_rdma_cm)
972 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
973 else
974 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
818 975
819 kfree(req); 976 kfree(req);
820 977
@@ -841,14 +998,23 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
841static void srp_disconnect_target(struct srp_target_port *target) 998static void srp_disconnect_target(struct srp_target_port *target)
842{ 999{
843 struct srp_rdma_ch *ch; 1000 struct srp_rdma_ch *ch;
844 int i; 1001 int i, ret;
845 1002
846 /* XXX should send SRP_I_LOGOUT request */ 1003 /* XXX should send SRP_I_LOGOUT request */
847 1004
848 for (i = 0; i < target->ch_count; i++) { 1005 for (i = 0; i < target->ch_count; i++) {
849 ch = &target->ch[i]; 1006 ch = &target->ch[i];
850 ch->connected = false; 1007 ch->connected = false;
851 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { 1008 ret = 0;
1009 if (target->using_rdma_cm) {
1010 if (ch->rdma_cm.cm_id)
1011 rdma_disconnect(ch->rdma_cm.cm_id);
1012 } else {
1013 if (ch->ib_cm.cm_id)
1014 ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
1015 NULL, 0);
1016 }
1017 if (ret < 0) {
852 shost_printk(KERN_DEBUG, target->scsi_host, 1018 shost_printk(KERN_DEBUG, target->scsi_host,
853 PFX "Sending CM DREQ failed\n"); 1019 PFX "Sending CM DREQ failed\n");
854 } 1020 }
@@ -962,6 +1128,7 @@ static void srp_remove_target(struct srp_target_port *target)
962 scsi_remove_host(target->scsi_host); 1128 scsi_remove_host(target->scsi_host);
963 srp_stop_rport_timers(target->rport); 1129 srp_stop_rport_timers(target->rport);
964 srp_disconnect_target(target); 1130 srp_disconnect_target(target);
1131 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
965 for (i = 0; i < target->ch_count; i++) { 1132 for (i = 0; i < target->ch_count; i++) {
966 ch = &target->ch[i]; 1133 ch = &target->ch[i];
967 srp_free_ch_ib(target, ch); 1134 srp_free_ch_ib(target, ch);
@@ -2349,7 +2516,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2349 struct srp_target_port *target = ch->target; 2516 struct srp_target_port *target = ch->target;
2350 struct ib_qp_attr *qp_attr = NULL; 2517 struct ib_qp_attr *qp_attr = NULL;
2351 int attr_mask = 0; 2518 int attr_mask = 0;
2352 int ret; 2519 int ret = 0;
2353 int i; 2520 int i;
2354 2521
2355 if (lrsp->opcode == SRP_LOGIN_RSP) { 2522 if (lrsp->opcode == SRP_LOGIN_RSP) {
@@ -2379,40 +2546,42 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2379 goto error; 2546 goto error;
2380 } 2547 }
2381 2548
2382 ret = -ENOMEM;
2383 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2384 if (!qp_attr)
2385 goto error;
2386
2387 qp_attr->qp_state = IB_QPS_RTR;
2388 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2389 if (ret)
2390 goto error_free;
2391
2392 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2393 if (ret)
2394 goto error_free;
2395
2396 for (i = 0; i < target->queue_size; i++) { 2549 for (i = 0; i < target->queue_size; i++) {
2397 struct srp_iu *iu = ch->rx_ring[i]; 2550 struct srp_iu *iu = ch->rx_ring[i];
2398 2551
2399 ret = srp_post_recv(ch, iu); 2552 ret = srp_post_recv(ch, iu);
2400 if (ret) 2553 if (ret)
2401 goto error_free; 2554 goto error;
2402 } 2555 }
2403 2556
2404 qp_attr->qp_state = IB_QPS_RTS; 2557 if (!target->using_rdma_cm) {
2405 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2558 ret = -ENOMEM;
2406 if (ret) 2559 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2407 goto error_free; 2560 if (!qp_attr)
2561 goto error;
2408 2562
2409 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2563 qp_attr->qp_state = IB_QPS_RTR;
2564 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2565 if (ret)
2566 goto error_free;
2410 2567
2411 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2568 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2412 if (ret) 2569 if (ret)
2413 goto error_free; 2570 goto error_free;
2414 2571
2415 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2572 qp_attr->qp_state = IB_QPS_RTS;
2573 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2574 if (ret)
2575 goto error_free;
2576
2577 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2578
2579 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2580 if (ret)
2581 goto error_free;
2582
2583 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2584 }
2416 2585
2417error_free: 2586error_free:
2418 kfree(qp_attr); 2587 kfree(qp_attr);
@@ -2421,41 +2590,43 @@ error:
2421 ch->status = ret; 2590 ch->status = ret;
2422} 2591}
2423 2592
2424static void srp_cm_rej_handler(struct ib_cm_id *cm_id, 2593static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2425 struct ib_cm_event *event, 2594 struct ib_cm_event *event,
2426 struct srp_rdma_ch *ch) 2595 struct srp_rdma_ch *ch)
2427{ 2596{
2428 struct srp_target_port *target = ch->target; 2597 struct srp_target_port *target = ch->target;
2429 struct Scsi_Host *shost = target->scsi_host; 2598 struct Scsi_Host *shost = target->scsi_host;
2430 struct ib_class_port_info *cpi; 2599 struct ib_class_port_info *cpi;
2431 int opcode; 2600 int opcode;
2601 u16 dlid;
2432 2602
2433 switch (event->param.rej_rcvd.reason) { 2603 switch (event->param.rej_rcvd.reason) {
2434 case IB_CM_REJ_PORT_CM_REDIRECT: 2604 case IB_CM_REJ_PORT_CM_REDIRECT:
2435 cpi = event->param.rej_rcvd.ari; 2605 cpi = event->param.rej_rcvd.ari;
2436 sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid)); 2606 dlid = be16_to_cpu(cpi->redirect_lid);
2437 ch->path.pkey = cpi->redirect_pkey; 2607 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2608 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2438 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2609 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2439 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); 2610 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2440 2611
2441 ch->status = sa_path_get_dlid(&ch->path) ? 2612 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2442 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2443 break; 2613 break;
2444 2614
2445 case IB_CM_REJ_PORT_REDIRECT: 2615 case IB_CM_REJ_PORT_REDIRECT:
2446 if (srp_target_is_topspin(target)) { 2616 if (srp_target_is_topspin(target)) {
2617 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2618
2447 /* 2619 /*
2448 * Topspin/Cisco SRP gateways incorrectly send 2620 * Topspin/Cisco SRP gateways incorrectly send
2449 * reject reason code 25 when they mean 24 2621 * reject reason code 25 when they mean 24
2450 * (port redirect). 2622 * (port redirect).
2451 */ 2623 */
2452 memcpy(ch->path.dgid.raw, 2624 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2453 event->param.rej_rcvd.ari, 16);
2454 2625
2455 shost_printk(KERN_DEBUG, shost, 2626 shost_printk(KERN_DEBUG, shost,
2456 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2627 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2457 be64_to_cpu(ch->path.dgid.global.subnet_prefix), 2628 be64_to_cpu(dgid->global.subnet_prefix),
2458 be64_to_cpu(ch->path.dgid.global.interface_id)); 2629 be64_to_cpu(dgid->global.interface_id));
2459 2630
2460 ch->status = SRP_PORT_REDIRECT; 2631 ch->status = SRP_PORT_REDIRECT;
2461 } else { 2632 } else {
@@ -2484,7 +2655,8 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2484 shost_printk(KERN_WARNING, shost, PFX 2655 shost_printk(KERN_WARNING, shost, PFX
2485 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2656 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2486 target->sgid.raw, 2657 target->sgid.raw,
2487 target->orig_dgid.raw, reason); 2658 target->ib_cm.orig_dgid.raw,
2659 reason);
2488 } else 2660 } else
2489 shost_printk(KERN_WARNING, shost, 2661 shost_printk(KERN_WARNING, shost,
2490 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2662 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
@@ -2504,7 +2676,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2504 } 2676 }
2505} 2677}
2506 2678
2507static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2679static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2508{ 2680{
2509 struct srp_rdma_ch *ch = cm_id->context; 2681 struct srp_rdma_ch *ch = cm_id->context;
2510 struct srp_target_port *target = ch->target; 2682 struct srp_target_port *target = ch->target;
@@ -2527,7 +2699,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2527 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2699 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2528 comp = 1; 2700 comp = 1;
2529 2701
2530 srp_cm_rej_handler(cm_id, event, ch); 2702 srp_ib_cm_rej_handler(cm_id, event, ch);
2531 break; 2703 break;
2532 2704
2533 case IB_CM_DREQ_RECEIVED: 2705 case IB_CM_DREQ_RECEIVED:
@@ -2565,6 +2737,135 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2565 return 0; 2737 return 0;
2566} 2738}
2567 2739
2740static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2741 struct rdma_cm_event *event)
2742{
2743 struct srp_target_port *target = ch->target;
2744 struct Scsi_Host *shost = target->scsi_host;
2745 int opcode;
2746
2747 switch (event->status) {
2748 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2749 shost_printk(KERN_WARNING, shost,
2750 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2751 ch->status = -ECONNRESET;
2752 break;
2753
2754 case IB_CM_REJ_CONSUMER_DEFINED:
2755 opcode = *(u8 *) event->param.conn.private_data;
2756 if (opcode == SRP_LOGIN_REJ) {
2757 struct srp_login_rej *rej =
2758 (struct srp_login_rej *)
2759 event->param.conn.private_data;
2760 u32 reason = be32_to_cpu(rej->reason);
2761
2762 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2763 shost_printk(KERN_WARNING, shost,
2764 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2765 else
2766 shost_printk(KERN_WARNING, shost,
2767 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2768 } else {
2769 shost_printk(KERN_WARNING, shost,
2770 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2771 opcode);
2772 }
2773 ch->status = -ECONNRESET;
2774 break;
2775
2776 case IB_CM_REJ_STALE_CONN:
2777 shost_printk(KERN_WARNING, shost,
2778 " REJ reason: stale connection\n");
2779 ch->status = SRP_STALE_CONN;
2780 break;
2781
2782 default:
2783 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2784 event->status);
2785 ch->status = -ECONNRESET;
2786 break;
2787 }
2788}
2789
2790static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2791 struct rdma_cm_event *event)
2792{
2793 struct srp_rdma_ch *ch = cm_id->context;
2794 struct srp_target_port *target = ch->target;
2795 int comp = 0;
2796
2797 switch (event->event) {
2798 case RDMA_CM_EVENT_ADDR_RESOLVED:
2799 ch->status = 0;
2800 comp = 1;
2801 break;
2802
2803 case RDMA_CM_EVENT_ADDR_ERROR:
2804 ch->status = -ENXIO;
2805 comp = 1;
2806 break;
2807
2808 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2809 ch->status = 0;
2810 comp = 1;
2811 break;
2812
2813 case RDMA_CM_EVENT_ROUTE_ERROR:
2814 case RDMA_CM_EVENT_UNREACHABLE:
2815 ch->status = -EHOSTUNREACH;
2816 comp = 1;
2817 break;
2818
2819 case RDMA_CM_EVENT_CONNECT_ERROR:
2820 shost_printk(KERN_DEBUG, target->scsi_host,
2821 PFX "Sending CM REQ failed\n");
2822 comp = 1;
2823 ch->status = -ECONNRESET;
2824 break;
2825
2826 case RDMA_CM_EVENT_ESTABLISHED:
2827 comp = 1;
2828 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2829 break;
2830
2831 case RDMA_CM_EVENT_REJECTED:
2832 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2833 comp = 1;
2834
2835 srp_rdma_cm_rej_handler(ch, event);
2836 break;
2837
2838 case RDMA_CM_EVENT_DISCONNECTED:
2839 if (ch->connected) {
2840 shost_printk(KERN_WARNING, target->scsi_host,
2841 PFX "received DREQ\n");
2842 rdma_disconnect(ch->rdma_cm.cm_id);
2843 comp = 1;
2844 ch->status = 0;
2845 queue_work(system_long_wq, &target->tl_err_work);
2846 }
2847 break;
2848
2849 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2850 shost_printk(KERN_ERR, target->scsi_host,
2851 PFX "connection closed\n");
2852
2853 comp = 1;
2854 ch->status = 0;
2855 break;
2856
2857 default:
2858 shost_printk(KERN_WARNING, target->scsi_host,
2859 PFX "Unhandled CM event %d\n", event->event);
2860 break;
2861 }
2862
2863 if (comp)
2864 complete(&ch->done);
2865
2866 return 0;
2867}
2868
2568/** 2869/**
2569 * srp_change_queue_depth - setting device queue depth 2870 * srp_change_queue_depth - setting device queue depth
2570 * @sdev: scsi device struct 2871 * @sdev: scsi device struct
@@ -2717,6 +3018,16 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
2717 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; 3018 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2718} 3019}
2719 3020
3021static int srp_target_alloc(struct scsi_target *starget)
3022{
3023 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
3024 struct srp_target_port *target = host_to_target(shost);
3025
3026 if (target->target_can_queue)
3027 starget->can_queue = target->target_can_queue;
3028 return 0;
3029}
3030
2720static int srp_slave_alloc(struct scsi_device *sdev) 3031static int srp_slave_alloc(struct scsi_device *sdev)
2721{ 3032{
2722 struct Scsi_Host *shost = sdev->host; 3033 struct Scsi_Host *shost = sdev->host;
@@ -2766,7 +3077,10 @@ static ssize_t show_service_id(struct device *dev,
2766{ 3077{
2767 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3078 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2768 3079
2769 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); 3080 if (target->using_rdma_cm)
3081 return -ENOENT;
3082 return sprintf(buf, "0x%016llx\n",
3083 be64_to_cpu(target->ib_cm.service_id));
2770} 3084}
2771 3085
2772static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 3086static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
@@ -2774,7 +3088,9 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2774{ 3088{
2775 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3089 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2776 3090
2777 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey)); 3091 if (target->using_rdma_cm)
3092 return -ENOENT;
3093 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
2778} 3094}
2779 3095
2780static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 3096static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
@@ -2791,7 +3107,9 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2791 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3107 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2792 struct srp_rdma_ch *ch = &target->ch[0]; 3108 struct srp_rdma_ch *ch = &target->ch[0];
2793 3109
2794 return sprintf(buf, "%pI6\n", ch->path.dgid.raw); 3110 if (target->using_rdma_cm)
3111 return -ENOENT;
3112 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
2795} 3113}
2796 3114
2797static ssize_t show_orig_dgid(struct device *dev, 3115static ssize_t show_orig_dgid(struct device *dev,
@@ -2799,7 +3117,9 @@ static ssize_t show_orig_dgid(struct device *dev,
2799{ 3117{
2800 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3118 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801 3119
2802 return sprintf(buf, "%pI6\n", target->orig_dgid.raw); 3120 if (target->using_rdma_cm)
3121 return -ENOENT;
3122 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
2803} 3123}
2804 3124
2805static ssize_t show_req_lim(struct device *dev, 3125static ssize_t show_req_lim(struct device *dev,
@@ -2921,6 +3241,7 @@ static struct scsi_host_template srp_template = {
2921 .module = THIS_MODULE, 3241 .module = THIS_MODULE,
2922 .name = "InfiniBand SRP initiator", 3242 .name = "InfiniBand SRP initiator",
2923 .proc_name = DRV_NAME, 3243 .proc_name = DRV_NAME,
3244 .target_alloc = srp_target_alloc,
2924 .slave_alloc = srp_slave_alloc, 3245 .slave_alloc = srp_slave_alloc,
2925 .slave_configure = srp_slave_configure, 3246 .slave_configure = srp_slave_configure,
2926 .info = srp_target_info, 3247 .info = srp_target_info,
@@ -3044,6 +3365,9 @@ static bool srp_conn_unique(struct srp_host *host,
3044 if (t != target && 3365 if (t != target &&
3045 target->id_ext == t->id_ext && 3366 target->id_ext == t->id_ext &&
3046 target->ioc_guid == t->ioc_guid && 3367 target->ioc_guid == t->ioc_guid &&
3368 (!target->using_rdma_cm ||
3369 memcmp(&target->rdma_cm.dst, &t->rdma_cm.dst,
3370 sizeof(target->rdma_cm.dst)) == 0) &&
3047 target->initiator_ext == t->initiator_ext) { 3371 target->initiator_ext == t->initiator_ext) {
3048 ret = false; 3372 ret = false;
3049 break; 3373 break;
@@ -3060,6 +3384,9 @@ out:
3060 * 3384 *
3061 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3385 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3062 * pkey=<P_Key>,service_id=<service ID> 3386 * pkey=<P_Key>,service_id=<service ID>
3387 * or
3388 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3389 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3063 * 3390 *
3064 * to the add_target sysfs attribute. 3391 * to the add_target sysfs attribute.
3065 */ 3392 */
@@ -3080,11 +3407,20 @@ enum {
3080 SRP_OPT_COMP_VECTOR = 1 << 12, 3407 SRP_OPT_COMP_VECTOR = 1 << 12,
3081 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3408 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3082 SRP_OPT_QUEUE_SIZE = 1 << 14, 3409 SRP_OPT_QUEUE_SIZE = 1 << 14,
3083 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 3410 SRP_OPT_IP_SRC = 1 << 15,
3084 SRP_OPT_IOC_GUID | 3411 SRP_OPT_IP_DEST = 1 << 16,
3085 SRP_OPT_DGID | 3412 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3086 SRP_OPT_PKEY | 3413};
3087 SRP_OPT_SERVICE_ID), 3414
3415static unsigned int srp_opt_mandatory[] = {
3416 SRP_OPT_ID_EXT |
3417 SRP_OPT_IOC_GUID |
3418 SRP_OPT_DGID |
3419 SRP_OPT_PKEY |
3420 SRP_OPT_SERVICE_ID,
3421 SRP_OPT_ID_EXT |
3422 SRP_OPT_IOC_GUID |
3423 SRP_OPT_IP_DEST,
3088}; 3424};
3089 3425
3090static const match_table_t srp_opt_tokens = { 3426static const match_table_t srp_opt_tokens = {
@@ -3095,6 +3431,7 @@ static const match_table_t srp_opt_tokens = {
3095 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3431 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3096 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3432 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3097 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3433 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3434 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3098 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3435 { SRP_OPT_IO_CLASS, "io_class=%x" },
3099 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3436 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3100 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3437 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
@@ -3103,15 +3440,33 @@ static const match_table_t srp_opt_tokens = {
3103 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3440 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3104 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3441 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3105 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3442 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3443 { SRP_OPT_IP_SRC, "src=%s" },
3444 { SRP_OPT_IP_DEST, "dest=%s" },
3106 { SRP_OPT_ERR, NULL } 3445 { SRP_OPT_ERR, NULL }
3107}; 3446};
3108 3447
3109static int srp_parse_options(const char *buf, struct srp_target_port *target) 3448static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3449 const char *addr_port_str)
3450{
3451 char *addr = kstrdup(addr_port_str, GFP_KERNEL);
3452 char *port_str = addr;
3453 int ret;
3454
3455 if (!addr)
3456 return -ENOMEM;
3457 strsep(&port_str, ":");
3458 ret = inet_pton_with_scope(net, AF_UNSPEC, addr, port_str, sa);
3459 kfree(addr);
3460 return ret;
3461}
3462
3463static int srp_parse_options(struct net *net, const char *buf,
3464 struct srp_target_port *target)
3110{ 3465{
3111 char *options, *sep_opt; 3466 char *options, *sep_opt;
3112 char *p; 3467 char *p;
3113 char dgid[3];
3114 substring_t args[MAX_OPT_ARGS]; 3468 substring_t args[MAX_OPT_ARGS];
3469 unsigned long long ull;
3115 int opt_mask = 0; 3470 int opt_mask = 0;
3116 int token; 3471 int token;
3117 int ret = -EINVAL; 3472 int ret = -EINVAL;
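
The new srp_parse_in() helper above splits the "address:port" login parameter at the colon and converts the address part with inet_pton_with_scope(). Below is a userspace approximation using plain inet_pton(), IPv4 only for brevity; all names in it are illustrative:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_addr_port(const char *str, struct sockaddr_in *sa)
{
    char *addr = strdup(str);
    char *colon;
    int ret = -1;

    if (!addr)
        return -1;

    memset(sa, 0, sizeof(*sa));
    sa->sin_family = AF_INET;

    colon = strchr(addr, ':');
    if (colon) {
        *colon = '\0';                       /* terminate the address part */
        sa->sin_port = htons(atoi(colon + 1));
    }
    if (inet_pton(AF_INET, addr, &sa->sin_addr) == 1)
        ret = 0;

    free(addr);
    return ret;
}

int main(void)
{
    struct sockaddr_in sa;

    if (!parse_addr_port("192.168.1.5:5555", &sa))
        printf("parsed, port %u\n", ntohs(sa.sin_port));
    return 0;
}
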
@@ -3136,7 +3491,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3136 ret = -ENOMEM; 3491 ret = -ENOMEM;
3137 goto out; 3492 goto out;
3138 } 3493 }
3139 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3494 ret = kstrtoull(p, 16, &ull);
3495 if (ret) {
3496 pr_warn("invalid id_ext parameter '%s'\n", p);
3497 kfree(p);
3498 goto out;
3499 }
3500 target->id_ext = cpu_to_be64(ull);
3140 kfree(p); 3501 kfree(p);
3141 break; 3502 break;
3142 3503
@@ -3146,7 +3507,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3146 ret = -ENOMEM; 3507 ret = -ENOMEM;
3147 goto out; 3508 goto out;
3148 } 3509 }
3149 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3510 ret = kstrtoull(p, 16, &ull);
3511 if (ret) {
3512 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3513 kfree(p);
3514 goto out;
3515 }
3516 target->ioc_guid = cpu_to_be64(ull);
3150 kfree(p); 3517 kfree(p);
3151 break; 3518 break;
3152 3519
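
The id_ext, ioc_guid, service_id and initiator_ext hunks replace simple_strtoull() with kstrtoull(), which rejects overflow and trailing garbage instead of silently ignoring them. A userspace equivalent of that strict parse, built on strtoull():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_u64_hex(const char *s, unsigned long long *out)
{
    char *end;

    errno = 0;
    *out = strtoull(s, &end, 16);
    if (errno || end == s || *end != '\0')
        return -EINVAL;     /* overflow, empty string, or trailing garbage */
    return 0;
}

int main(void)
{
    unsigned long long id_ext;

    if (parse_u64_hex("200500A0B81234", &id_ext))   /* example value */
        fprintf(stderr, "invalid id_ext parameter\n");
    else
        printf("id_ext = 0x%llx\n", id_ext);
    return 0;
}
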
@@ -3162,16 +3529,10 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3162 goto out; 3529 goto out;
3163 } 3530 }
3164 3531
3165 for (i = 0; i < 16; ++i) { 3532 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3166 strlcpy(dgid, p + i * 2, sizeof(dgid));
3167 if (sscanf(dgid, "%hhx",
3168 &target->orig_dgid.raw[i]) < 1) {
3169 ret = -EINVAL;
3170 kfree(p);
3171 goto out;
3172 }
3173 }
3174 kfree(p); 3533 kfree(p);
3534 if (ret < 0)
3535 goto out;
3175 break; 3536 break;
3176 3537
3177 case SRP_OPT_PKEY: 3538 case SRP_OPT_PKEY:
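
The dgid hunk above replaces the per-byte sscanf() loop with hex2bin(), decoding the 32-character destination GID into 16 raw bytes in one call. A standalone sketch of that conversion (an illustration, not the kernel's lib/hexdump.c implementation):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int hex_nibble(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    c = tolower((unsigned char)c);
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    return -1;
}

static int parse_gid(const char *hex, unsigned char *raw, size_t len)
{
    size_t i;

    if (strlen(hex) < 2 * len)
        return -1;

    for (i = 0; i < len; i++) {
        int hi = hex_nibble(hex[2 * i]);
        int lo = hex_nibble(hex[2 * i + 1]);

        if (hi < 0 || lo < 0)
            return -1;      /* any non-hex character fails the whole parse */
        raw[i] = hi << 4 | lo;
    }
    return 0;
}

int main(void)
{
    unsigned char gid[16];

    if (!parse_gid("fe800000000000000002c90300a01234", gid, sizeof(gid)))
        printf("first byte 0x%02x, last byte 0x%02x\n", gid[0], gid[15]);
    return 0;
}
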
@@ -3179,7 +3540,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3179 pr_warn("bad P_Key parameter '%s'\n", p); 3540 pr_warn("bad P_Key parameter '%s'\n", p);
3180 goto out; 3541 goto out;
3181 } 3542 }
3182 target->pkey = cpu_to_be16(token); 3543 target->ib_cm.pkey = cpu_to_be16(token);
3183 break; 3544 break;
3184 3545
3185 case SRP_OPT_SERVICE_ID: 3546 case SRP_OPT_SERVICE_ID:
@@ -3188,7 +3549,45 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3188 ret = -ENOMEM; 3549 ret = -ENOMEM;
3189 goto out; 3550 goto out;
3190 } 3551 }
3191 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3552 ret = kstrtoull(p, 16, &ull);
3553 if (ret) {
3554 pr_warn("bad service_id parameter '%s'\n", p);
3555 kfree(p);
3556 goto out;
3557 }
3558 target->ib_cm.service_id = cpu_to_be64(ull);
3559 kfree(p);
3560 break;
3561
3562 case SRP_OPT_IP_SRC:
3563 p = match_strdup(args);
3564 if (!p) {
3565 ret = -ENOMEM;
3566 goto out;
3567 }
3568 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
3569 if (ret < 0) {
3570 pr_warn("bad source parameter '%s'\n", p);
3571 kfree(p);
3572 goto out;
3573 }
3574 target->rdma_cm.src_specified = true;
3575 kfree(p);
3576 break;
3577
3578 case SRP_OPT_IP_DEST:
3579 p = match_strdup(args);
3580 if (!p) {
3581 ret = -ENOMEM;
3582 goto out;
3583 }
3584 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
3585 if (ret < 0) {
3586 pr_warn("bad dest parameter '%s'\n", p);
3587 kfree(p);
3588 goto out;
3589 }
3590 target->using_rdma_cm = true;
3192 kfree(p); 3591 kfree(p);
3193 break; 3592 break;
3194 3593
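
[Editor's note] With the two new tokens wired into the parser, a target reachable over RDMA/CM can be addressed by IP address and port instead of by DGID and service ID. A hypothetical example of the resulting option string (all values made up, shown only to illustrate the syntax that srp_parse_options() now accepts):

/* Hypothetical add_target string using the new "dest=" option. */
static const char example_rdma_cm_target[] =
	"id_ext=200100e08b000000,"
	"ioc_guid=200100e08b000000,"
	"dest=192.168.1.20:5555";
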
@@ -3221,6 +3620,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3221 target->scsi_host->cmd_per_lun = token; 3620 target->scsi_host->cmd_per_lun = token;
3222 break; 3621 break;
3223 3622
3623 case SRP_OPT_TARGET_CAN_QUEUE:
3624 if (match_int(args, &token) || token < 1) {
3625 				pr_warn("bad target_can_queue parameter '%s'\n",
3626 p);
3627 goto out;
3628 }
3629 target->target_can_queue = token;
3630 break;
3631
3224 case SRP_OPT_IO_CLASS: 3632 case SRP_OPT_IO_CLASS:
3225 if (match_hex(args, &token)) { 3633 if (match_hex(args, &token)) {
3226 pr_warn("bad IO class parameter '%s'\n", p); 3634 pr_warn("bad IO class parameter '%s'\n", p);
@@ -3242,7 +3650,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3242 ret = -ENOMEM; 3650 ret = -ENOMEM;
3243 goto out; 3651 goto out;
3244 } 3652 }
3245 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3653 ret = kstrtoull(p, 16, &ull);
3654 if (ret) {
3655 pr_warn("bad initiator_ext value '%s'\n", p);
3656 kfree(p);
3657 goto out;
3658 }
3659 target->initiator_ext = cpu_to_be64(ull);
3246 kfree(p); 3660 kfree(p);
3247 break; 3661 break;
3248 3662
@@ -3297,14 +3711,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
3297 } 3711 }
3298 } 3712 }
3299 3713
3300 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL) 3714 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3301 ret = 0; 3715 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3302 else 3716 ret = 0;
3303 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 3717 break;
3304 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && 3718 }
3305 !(srp_opt_tokens[i].token & opt_mask)) 3719 }
3306 pr_warn("target creation request is missing parameter '%s'\n", 3720 if (ret)
3307 srp_opt_tokens[i].pattern); 3721 pr_warn("target creation request is missing one or more parameters\n");
3308 3722
3309 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3723 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3310 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3724 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
@@ -3345,6 +3759,7 @@ static ssize_t srp_create_target(struct device *dev,
3345 3759
3346 target = host_to_target(target_host); 3760 target = host_to_target(target_host);
3347 3761
3762 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3348 target->io_class = SRP_REV16A_IB_IO_CLASS; 3763 target->io_class = SRP_REV16A_IB_IO_CLASS;
3349 target->scsi_host = target_host; 3764 target->scsi_host = target_host;
3350 target->srp_host = host; 3765 target->srp_host = host;
@@ -3366,18 +3781,29 @@ static ssize_t srp_create_target(struct device *dev,
3366 if (ret < 0) 3781 if (ret < 0)
3367 goto put; 3782 goto put;
3368 3783
3369 ret = srp_parse_options(buf, target); 3784 ret = srp_parse_options(target->net, buf, target);
3370 if (ret) 3785 if (ret)
3371 goto out; 3786 goto out;
3372 3787
3373 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3788 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3374 3789
3375 if (!srp_conn_unique(target->srp_host, target)) { 3790 if (!srp_conn_unique(target->srp_host, target)) {
3376 shost_printk(KERN_INFO, target->scsi_host, 3791 if (target->using_rdma_cm) {
3377 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3792 char dst_addr[64];
3378 be64_to_cpu(target->id_ext), 3793
3379 be64_to_cpu(target->ioc_guid), 3794 shost_printk(KERN_INFO, target->scsi_host,
3380 be64_to_cpu(target->initiator_ext)); 3795 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%s\n",
3796 be64_to_cpu(target->id_ext),
3797 be64_to_cpu(target->ioc_guid),
3798 inet_ntop(&target->rdma_cm.dst, dst_addr,
3799 sizeof(dst_addr)));
3800 } else {
3801 shost_printk(KERN_INFO, target->scsi_host,
3802 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3803 be64_to_cpu(target->id_ext),
3804 be64_to_cpu(target->ioc_guid),
3805 be64_to_cpu(target->initiator_ext));
3806 }
3381 ret = -EEXIST; 3807 ret = -EEXIST;
3382 goto out; 3808 goto out;
3383 } 3809 }
@@ -3478,11 +3904,18 @@ static ssize_t srp_create_target(struct device *dev,
3478 3904
3479 ret = srp_connect_ch(ch, multich); 3905 ret = srp_connect_ch(ch, multich);
3480 if (ret) { 3906 if (ret) {
3907 char dst[64];
3908
3909 if (target->using_rdma_cm)
3910 inet_ntop(&target->rdma_cm.dst, dst,
3911 sizeof(dst));
3912 else
3913 snprintf(dst, sizeof(dst), "%pI6",
3914 target->ib_cm.orig_dgid.raw);
3481 shost_printk(KERN_ERR, target->scsi_host, 3915 shost_printk(KERN_ERR, target->scsi_host,
3482 PFX "Connection %d/%d to %pI6 failed\n", 3916 PFX "Connection %d/%d to %s failed\n",
3483 ch_start + cpu_idx, 3917 ch_start + cpu_idx,
3484 target->ch_count, 3918 target->ch_count, dst);
3485 ch->target->orig_dgid.raw);
3486 if (node_idx == 0 && cpu_idx == 0) { 3919 if (node_idx == 0 && cpu_idx == 0) {
3487 goto free_ch; 3920 goto free_ch;
3488 } else { 3921 } else {
@@ -3507,13 +3940,25 @@ connected:
3507 goto err_disconnect; 3940 goto err_disconnect;
3508 3941
3509 if (target->state != SRP_TARGET_REMOVED) { 3942 if (target->state != SRP_TARGET_REMOVED) {
3510 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3943 if (target->using_rdma_cm) {
3511 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3944 char dst[64];
3512 be64_to_cpu(target->id_ext), 3945
3513 be64_to_cpu(target->ioc_guid), 3946 inet_ntop(&target->rdma_cm.dst, dst, sizeof(dst));
3514 be16_to_cpu(target->pkey), 3947 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3515 be64_to_cpu(target->service_id), 3948 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %s\n",
3516 target->sgid.raw, target->orig_dgid.raw); 3949 be64_to_cpu(target->id_ext),
3950 be64_to_cpu(target->ioc_guid),
3951 target->sgid.raw, dst);
3952 } else {
3953 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3954 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3955 be64_to_cpu(target->id_ext),
3956 be64_to_cpu(target->ioc_guid),
3957 be16_to_cpu(target->ib_cm.pkey),
3958 be64_to_cpu(target->ib_cm.service_id),
3959 target->sgid.raw,
3960 target->ib_cm.orig_dgid.raw);
3961 }
3517 } 3962 }
3518 3963
3519 ret = count; 3964 ret = count;
@@ -3523,8 +3968,16 @@ out:
3523 3968
3524put: 3969put:
3525 scsi_host_put(target->scsi_host); 3970 scsi_host_put(target->scsi_host);
3526 if (ret < 0) 3971 if (ret < 0) {
3972 /*
3973 * If a call to srp_remove_target() has not been scheduled,
3974 		 * drop the network namespace reference that was obtained
3975 		 * earlier in this function.
3976 */
3977 if (target->state != SRP_TARGET_REMOVED)
3978 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
3527 scsi_host_put(target->scsi_host); 3979 scsi_host_put(target->scsi_host);
3980 }
3528 3981
3529 return ret; 3982 return ret;
3530 3983
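
[Editor's note] The error path above pairs with the kobj_ns_grab_current() call added at the top of srp_create_target(): the target now pins the network namespace it was created in and releases it if target creation fails before srp_remove_target() could be scheduled. A stripped-down sketch of that grab/drop pairing, outside of any SRP specifics (illustrative only, names invented):

#include <linux/kobject_ns.h>	/* kobj_ns_grab_current(), kobj_ns_drop() */

struct net;

struct example_object {
	struct net *net;	/* namespace the object was created in */
};

/* Pin the caller's network namespace for the lifetime of the object. */
static void example_object_init(struct example_object *obj)
{
	obj->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
}

/* Release the reference taken in example_object_init(). */
static void example_object_release(struct example_object *obj)
{
	kobj_ns_drop(KOBJ_NS_TYPE_NET, obj->net);
}
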
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a814f5ef16f9..a2706086b9c7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -45,6 +45,7 @@
45#include <rdma/ib_sa.h> 45#include <rdma/ib_sa.h>
46#include <rdma/ib_cm.h> 46#include <rdma/ib_cm.h>
47#include <rdma/ib_fmr_pool.h> 47#include <rdma/ib_fmr_pool.h>
48#include <rdma/rdma_cm.h>
48 49
49enum { 50enum {
50 SRP_PATH_REC_TIMEOUT_MS = 1000, 51 SRP_PATH_REC_TIMEOUT_MS = 1000,
@@ -153,11 +154,18 @@ struct srp_rdma_ch {
153 struct completion done; 154 struct completion done;
154 int status; 155 int status;
155 156
156 struct sa_path_rec path; 157 union {
157 struct ib_sa_query *path_query; 158 struct ib_cm {
158 int path_query_id; 159 struct sa_path_rec path;
160 struct ib_sa_query *path_query;
161 int path_query_id;
162 struct ib_cm_id *cm_id;
163 } ib_cm;
164 struct rdma_cm {
165 struct rdma_cm_id *cm_id;
166 } rdma_cm;
167 };
159 168
160 struct ib_cm_id *cm_id;
161 struct srp_iu **tx_ring; 169 struct srp_iu **tx_ring;
162 struct srp_iu **rx_ring; 170 struct srp_iu **rx_ring;
163 struct srp_request *req_ring; 171 struct srp_request *req_ring;
@@ -182,6 +190,7 @@ struct srp_target_port {
182 /* read only in the hot path */ 190 /* read only in the hot path */
183 u32 global_rkey; 191 u32 global_rkey;
184 struct srp_rdma_ch *ch; 192 struct srp_rdma_ch *ch;
193 struct net *net;
185 u32 ch_count; 194 u32 ch_count;
186 u32 lkey; 195 u32 lkey;
187 enum srp_target_state state; 196 enum srp_target_state state;
@@ -194,7 +203,6 @@ struct srp_target_port {
194 union ib_gid sgid; 203 union ib_gid sgid;
195 __be64 id_ext; 204 __be64 id_ext;
196 __be64 ioc_guid; 205 __be64 ioc_guid;
197 __be64 service_id;
198 __be64 initiator_ext; 206 __be64 initiator_ext;
199 u16 io_class; 207 u16 io_class;
200 struct srp_host *srp_host; 208 struct srp_host *srp_host;
@@ -203,6 +211,7 @@ struct srp_target_port {
203 char target_name[32]; 211 char target_name[32];
204 unsigned int scsi_id; 212 unsigned int scsi_id;
205 unsigned int sg_tablesize; 213 unsigned int sg_tablesize;
214 unsigned int target_can_queue;
206 int mr_pool_size; 215 int mr_pool_size;
207 int mr_per_cmd; 216 int mr_per_cmd;
208 int queue_size; 217 int queue_size;
@@ -210,8 +219,28 @@ struct srp_target_port {
210 int comp_vector; 219 int comp_vector;
211 int tl_retry_count; 220 int tl_retry_count;
212 221
213 union ib_gid orig_dgid; 222 bool using_rdma_cm;
214 __be16 pkey; 223
224 union {
225 struct {
226 __be64 service_id;
227 union ib_gid orig_dgid;
228 __be16 pkey;
229 } ib_cm;
230 struct {
231 union {
232 struct sockaddr_in ip4;
233 struct sockaddr_in6 ip6;
234 struct sockaddr_storage ss;
235 } src;
236 union {
237 struct sockaddr_in ip4;
238 struct sockaddr_in6 ip6;
239 struct sockaddr_storage ss;
240 } dst;
241 bool src_specified;
242 } rdma_cm;
243 };
215 244
216 u32 rq_tmo_jiffies; 245 u32 rq_tmo_jiffies;
217 246
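
[Editor's note] The header keeps the IB/CM and RDMA/CM addressing fields in an anonymous union, discriminated by using_rdma_cm, presumably because a target uses exactly one connection manager at a time. A stripped-down sketch of that pattern (illustrative only, names invented):

#include <linux/socket.h>	/* struct sockaddr_storage */
#include <linux/types.h>

/* Bool-discriminated union, mirroring the ib_cm/rdma_cm layout above. */
struct example_transport_addr {
	bool using_rdma_cm;
	union {
		struct {
			__be64 service_id;		/* IB/CM style addressing */
		} ib_cm;
		struct {
			struct sockaddr_storage dst;	/* RDMA/CM style addressing */
		} rdma_cm;
	};
};

/* Only the union member selected by the discriminator may be read. */
static const struct sockaddr_storage *
example_dst(const struct example_transport_addr *a)
{
	return a->using_rdma_cm ? &a->rdma_cm.dst : NULL;
}
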
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bfa576aa9f03..0373b7c40902 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -41,6 +41,7 @@
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/delay.h> 42#include <linux/delay.h>
43#include <linux/atomic.h> 43#include <linux/atomic.h>
44#include <rdma/ib_cache.h>
44#include <scsi/scsi_proto.h> 45#include <scsi/scsi_proto.h>
45#include <scsi/scsi_tcq.h> 46#include <scsi/scsi_tcq.h>
46#include <target/target_core_base.h> 47#include <target/target_core_base.h>
@@ -120,7 +121,9 @@ static bool srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new)
120} 121}
121 122
122/** 123/**
123 * srpt_event_handler() - Asynchronous IB event callback function. 124 * srpt_event_handler - asynchronous IB event callback function
125 * @handler: IB event handler registered by ib_register_event_handler().
126 * @event: Description of the event that occurred.
124 * 127 *
125 * Callback function called by the InfiniBand core when an asynchronous IB 128 * Callback function called by the InfiniBand core when an asynchronous IB
126 * event occurs. This callback may occur in interrupt context. See also 129 * event occurs. This callback may occur in interrupt context. See also
@@ -132,6 +135,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
132{ 135{
133 struct srpt_device *sdev; 136 struct srpt_device *sdev;
134 struct srpt_port *sport; 137 struct srpt_port *sport;
138 u8 port_num;
135 139
136 sdev = ib_get_client_data(event->device, &srpt_client); 140 sdev = ib_get_client_data(event->device, &srpt_client);
137 if (!sdev || sdev->device != event->device) 141 if (!sdev || sdev->device != event->device)
@@ -142,10 +146,15 @@ static void srpt_event_handler(struct ib_event_handler *handler,
142 146
143 switch (event->event) { 147 switch (event->event) {
144 case IB_EVENT_PORT_ERR: 148 case IB_EVENT_PORT_ERR:
145 if (event->element.port_num <= sdev->device->phys_port_cnt) { 149 port_num = event->element.port_num - 1;
146 sport = &sdev->port[event->element.port_num - 1]; 150 if (port_num < sdev->device->phys_port_cnt) {
151 sport = &sdev->port[port_num];
147 sport->lid = 0; 152 sport->lid = 0;
148 sport->sm_lid = 0; 153 sport->sm_lid = 0;
154 } else {
155 WARN(true, "event %d: port_num %d out of range 1..%d\n",
156 event->event, port_num + 1,
157 sdev->device->phys_port_cnt);
149 } 158 }
150 break; 159 break;
151 case IB_EVENT_PORT_ACTIVE: 160 case IB_EVENT_PORT_ACTIVE:
@@ -155,25 +164,31 @@ static void srpt_event_handler(struct ib_event_handler *handler,
155 case IB_EVENT_CLIENT_REREGISTER: 164 case IB_EVENT_CLIENT_REREGISTER:
156 case IB_EVENT_GID_CHANGE: 165 case IB_EVENT_GID_CHANGE:
157 /* Refresh port data asynchronously. */ 166 /* Refresh port data asynchronously. */
158 if (event->element.port_num <= sdev->device->phys_port_cnt) { 167 port_num = event->element.port_num - 1;
159 sport = &sdev->port[event->element.port_num - 1]; 168 if (port_num < sdev->device->phys_port_cnt) {
169 sport = &sdev->port[port_num];
160 if (!sport->lid && !sport->sm_lid) 170 if (!sport->lid && !sport->sm_lid)
161 schedule_work(&sport->work); 171 schedule_work(&sport->work);
172 } else {
173 WARN(true, "event %d: port_num %d out of range 1..%d\n",
174 event->event, port_num + 1,
175 sdev->device->phys_port_cnt);
162 } 176 }
163 break; 177 break;
164 default: 178 default:
165 pr_err("received unrecognized IB event %d\n", 179 pr_err("received unrecognized IB event %d\n", event->event);
166 event->event);
167 break; 180 break;
168 } 181 }
169} 182}
170 183
171/** 184/**
172 * srpt_srq_event() - SRQ event callback function. 185 * srpt_srq_event - SRQ event callback function
186 * @event: Description of the event that occurred.
187 * @ctx: Context pointer specified at SRQ creation time.
173 */ 188 */
174static void srpt_srq_event(struct ib_event *event, void *ctx) 189static void srpt_srq_event(struct ib_event *event, void *ctx)
175{ 190{
176 pr_info("SRQ event %d\n", event->event); 191 pr_debug("SRQ event %d\n", event->event);
177} 192}
178 193
179static const char *get_ch_state_name(enum rdma_ch_state s) 194static const char *get_ch_state_name(enum rdma_ch_state s)
@@ -194,16 +209,18 @@ static const char *get_ch_state_name(enum rdma_ch_state s)
194} 209}
195 210
196/** 211/**
197 * srpt_qp_event() - QP event callback function. 212 * srpt_qp_event - QP event callback function
213 * @event: Description of the event that occurred.
214 * @ch: SRPT RDMA channel.
198 */ 215 */
199static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) 216static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
200{ 217{
201 pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", 218 pr_debug("QP event %d on ch=%p sess_name=%s state=%d\n",
202 event->event, ch->cm_id, ch->sess_name, ch->state); 219 event->event, ch, ch->sess_name, ch->state);
203 220
204 switch (event->event) { 221 switch (event->event) {
205 case IB_EVENT_COMM_EST: 222 case IB_EVENT_COMM_EST:
206 ib_cm_notify(ch->cm_id, event->event); 223 ib_cm_notify(ch->ib_cm.cm_id, event->event);
207 break; 224 break;
208 case IB_EVENT_QP_LAST_WQE_REACHED: 225 case IB_EVENT_QP_LAST_WQE_REACHED:
209 pr_debug("%s-%d, state %s: received Last WQE event.\n", 226 pr_debug("%s-%d, state %s: received Last WQE event.\n",
@@ -217,8 +234,8 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
217} 234}
218 235
219/** 236/**
220 * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure. 237 * srpt_set_ioc - initialize an IOUnitInfo structure
221 * 238 * @c_list: controller list.
222 * @slot: one-based slot number. 239 * @slot: one-based slot number.
223 * @value: four-bit value. 240 * @value: four-bit value.
224 * 241 *
@@ -241,7 +258,8 @@ static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
241} 258}
242 259
243/** 260/**
244 * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram. 261 * srpt_get_class_port_info - copy ClassPortInfo to a management datagram
262 * @mad: Datagram that will be sent as response to DM_ATTR_CLASS_PORT_INFO.
245 * 263 *
246 * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture 264 * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture
247 * Specification. 265 * Specification.
@@ -260,7 +278,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
260} 278}
261 279
262/** 280/**
263 * srpt_get_iou() - Write IOUnitInfo to a management datagram. 281 * srpt_get_iou - write IOUnitInfo to a management datagram
282 * @mad: Datagram that will be sent as response to DM_ATTR_IOU_INFO.
264 * 283 *
265 * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture 284 * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture
266 * Specification. See also section B.7, table B.6 in the SRP r16a document. 285 * Specification. See also section B.7, table B.6 in the SRP r16a document.
@@ -284,7 +303,10 @@ static void srpt_get_iou(struct ib_dm_mad *mad)
284} 303}
285 304
286/** 305/**
287 * srpt_get_ioc() - Write IOControllerprofile to a management datagram. 306 * srpt_get_ioc - write IOControllerprofile to a management datagram
307 * @sport: HCA port through which the MAD has been received.
308 * @slot: Slot number specified in DM_ATTR_IOC_PROFILE query.
309 * @mad: Datagram that will be sent as response to DM_ATTR_IOC_PROFILE.
288 * 310 *
289 * See also section 16.3.3.4 IOControllerProfile in the InfiniBand 311 * See also section 16.3.3.4 IOControllerProfile in the InfiniBand
290 * Architecture Specification. See also section B.7, table B.7 in the SRP 312 * Architecture Specification. See also section B.7, table B.7 in the SRP
@@ -314,7 +336,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
314 if (sdev->use_srq) 336 if (sdev->use_srq)
315 send_queue_depth = sdev->srq_size; 337 send_queue_depth = sdev->srq_size;
316 else 338 else
317 send_queue_depth = min(SRPT_RQ_SIZE, 339 send_queue_depth = min(MAX_SRPT_RQ_SIZE,
318 sdev->device->attrs.max_qp_wr); 340 sdev->device->attrs.max_qp_wr);
319 341
320 memset(iocp, 0, sizeof(*iocp)); 342 memset(iocp, 0, sizeof(*iocp));
@@ -342,7 +364,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
342} 364}
343 365
344/** 366/**
345 * srpt_get_svc_entries() - Write ServiceEntries to a management datagram. 367 * srpt_get_svc_entries - write ServiceEntries to a management datagram
368 * @ioc_guid: I/O controller GUID to use in reply.
369 * @slot: I/O controller number.
370 * @hi: End of the range of service entries to be specified in the reply.
371 * @lo: Start of the range of service entries to be specified in the reply.
372 * @mad: Datagram that will be sent as response to DM_ATTR_SVC_ENTRIES.
346 * 373 *
347 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture 374 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
348 * Specification. See also section B.7, table B.8 in the SRP r16a document. 375 * Specification. See also section B.7, table B.8 in the SRP r16a document.
@@ -379,8 +406,8 @@ static void srpt_get_svc_entries(u64 ioc_guid,
379} 406}
380 407
381/** 408/**
382 * srpt_mgmt_method_get() - Process a received management datagram. 409 * srpt_mgmt_method_get - process a received management datagram
383 * @sp: source port through which the MAD has been received. 410 * @sp: HCA port through which the MAD has been received.
384 * @rq_mad: received MAD. 411 * @rq_mad: received MAD.
385 * @rsp_mad: response MAD. 412 * @rsp_mad: response MAD.
386 */ 413 */
@@ -419,7 +446,9 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
419} 446}
420 447
421/** 448/**
422 * srpt_mad_send_handler() - Post MAD-send callback function. 449 * srpt_mad_send_handler - MAD send completion callback
450 * @mad_agent: Return value of ib_register_mad_agent().
451 * @mad_wc: Work completion reporting that the MAD has been sent.
423 */ 452 */
424static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, 453static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
425 struct ib_mad_send_wc *mad_wc) 454 struct ib_mad_send_wc *mad_wc)
@@ -429,7 +458,10 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
429} 458}
430 459
431/** 460/**
432 * srpt_mad_recv_handler() - MAD reception callback function. 461 * srpt_mad_recv_handler - MAD reception callback function
462 * @mad_agent: Return value of ib_register_mad_agent().
463 * @send_buf: Not used.
464 * @mad_wc: Work completion reporting that a MAD has been received.
433 */ 465 */
434static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, 466static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
435 struct ib_mad_send_buf *send_buf, 467 struct ib_mad_send_buf *send_buf,
@@ -493,8 +525,18 @@ err:
493 ib_free_recv_mad(mad_wc); 525 ib_free_recv_mad(mad_wc);
494} 526}
495 527
528static int srpt_format_guid(char *buf, unsigned int size, const __be64 *guid)
529{
530 const __be16 *g = (const __be16 *)guid;
531
532 return snprintf(buf, size, "%04x:%04x:%04x:%04x",
533 be16_to_cpu(g[0]), be16_to_cpu(g[1]),
534 be16_to_cpu(g[2]), be16_to_cpu(g[3]));
535}
536
496/** 537/**
497 * srpt_refresh_port() - Configure a HCA port. 538 * srpt_refresh_port - configure a HCA port
539 * @sport: SRPT HCA port.
498 * 540 *
499 * Enable InfiniBand management datagram processing, update the cached sm_lid, 541 * Enable InfiniBand management datagram processing, update the cached sm_lid,
500 * lid and gid values, and register a callback function for processing MADs 542 * lid and gid values, and register a callback function for processing MADs
@@ -507,7 +549,6 @@ static int srpt_refresh_port(struct srpt_port *sport)
507 struct ib_mad_reg_req reg_req; 549 struct ib_mad_reg_req reg_req;
508 struct ib_port_modify port_modify; 550 struct ib_port_modify port_modify;
509 struct ib_port_attr port_attr; 551 struct ib_port_attr port_attr;
510 __be16 *guid;
511 int ret; 552 int ret;
512 553
513 memset(&port_modify, 0, sizeof(port_modify)); 554 memset(&port_modify, 0, sizeof(port_modify));
@@ -531,11 +572,8 @@ static int srpt_refresh_port(struct srpt_port *sport)
531 goto err_query_port; 572 goto err_query_port;
532 573
533 sport->port_guid_wwn.priv = sport; 574 sport->port_guid_wwn.priv = sport;
534 guid = (__be16 *)&sport->gid.global.interface_id; 575 srpt_format_guid(sport->port_guid, sizeof(sport->port_guid),
535 snprintf(sport->port_guid, sizeof(sport->port_guid), 576 &sport->gid.global.interface_id);
536 "%04x:%04x:%04x:%04x",
537 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
538 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
539 sport->port_gid_wwn.priv = sport; 577 sport->port_gid_wwn.priv = sport;
540 snprintf(sport->port_gid, sizeof(sport->port_gid), 578 snprintf(sport->port_gid, sizeof(sport->port_gid),
541 "0x%016llx%016llx", 579 "0x%016llx%016llx",
@@ -577,7 +615,8 @@ err_mod_port:
577} 615}
578 616
579/** 617/**
580 * srpt_unregister_mad_agent() - Unregister MAD callback functions. 618 * srpt_unregister_mad_agent - unregister MAD callback functions
619 * @sdev: SRPT HCA pointer.
581 * 620 *
582 * Note: It is safe to call this function more than once for the same device. 621 * Note: It is safe to call this function more than once for the same device.
583 */ 622 */
@@ -602,7 +641,11 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
602} 641}
603 642
604/** 643/**
605 * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure. 644 * srpt_alloc_ioctx - allocate a SRPT I/O context structure
645 * @sdev: SRPT HCA pointer.
646 * @ioctx_size: I/O context size.
647 * @dma_size: Size of I/O context DMA buffer.
648 * @dir: DMA data direction.
606 */ 649 */
607static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, 650static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
608 int ioctx_size, int dma_size, 651 int ioctx_size, int dma_size,
@@ -633,7 +676,11 @@ err:
633} 676}
634 677
635/** 678/**
636 * srpt_free_ioctx() - Free an SRPT I/O context structure. 679 * srpt_free_ioctx - free a SRPT I/O context structure
680 * @sdev: SRPT HCA pointer.
681 * @ioctx: I/O context pointer.
682 * @dma_size: Size of I/O context DMA buffer.
683 * @dir: DMA data direction.
637 */ 684 */
638static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, 685static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
639 int dma_size, enum dma_data_direction dir) 686 int dma_size, enum dma_data_direction dir)
@@ -647,7 +694,7 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
647} 694}
648 695
649/** 696/**
650 * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures. 697 * srpt_alloc_ioctx_ring - allocate a ring of SRPT I/O context structures
651 * @sdev: Device to allocate the I/O context ring for. 698 * @sdev: Device to allocate the I/O context ring for.
652 * @ring_size: Number of elements in the I/O context ring. 699 * @ring_size: Number of elements in the I/O context ring.
653 * @ioctx_size: I/O context size. 700 * @ioctx_size: I/O context size.
@@ -685,7 +732,12 @@ out:
685} 732}
686 733
687/** 734/**
688 * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures. 735 * srpt_free_ioctx_ring - free the ring of SRPT I/O context structures
736 * @ioctx_ring: I/O context ring to be freed.
737 * @sdev: SRPT HCA pointer.
738 * @ring_size: Number of ring elements.
739 * @dma_size: Size of I/O context DMA buffer.
740 * @dir: DMA data direction.
689 */ 741 */
690static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, 742static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
691 struct srpt_device *sdev, int ring_size, 743 struct srpt_device *sdev, int ring_size,
@@ -702,23 +754,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
702} 754}
703 755
704/** 756/**
705 * srpt_get_cmd_state() - Get the state of a SCSI command. 757 * srpt_set_cmd_state - set the state of a SCSI command
706 */ 758 * @ioctx: Send I/O context.
707static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx) 759 * @new: New I/O context state.
708{
709 enum srpt_command_state state;
710 unsigned long flags;
711
712 BUG_ON(!ioctx);
713
714 spin_lock_irqsave(&ioctx->spinlock, flags);
715 state = ioctx->state;
716 spin_unlock_irqrestore(&ioctx->spinlock, flags);
717 return state;
718}
719
720/**
721 * srpt_set_cmd_state() - Set the state of a SCSI command.
722 * 760 *
723 * Does not modify the state of aborted commands. Returns the previous command 761 * Does not modify the state of aborted commands. Returns the previous command
724 * state. 762 * state.
@@ -727,21 +765,19 @@ static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx,
727 enum srpt_command_state new) 765 enum srpt_command_state new)
728{ 766{
729 enum srpt_command_state previous; 767 enum srpt_command_state previous;
730 unsigned long flags;
731 768
732 BUG_ON(!ioctx);
733
734 spin_lock_irqsave(&ioctx->spinlock, flags);
735 previous = ioctx->state; 769 previous = ioctx->state;
736 if (previous != SRPT_STATE_DONE) 770 if (previous != SRPT_STATE_DONE)
737 ioctx->state = new; 771 ioctx->state = new;
738 spin_unlock_irqrestore(&ioctx->spinlock, flags);
739 772
740 return previous; 773 return previous;
741} 774}
742 775
743/** 776/**
744 * srpt_test_and_set_cmd_state() - Test and set the state of a command. 777 * srpt_test_and_set_cmd_state - test and set the state of a command
778 * @ioctx: Send I/O context.
779 * @old: Current I/O context state.
780 * @new: New I/O context state.
745 * 781 *
746 * Returns true if and only if the previous command state was equal to 'old'. 782 * Returns true if and only if the previous command state was equal to 'old'.
747 */ 783 */
@@ -750,22 +786,23 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
750 enum srpt_command_state new) 786 enum srpt_command_state new)
751{ 787{
752 enum srpt_command_state previous; 788 enum srpt_command_state previous;
753 unsigned long flags;
754 789
755 WARN_ON(!ioctx); 790 WARN_ON(!ioctx);
756 WARN_ON(old == SRPT_STATE_DONE); 791 WARN_ON(old == SRPT_STATE_DONE);
757 WARN_ON(new == SRPT_STATE_NEW); 792 WARN_ON(new == SRPT_STATE_NEW);
758 793
759 spin_lock_irqsave(&ioctx->spinlock, flags);
760 previous = ioctx->state; 794 previous = ioctx->state;
761 if (previous == old) 795 if (previous == old)
762 ioctx->state = new; 796 ioctx->state = new;
763 spin_unlock_irqrestore(&ioctx->spinlock, flags); 797
764 return previous == old; 798 return previous == old;
765} 799}
766 800
767/** 801/**
768 * srpt_post_recv() - Post an IB receive request. 802 * srpt_post_recv - post an IB receive request
803 * @sdev: SRPT HCA pointer.
804 * @ch: SRPT RDMA channel.
805 * @ioctx: Receive I/O context pointer.
769 */ 806 */
770static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, 807static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
771 struct srpt_recv_ioctx *ioctx) 808 struct srpt_recv_ioctx *ioctx)
@@ -791,7 +828,8 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
791} 828}
792 829
793/** 830/**
794 * srpt_zerolength_write() - Perform a zero-length RDMA write. 831 * srpt_zerolength_write - perform a zero-length RDMA write
832 * @ch: SRPT RDMA channel.
795 * 833 *
796 * A quote from the InfiniBand specification: C9-88: For an HCA responder 834 * A quote from the InfiniBand specification: C9-88: For an HCA responder
797 * using Reliable Connection service, for each zero-length RDMA READ or WRITE 835 * using Reliable Connection service, for each zero-length RDMA READ or WRITE
@@ -802,6 +840,9 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
802{ 840{
803 struct ib_send_wr wr, *bad_wr; 841 struct ib_send_wr wr, *bad_wr;
804 842
843 pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
844 ch->qp->qp_num);
845
805 memset(&wr, 0, sizeof(wr)); 846 memset(&wr, 0, sizeof(wr));
806 wr.opcode = IB_WR_RDMA_WRITE; 847 wr.opcode = IB_WR_RDMA_WRITE;
807 wr.wr_cqe = &ch->zw_cqe; 848 wr.wr_cqe = &ch->zw_cqe;
@@ -813,13 +854,17 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
813{ 854{
814 struct srpt_rdma_ch *ch = cq->cq_context; 855 struct srpt_rdma_ch *ch = cq->cq_context;
815 856
857 pr_debug("%s-%d wc->status %d\n", ch->sess_name, ch->qp->qp_num,
858 wc->status);
859
816 if (wc->status == IB_WC_SUCCESS) { 860 if (wc->status == IB_WC_SUCCESS) {
817 srpt_process_wait_list(ch); 861 srpt_process_wait_list(ch);
818 } else { 862 } else {
819 if (srpt_set_ch_state(ch, CH_DISCONNECTED)) 863 if (srpt_set_ch_state(ch, CH_DISCONNECTED))
820 schedule_work(&ch->release_work); 864 schedule_work(&ch->release_work);
821 else 865 else
822 WARN_ONCE(1, "%s-%d\n", ch->sess_name, ch->qp->qp_num); 866 pr_debug("%s-%d: already disconnected.\n",
867 ch->sess_name, ch->qp->qp_num);
823 } 868 }
824} 869}
825 870
@@ -928,11 +973,13 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
928} 973}
929 974
930/** 975/**
931 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. 976 * srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request
932 * @ioctx: Pointer to the I/O context associated with the request. 977 * @ioctx: Pointer to the I/O context associated with the request.
933 * @srp_cmd: Pointer to the SRP_CMD request data. 978 * @srp_cmd: Pointer to the SRP_CMD request data.
934 * @dir: Pointer to the variable to which the transfer direction will be 979 * @dir: Pointer to the variable to which the transfer direction will be
935 * written. 980 * written.
981 * @sg: [out] scatterlist allocated for the parsed SRP_CMD.
982 * @sg_cnt: [out] length of @sg.
936 * @data_len: Pointer to the variable to which the total data length of all 983 * @data_len: Pointer to the variable to which the total data length of all
937 * descriptors in the SRP_CMD request will be written. 984 * descriptors in the SRP_CMD request will be written.
938 * 985 *
@@ -998,7 +1045,9 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
998} 1045}
999 1046
1000/** 1047/**
1001 * srpt_init_ch_qp() - Initialize queue pair attributes. 1048 * srpt_init_ch_qp - initialize queue pair attributes
1049 * @ch: SRPT RDMA channel.
1050 * @qp: Queue pair pointer.
1002 * 1051 *
1003 * Initialized the attributes of queue pair 'qp' by allowing local write, 1052 * Initialized the attributes of queue pair 'qp' by allowing local write,
1004 * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT. 1053 * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT.
@@ -1015,7 +1064,12 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1015 attr->qp_state = IB_QPS_INIT; 1064 attr->qp_state = IB_QPS_INIT;
1016 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE; 1065 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
1017 attr->port_num = ch->sport->port; 1066 attr->port_num = ch->sport->port;
1018 attr->pkey_index = 0; 1067
1068 ret = ib_find_cached_pkey(ch->sport->sdev->device, ch->sport->port,
1069 ch->pkey, &attr->pkey_index);
1070 if (ret < 0)
1071 pr_err("Translating pkey %#x failed (%d) - using index 0\n",
1072 ch->pkey, ret);
1019 1073
1020 ret = ib_modify_qp(qp, attr, 1074 ret = ib_modify_qp(qp, attr,
1021 IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | 1075 IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
@@ -1026,7 +1080,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1026} 1080}
1027 1081
1028/** 1082/**
1029 * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR). 1083 * srpt_ch_qp_rtr - change the state of a channel to 'ready to receive' (RTR)
1030 * @ch: channel of the queue pair. 1084 * @ch: channel of the queue pair.
1031 * @qp: queue pair to change the state of. 1085 * @qp: queue pair to change the state of.
1032 * 1086 *
@@ -1043,7 +1097,7 @@ static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1043 int ret; 1097 int ret;
1044 1098
1045 qp_attr.qp_state = IB_QPS_RTR; 1099 qp_attr.qp_state = IB_QPS_RTR;
1046 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); 1100 ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask);
1047 if (ret) 1101 if (ret)
1048 goto out; 1102 goto out;
1049 1103
@@ -1056,7 +1110,7 @@ out:
1056} 1110}
1057 1111
1058/** 1112/**
1059 * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS). 1113 * srpt_ch_qp_rts - change the state of a channel to 'ready to send' (RTS)
1060 * @ch: channel of the queue pair. 1114 * @ch: channel of the queue pair.
1061 * @qp: queue pair to change the state of. 1115 * @qp: queue pair to change the state of.
1062 * 1116 *
@@ -1073,7 +1127,7 @@ static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1073 int ret; 1127 int ret;
1074 1128
1075 qp_attr.qp_state = IB_QPS_RTS; 1129 qp_attr.qp_state = IB_QPS_RTS;
1076 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); 1130 ret = ib_cm_init_qp_attr(ch->ib_cm.cm_id, &qp_attr, &attr_mask);
1077 if (ret) 1131 if (ret)
1078 goto out; 1132 goto out;
1079 1133
@@ -1086,7 +1140,8 @@ out:
1086} 1140}
1087 1141
1088/** 1142/**
1089 * srpt_ch_qp_err() - Set the channel queue pair state to 'error'. 1143 * srpt_ch_qp_err - set the channel queue pair state to 'error'
1144 * @ch: SRPT RDMA channel.
1090 */ 1145 */
1091static int srpt_ch_qp_err(struct srpt_rdma_ch *ch) 1146static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
1092{ 1147{
@@ -1097,7 +1152,8 @@ static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
1097} 1152}
1098 1153
1099/** 1154/**
1100 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. 1155 * srpt_get_send_ioctx - obtain an I/O context for sending to the initiator
1156 * @ch: SRPT RDMA channel.
1101 */ 1157 */
1102static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) 1158static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1103{ 1159{
@@ -1119,11 +1175,9 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1119 return ioctx; 1175 return ioctx;
1120 1176
1121 BUG_ON(ioctx->ch != ch); 1177 BUG_ON(ioctx->ch != ch);
1122 spin_lock_init(&ioctx->spinlock);
1123 ioctx->state = SRPT_STATE_NEW; 1178 ioctx->state = SRPT_STATE_NEW;
1124 ioctx->n_rdma = 0; 1179 ioctx->n_rdma = 0;
1125 ioctx->n_rw_ctx = 0; 1180 ioctx->n_rw_ctx = 0;
1126 init_completion(&ioctx->tx_done);
1127 ioctx->queue_status_only = false; 1181 ioctx->queue_status_only = false;
1128 /* 1182 /*
1129 * transport_init_se_cmd() does not initialize all fields, so do it 1183 * transport_init_se_cmd() does not initialize all fields, so do it
@@ -1136,14 +1190,12 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1136} 1190}
1137 1191
1138/** 1192/**
1139 * srpt_abort_cmd() - Abort a SCSI command. 1193 * srpt_abort_cmd - abort a SCSI command
1140 * @ioctx: I/O context associated with the SCSI command. 1194 * @ioctx: I/O context associated with the SCSI command.
1141 * @context: Preferred execution context.
1142 */ 1195 */
1143static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) 1196static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1144{ 1197{
1145 enum srpt_command_state state; 1198 enum srpt_command_state state;
1146 unsigned long flags;
1147 1199
1148 BUG_ON(!ioctx); 1200 BUG_ON(!ioctx);
1149 1201
@@ -1152,7 +1204,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1152 * the ib_srpt driver, change the state to the next state. 1204 * the ib_srpt driver, change the state to the next state.
1153 */ 1205 */
1154 1206
1155 spin_lock_irqsave(&ioctx->spinlock, flags);
1156 state = ioctx->state; 1207 state = ioctx->state;
1157 switch (state) { 1208 switch (state) {
1158 case SRPT_STATE_NEED_DATA: 1209 case SRPT_STATE_NEED_DATA:
@@ -1167,7 +1218,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1167 __func__, state); 1218 __func__, state);
1168 break; 1219 break;
1169 } 1220 }
1170 spin_unlock_irqrestore(&ioctx->spinlock, flags);
1171 1221
1172 pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state, 1222 pr_debug("Aborting cmd with state %d -> %d and tag %lld\n", state,
1173 ioctx->state, ioctx->cmd.tag); 1223 ioctx->state, ioctx->cmd.tag);
@@ -1206,6 +1256,10 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1206} 1256}
1207 1257
1208/** 1258/**
1259 * srpt_rdma_read_done - RDMA read completion callback
1260 * @cq: Completion queue.
1261 * @wc: Work completion.
1262 *
1209 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping 1263 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
1210 * the data that has been transferred via IB RDMA had to be postponed until the 1264 * the data that has been transferred via IB RDMA had to be postponed until the
1211 * check_stop_free() callback. None of this is necessary anymore and needs to 1265 * check_stop_free() callback. None of this is necessary anymore and needs to
@@ -1233,11 +1287,11 @@ static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
1233 target_execute_cmd(&ioctx->cmd); 1287 target_execute_cmd(&ioctx->cmd);
1234 else 1288 else
1235 pr_err("%s[%d]: wrong state = %d\n", __func__, 1289 pr_err("%s[%d]: wrong state = %d\n", __func__,
1236 __LINE__, srpt_get_cmd_state(ioctx)); 1290 __LINE__, ioctx->state);
1237} 1291}
1238 1292
1239/** 1293/**
1240 * srpt_build_cmd_rsp() - Build an SRP_RSP response. 1294 * srpt_build_cmd_rsp - build a SRP_RSP response
1241 * @ch: RDMA channel through which the request has been received. 1295 * @ch: RDMA channel through which the request has been received.
1242 * @ioctx: I/O context associated with the SRP_CMD request. The response will 1296 * @ioctx: I/O context associated with the SRP_CMD request. The response will
1243 * be built in the buffer ioctx->buf points at and hence this function will 1297 * be built in the buffer ioctx->buf points at and hence this function will
@@ -1297,7 +1351,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1297} 1351}
1298 1352
1299/** 1353/**
1300 * srpt_build_tskmgmt_rsp() - Build a task management response. 1354 * srpt_build_tskmgmt_rsp - build a task management response
1301 * @ch: RDMA channel through which the request has been received. 1355 * @ch: RDMA channel through which the request has been received.
1302 * @ioctx: I/O context in which the SRP_RSP response will be built. 1356 * @ioctx: I/O context in which the SRP_RSP response will be built.
1303 * @rsp_code: RSP_CODE that will be stored in the response. 1357 * @rsp_code: RSP_CODE that will be stored in the response.
@@ -1345,7 +1399,10 @@ static int srpt_check_stop_free(struct se_cmd *cmd)
1345} 1399}
1346 1400
1347/** 1401/**
1348 * srpt_handle_cmd() - Process SRP_CMD. 1402 * srpt_handle_cmd - process a SRP_CMD information unit
1403 * @ch: SRPT RDMA channel.
1404 * @recv_ioctx: Receive I/O context.
1405 * @send_ioctx: Send I/O context.
1349 */ 1406 */
1350static void srpt_handle_cmd(struct srpt_rdma_ch *ch, 1407static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
1351 struct srpt_recv_ioctx *recv_ioctx, 1408 struct srpt_recv_ioctx *recv_ioctx,
@@ -1427,7 +1484,10 @@ static int srp_tmr_to_tcm(int fn)
1427} 1484}
1428 1485
1429/** 1486/**
1430 * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit. 1487 * srpt_handle_tsk_mgmt - process a SRP_TSK_MGMT information unit
1488 * @ch: SRPT RDMA channel.
1489 * @recv_ioctx: Receive I/O context.
1490 * @send_ioctx: Send I/O context.
1431 * 1491 *
1432 * Returns 0 if and only if the request will be processed by the target core. 1492 * Returns 0 if and only if the request will be processed by the target core.
1433 * 1493 *
@@ -1449,9 +1509,9 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1449 srp_tsk = recv_ioctx->ioctx.buf; 1509 srp_tsk = recv_ioctx->ioctx.buf;
1450 cmd = &send_ioctx->cmd; 1510 cmd = &send_ioctx->cmd;
1451 1511
1452 pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld" 1512 pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n",
1453 " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func, 1513 srp_tsk->tsk_mgmt_func, srp_tsk->task_tag, srp_tsk->tag, ch,
1454 srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); 1514 ch->sess);
1455 1515
1456 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); 1516 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
1457 send_ioctx->cmd.tag = srp_tsk->tag; 1517 send_ioctx->cmd.tag = srp_tsk->tag;
@@ -1470,41 +1530,42 @@ fail:
1470} 1530}
1471 1531
1472/** 1532/**
1473 * srpt_handle_new_iu() - Process a newly received information unit. 1533 * srpt_handle_new_iu - process a newly received information unit
1474 * @ch: RDMA channel through which the information unit has been received. 1534 * @ch: RDMA channel through which the information unit has been received.
1475 * @ioctx: SRPT I/O context associated with the information unit. 1535 * @recv_ioctx: Receive I/O context associated with the information unit.
1476 */ 1536 */
1477static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, 1537static bool
1478 struct srpt_recv_ioctx *recv_ioctx, 1538srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
1479 struct srpt_send_ioctx *send_ioctx)
1480{ 1539{
1540 struct srpt_send_ioctx *send_ioctx = NULL;
1481 struct srp_cmd *srp_cmd; 1541 struct srp_cmd *srp_cmd;
1542 bool res = false;
1543 u8 opcode;
1482 1544
1483 BUG_ON(!ch); 1545 BUG_ON(!ch);
1484 BUG_ON(!recv_ioctx); 1546 BUG_ON(!recv_ioctx);
1485 1547
1548 if (unlikely(ch->state == CH_CONNECTING))
1549 goto push;
1550
1486 ib_dma_sync_single_for_cpu(ch->sport->sdev->device, 1551 ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
1487 recv_ioctx->ioctx.dma, srp_max_req_size, 1552 recv_ioctx->ioctx.dma, srp_max_req_size,
1488 DMA_FROM_DEVICE); 1553 DMA_FROM_DEVICE);
1489 1554
1490 if (unlikely(ch->state == CH_CONNECTING))
1491 goto out_wait;
1492
1493 if (unlikely(ch->state != CH_LIVE))
1494 return;
1495
1496 srp_cmd = recv_ioctx->ioctx.buf; 1555 srp_cmd = recv_ioctx->ioctx.buf;
1497 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { 1556 opcode = srp_cmd->opcode;
1498 if (!send_ioctx) { 1557 if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) {
1499 if (!list_empty(&ch->cmd_wait_list)) 1558 send_ioctx = srpt_get_send_ioctx(ch);
1500 goto out_wait;
1501 send_ioctx = srpt_get_send_ioctx(ch);
1502 }
1503 if (unlikely(!send_ioctx)) 1559 if (unlikely(!send_ioctx))
1504 goto out_wait; 1560 goto push;
1505 } 1561 }
1506 1562
1507 switch (srp_cmd->opcode) { 1563 if (!list_empty(&recv_ioctx->wait_list)) {
1564 WARN_ON_ONCE(!ch->processing_wait_list);
1565 list_del_init(&recv_ioctx->wait_list);
1566 }
1567
1568 switch (opcode) {
1508 case SRP_CMD: 1569 case SRP_CMD:
1509 srpt_handle_cmd(ch, recv_ioctx, send_ioctx); 1570 srpt_handle_cmd(ch, recv_ioctx, send_ioctx);
1510 break; 1571 break;
@@ -1524,16 +1585,22 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1524 pr_err("Received SRP_RSP\n"); 1585 pr_err("Received SRP_RSP\n");
1525 break; 1586 break;
1526 default: 1587 default:
1527 pr_err("received IU with unknown opcode 0x%x\n", 1588 pr_err("received IU with unknown opcode 0x%x\n", opcode);
1528 srp_cmd->opcode);
1529 break; 1589 break;
1530 } 1590 }
1531 1591
1532 srpt_post_recv(ch->sport->sdev, ch, recv_ioctx); 1592 srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
1533 return; 1593 res = true;
1594
1595out:
1596 return res;
1534 1597
1535out_wait: 1598push:
1536 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); 1599 if (list_empty(&recv_ioctx->wait_list)) {
1600 WARN_ON_ONCE(ch->processing_wait_list);
1601 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
1602 }
1603 goto out;
1537} 1604}
1538 1605
1539static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) 1606static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1548,10 +1615,10 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1548 req_lim = atomic_dec_return(&ch->req_lim); 1615 req_lim = atomic_dec_return(&ch->req_lim);
1549 if (unlikely(req_lim < 0)) 1616 if (unlikely(req_lim < 0))
1550 pr_err("req_lim = %d < 0\n", req_lim); 1617 pr_err("req_lim = %d < 0\n", req_lim);
1551 srpt_handle_new_iu(ch, ioctx, NULL); 1618 srpt_handle_new_iu(ch, ioctx);
1552 } else { 1619 } else {
1553 pr_info("receiving failed for ioctx %p with status %d\n", 1620 pr_info_ratelimited("receiving failed for ioctx %p with status %d\n",
1554 ioctx, wc->status); 1621 ioctx, wc->status);
1555 } 1622 }
1556} 1623}
1557 1624
@@ -1562,22 +1629,28 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1562 */ 1629 */
1563static void srpt_process_wait_list(struct srpt_rdma_ch *ch) 1630static void srpt_process_wait_list(struct srpt_rdma_ch *ch)
1564{ 1631{
1565 struct srpt_send_ioctx *ioctx; 1632 struct srpt_recv_ioctx *recv_ioctx, *tmp;
1633
1634 WARN_ON_ONCE(ch->state == CH_CONNECTING);
1566 1635
1567 while (!list_empty(&ch->cmd_wait_list) && 1636 if (list_empty(&ch->cmd_wait_list))
1568 ch->state >= CH_LIVE && 1637 return;
1569 (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
1570 struct srpt_recv_ioctx *recv_ioctx;
1571 1638
1572 recv_ioctx = list_first_entry(&ch->cmd_wait_list, 1639 WARN_ON_ONCE(ch->processing_wait_list);
1573 struct srpt_recv_ioctx, 1640 ch->processing_wait_list = true;
1574 wait_list); 1641 list_for_each_entry_safe(recv_ioctx, tmp, &ch->cmd_wait_list,
1575 list_del(&recv_ioctx->wait_list); 1642 wait_list) {
1576 srpt_handle_new_iu(ch, recv_ioctx, ioctx); 1643 if (!srpt_handle_new_iu(ch, recv_ioctx))
1644 break;
1577 } 1645 }
1646 ch->processing_wait_list = false;
1578} 1647}
1579 1648
1580/** 1649/**
1650 * srpt_send_done - send completion callback
1651 * @cq: Completion queue.
1652 * @wc: Work completion.
1653 *
1581 * Note: Although this has not yet been observed during tests, at least in 1654 * Note: Although this has not yet been observed during tests, at least in
1582 * theory it is possible that the srpt_get_send_ioctx() call invoked by 1655 * theory it is possible that the srpt_get_send_ioctx() call invoked by
1583 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta 1656 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1619,7 +1692,8 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
1619} 1692}
1620 1693
1621/** 1694/**
1622 * srpt_create_ch_ib() - Create receive and send completion queues. 1695 * srpt_create_ch_ib - create receive and send completion queues
1696 * @ch: SRPT RDMA channel.
1623 */ 1697 */
1624static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) 1698static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1625{ 1699{
@@ -1627,7 +1701,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1627 struct srpt_port *sport = ch->sport; 1701 struct srpt_port *sport = ch->sport;
1628 struct srpt_device *sdev = sport->sdev; 1702 struct srpt_device *sdev = sport->sdev;
1629 const struct ib_device_attr *attrs = &sdev->device->attrs; 1703 const struct ib_device_attr *attrs = &sdev->device->attrs;
1630 u32 srp_sq_size = sport->port_attrib.srp_sq_size; 1704 int sq_size = sport->port_attrib.srp_sq_size;
1631 int i, ret; 1705 int i, ret;
1632 1706
1633 WARN_ON(ch->rq_size < 1); 1707 WARN_ON(ch->rq_size < 1);
@@ -1638,12 +1712,12 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1638 goto out; 1712 goto out;
1639 1713
1640retry: 1714retry:
1641 ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size, 1715 ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
1642 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); 1716 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
1643 if (IS_ERR(ch->cq)) { 1717 if (IS_ERR(ch->cq)) {
1644 ret = PTR_ERR(ch->cq); 1718 ret = PTR_ERR(ch->cq);
1645 pr_err("failed to create CQ cqe= %d ret= %d\n", 1719 pr_err("failed to create CQ cqe= %d ret= %d\n",
1646 ch->rq_size + srp_sq_size, ret); 1720 ch->rq_size + sq_size, ret);
1647 goto out; 1721 goto out;
1648 } 1722 }
1649 1723
@@ -1661,8 +1735,8 @@ retry:
1661 * both, as RDMA contexts will also post completions for the 1735 * both, as RDMA contexts will also post completions for the
1662 * RDMA READ case. 1736 * RDMA READ case.
1663 */ 1737 */
1664 qp_init->cap.max_send_wr = min(srp_sq_size / 2, attrs->max_qp_wr + 0U); 1738 qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
1665 qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; 1739 qp_init->cap.max_rdma_ctxs = sq_size / 2;
1666 qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); 1740 qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
1667 qp_init->port_num = ch->sport->port; 1741 qp_init->port_num = ch->sport->port;
1668 if (sdev->use_srq) { 1742 if (sdev->use_srq) {
@@ -1676,8 +1750,8 @@ retry:
1676 if (IS_ERR(ch->qp)) { 1750 if (IS_ERR(ch->qp)) {
1677 ret = PTR_ERR(ch->qp); 1751 ret = PTR_ERR(ch->qp);
1678 if (ret == -ENOMEM) { 1752 if (ret == -ENOMEM) {
1679 srp_sq_size /= 2; 1753 sq_size /= 2;
1680 if (srp_sq_size >= MIN_SRPT_SQ_SIZE) { 1754 if (sq_size >= MIN_SRPT_SQ_SIZE) {
1681 ib_destroy_cq(ch->cq); 1755 ib_destroy_cq(ch->cq);
1682 goto retry; 1756 goto retry;
1683 } 1757 }
@@ -1688,9 +1762,9 @@ retry:
1688 1762
1689 atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); 1763 atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
1690 1764
1691 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", 1765 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d ch= %p\n",
1692 __func__, ch->cq->cqe, qp_init->cap.max_send_sge, 1766 __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1693 qp_init->cap.max_send_wr, ch->cm_id); 1767 qp_init->cap.max_send_wr, ch);
1694 1768
1695 ret = srpt_init_ch_qp(ch, ch->qp); 1769 ret = srpt_init_ch_qp(ch, ch->qp);
1696 if (ret) 1770 if (ret)
@@ -1718,7 +1792,8 @@ static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
1718} 1792}
1719 1793
1720/** 1794/**
1721 * srpt_close_ch() - Close an RDMA channel. 1795 * srpt_close_ch - close a RDMA channel
1796 * @ch: SRPT RDMA channel.
1722 * 1797 *
1723 * Make sure all resources associated with the channel will be deallocated at 1798 * Make sure all resources associated with the channel will be deallocated at
1724 * an appropriate time. 1799 * an appropriate time.
@@ -1743,8 +1818,6 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch)
1743 pr_err("%s-%d: changing queue pair into error state failed: %d\n", 1818 pr_err("%s-%d: changing queue pair into error state failed: %d\n",
1744 ch->sess_name, ch->qp->qp_num, ret); 1819 ch->sess_name, ch->qp->qp_num, ret);
1745 1820
1746 pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
1747 ch->qp->qp_num);
1748 ret = srpt_zerolength_write(ch); 1821 ret = srpt_zerolength_write(ch);
1749 if (ret < 0) { 1822 if (ret < 0) {
1750 pr_err("%s-%d: queuing zero-length write failed: %d\n", 1823 pr_err("%s-%d: queuing zero-length write failed: %d\n",
@@ -1776,9 +1849,9 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
1776 if (!srpt_set_ch_state(ch, CH_DISCONNECTING)) 1849 if (!srpt_set_ch_state(ch, CH_DISCONNECTING))
1777 return -ENOTCONN; 1850 return -ENOTCONN;
1778 1851
1779 ret = ib_send_cm_dreq(ch->cm_id, NULL, 0); 1852 ret = ib_send_cm_dreq(ch->ib_cm.cm_id, NULL, 0);
1780 if (ret < 0) 1853 if (ret < 0)
1781 ret = ib_send_cm_drep(ch->cm_id, NULL, 0); 1854 ret = ib_send_cm_drep(ch->ib_cm.cm_id, NULL, 0);
1782 1855
1783 if (ret < 0 && srpt_close_ch(ch)) 1856 if (ret < 0 && srpt_close_ch(ch))
1784 ret = 0; 1857 ret = 0;
@@ -1786,83 +1859,135 @@ static int srpt_disconnect_ch(struct srpt_rdma_ch *ch)
1786 return ret; 1859 return ret;
1787} 1860}
1788 1861
1789/* 1862static bool srpt_ch_closed(struct srpt_port *sport, struct srpt_rdma_ch *ch)
1790 * Send DREQ and wait for DREP. Return true if and only if this function
1791 * changed the state of @ch.
1792 */
1793static bool srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
1794 __must_hold(&sdev->mutex)
1795{ 1863{
1796 DECLARE_COMPLETION_ONSTACK(release_done); 1864 struct srpt_nexus *nexus;
1797 struct srpt_device *sdev = ch->sport->sdev; 1865 struct srpt_rdma_ch *ch2;
1798 bool wait; 1866 bool res = true;
1867
1868 rcu_read_lock();
1869 list_for_each_entry(nexus, &sport->nexus_list, entry) {
1870 list_for_each_entry(ch2, &nexus->ch_list, list) {
1871 if (ch2 == ch) {
1872 res = false;
1873 goto done;
1874 }
1875 }
1876 }
1877done:
1878 rcu_read_unlock();
1799 1879
1800 lockdep_assert_held(&sdev->mutex); 1880 return res;
1881}
1882
1883/* Send DREQ and wait for DREP. */
1884static void srpt_disconnect_ch_sync(struct srpt_rdma_ch *ch)
1885{
1886 struct srpt_port *sport = ch->sport;
1801 1887
1802 pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num, 1888 pr_debug("ch %s-%d state %d\n", ch->sess_name, ch->qp->qp_num,
1803 ch->state); 1889 ch->state);
1804 1890
1805 WARN_ON(ch->release_done); 1891 mutex_lock(&sport->mutex);
1806 ch->release_done = &release_done;
1807 wait = !list_empty(&ch->list);
1808 srpt_disconnect_ch(ch); 1892 srpt_disconnect_ch(ch);
1809 mutex_unlock(&sdev->mutex); 1893 mutex_unlock(&sport->mutex);
1810 1894
1811 if (!wait) 1895 while (wait_event_timeout(sport->ch_releaseQ, srpt_ch_closed(sport, ch),
1812 goto out; 1896 5 * HZ) == 0)
1813
1814 while (wait_for_completion_timeout(&release_done, 180 * HZ) == 0)
1815 pr_info("%s(%s-%d state %d): still waiting ...\n", __func__, 1897 pr_info("%s(%s-%d state %d): still waiting ...\n", __func__,
1816 ch->sess_name, ch->qp->qp_num, ch->state); 1898 ch->sess_name, ch->qp->qp_num, ch->state);
1817 1899
1818out:
1819 mutex_lock(&sdev->mutex);
1820 return wait;
1821} 1900}
1822 1901
1823static void srpt_set_enabled(struct srpt_port *sport, bool enabled) 1902static void __srpt_close_all_ch(struct srpt_port *sport)
1824 __must_hold(&sdev->mutex)
1825{ 1903{
1826 struct srpt_device *sdev = sport->sdev; 1904 struct srpt_nexus *nexus;
1827 struct srpt_rdma_ch *ch; 1905 struct srpt_rdma_ch *ch;
1828 1906
1829 lockdep_assert_held(&sdev->mutex); 1907 lockdep_assert_held(&sport->mutex);
1830 1908
1831 if (sport->enabled == enabled) 1909 list_for_each_entry(nexus, &sport->nexus_list, entry) {
1832 return; 1910 list_for_each_entry(ch, &nexus->ch_list, list) {
1833 sport->enabled = enabled; 1911 if (srpt_disconnect_ch(ch) >= 0)
1834 if (sport->enabled) 1912 pr_info("Closing channel %s-%d because target %s_%d has been disabled\n",
1835 return; 1913 ch->sess_name, ch->qp->qp_num,
1914 sport->sdev->device->name, sport->port);
1915 srpt_close_ch(ch);
1916 }
1917 }
1918}
1836 1919
1837again: 1920/*
1838 list_for_each_entry(ch, &sdev->rch_list, list) { 1921 * Look up (i_port_id, t_port_id) in sport->nexus_list. Create an entry if
1839 if (ch->sport == sport) { 1922 * it does not yet exist.
1840 pr_info("%s: closing channel %s-%d\n", 1923 */
1841 sdev->device->name, ch->sess_name, 1924static struct srpt_nexus *srpt_get_nexus(struct srpt_port *sport,
1842 ch->qp->qp_num); 1925 const u8 i_port_id[16],
1843 if (srpt_disconnect_ch_sync(ch)) 1926 const u8 t_port_id[16])
1844 goto again; 1927{
1928 struct srpt_nexus *nexus = NULL, *tmp_nexus = NULL, *n;
1929
1930 for (;;) {
1931 mutex_lock(&sport->mutex);
1932 list_for_each_entry(n, &sport->nexus_list, entry) {
1933 if (memcmp(n->i_port_id, i_port_id, 16) == 0 &&
1934 memcmp(n->t_port_id, t_port_id, 16) == 0) {
1935 nexus = n;
1936 break;
1937 }
1845 } 1938 }
1939 if (!nexus && tmp_nexus) {
1940 list_add_tail_rcu(&tmp_nexus->entry,
1941 &sport->nexus_list);
1942 swap(nexus, tmp_nexus);
1943 }
1944 mutex_unlock(&sport->mutex);
1945
1946 if (nexus)
1947 break;
1948 tmp_nexus = kzalloc(sizeof(*nexus), GFP_KERNEL);
1949 if (!tmp_nexus) {
1950 nexus = ERR_PTR(-ENOMEM);
1951 break;
1952 }
1953 INIT_LIST_HEAD(&tmp_nexus->ch_list);
1954 memcpy(tmp_nexus->i_port_id, i_port_id, 16);
1955 memcpy(tmp_nexus->t_port_id, t_port_id, 16);
1846 } 1956 }
1847 1957
1958 kfree(tmp_nexus);
1959
1960 return nexus;
1961}
1962
1963static void srpt_set_enabled(struct srpt_port *sport, bool enabled)
1964 __must_hold(&sport->mutex)
1965{
1966 lockdep_assert_held(&sport->mutex);
1967
1968 if (sport->enabled == enabled)
1969 return;
1970 sport->enabled = enabled;
1971 if (!enabled)
1972 __srpt_close_all_ch(sport);
1848} 1973}
1849 1974
1850static void srpt_free_ch(struct kref *kref) 1975static void srpt_free_ch(struct kref *kref)
1851{ 1976{
1852 struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref); 1977 struct srpt_rdma_ch *ch = container_of(kref, struct srpt_rdma_ch, kref);
1853 1978
1854 kfree(ch); 1979 kfree_rcu(ch, rcu);
1855} 1980}
1856 1981
1857static void srpt_release_channel_work(struct work_struct *w) 1982static void srpt_release_channel_work(struct work_struct *w)
1858{ 1983{
1859 struct srpt_rdma_ch *ch; 1984 struct srpt_rdma_ch *ch;
1860 struct srpt_device *sdev; 1985 struct srpt_device *sdev;
1986 struct srpt_port *sport;
1861 struct se_session *se_sess; 1987 struct se_session *se_sess;
1862 1988
1863 ch = container_of(w, struct srpt_rdma_ch, release_work); 1989 ch = container_of(w, struct srpt_rdma_ch, release_work);
1864 pr_debug("%s: %s-%d; release_done = %p\n", __func__, ch->sess_name, 1990 pr_debug("%s-%d\n", ch->sess_name, ch->qp->qp_num);
1865 ch->qp->qp_num, ch->release_done);
1866 1991
1867 sdev = ch->sport->sdev; 1992 sdev = ch->sport->sdev;
1868 BUG_ON(!sdev); 1993 BUG_ON(!sdev);
@@ -1877,169 +2002,141 @@ static void srpt_release_channel_work(struct work_struct *w)
1877 transport_deregister_session(se_sess); 2002 transport_deregister_session(se_sess);
1878 ch->sess = NULL; 2003 ch->sess = NULL;
1879 2004
1880 ib_destroy_cm_id(ch->cm_id); 2005 ib_destroy_cm_id(ch->ib_cm.cm_id);
1881 2006
1882 srpt_destroy_ch_ib(ch); 2007 srpt_destroy_ch_ib(ch);
1883 2008
1884 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, 2009 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
1885 ch->sport->sdev, ch->rq_size, 2010 ch->sport->sdev, ch->rq_size,
1886 ch->rsp_size, DMA_TO_DEVICE); 2011 ch->max_rsp_size, DMA_TO_DEVICE);
1887 2012
1888 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring, 2013 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
1889 sdev, ch->rq_size, 2014 sdev, ch->rq_size,
1890 srp_max_req_size, DMA_FROM_DEVICE); 2015 srp_max_req_size, DMA_FROM_DEVICE);
1891 2016
1892 mutex_lock(&sdev->mutex); 2017 sport = ch->sport;
1893 list_del_init(&ch->list); 2018 mutex_lock(&sport->mutex);
1894 if (ch->release_done) 2019 list_del_rcu(&ch->list);
1895 complete(ch->release_done); 2020 mutex_unlock(&sport->mutex);
1896 mutex_unlock(&sdev->mutex);
1897 2021
1898 wake_up(&sdev->ch_releaseQ); 2022 wake_up(&sport->ch_releaseQ);
1899 2023
1900 kref_put(&ch->kref, srpt_free_ch); 2024 kref_put(&ch->kref, srpt_free_ch);
1901} 2025}
1902 2026
1903/** 2027/**
1904 * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED. 2028 * srpt_cm_req_recv - process the event IB_CM_REQ_RECEIVED
2029 * @cm_id: IB/CM connection identifier.
2030 * @port_num: Port through which the IB/CM REQ message was received.
2031 * @pkey: P_Key of the incoming connection.
2032 * @req: SRP login request.
2033 * @src_addr: GID of the port that submitted the login request.
1905 * 2034 *
1906 * Ownership of the cm_id is transferred to the target session if this 2035 * Ownership of the cm_id is transferred to the target session if this
1907 * function returns zero. Otherwise the caller remains the owner of cm_id. 2036 * function returns zero. Otherwise the caller remains the owner of cm_id.
1908 */ 2037 */
1909static int srpt_cm_req_recv(struct ib_cm_id *cm_id, 2038static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1910 struct ib_cm_req_event_param *param, 2039 u8 port_num, __be16 pkey,
1911 void *private_data) 2040 const struct srp_login_req *req,
2041 const char *src_addr)
1912{ 2042{
1913 struct srpt_device *sdev = cm_id->context; 2043 struct srpt_device *sdev = cm_id->context;
1914 struct srpt_port *sport = &sdev->port[param->port - 1]; 2044 struct srpt_port *sport = &sdev->port[port_num - 1];
1915 struct srp_login_req *req; 2045 struct srpt_nexus *nexus;
1916 struct srp_login_rsp *rsp; 2046 struct srp_login_rsp *rsp = NULL;
1917 struct srp_login_rej *rej; 2047 struct srp_login_rej *rej = NULL;
1918 struct ib_cm_rep_param *rep_param; 2048 struct ib_cm_rep_param *rep_param = NULL;
1919 struct srpt_rdma_ch *ch, *tmp_ch; 2049 struct srpt_rdma_ch *ch;
1920 __be16 *guid; 2050 char i_port_id[36];
1921 u32 it_iu_len; 2051 u32 it_iu_len;
1922 int i, ret = 0; 2052 int i, ret;
1923 2053
1924 WARN_ON_ONCE(irqs_disabled()); 2054 WARN_ON_ONCE(irqs_disabled());
1925 2055
1926 if (WARN_ON(!sdev || !private_data)) 2056 if (WARN_ON(!sdev || !req))
1927 return -EINVAL; 2057 return -EINVAL;
1928 2058
1929 req = (struct srp_login_req *)private_data;
1930
1931 it_iu_len = be32_to_cpu(req->req_it_iu_len); 2059 it_iu_len = be32_to_cpu(req->req_it_iu_len);
1932 2060
1933 pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx," 2061 pr_info("Received SRP_LOGIN_REQ with i_port_id %pI6, t_port_id %pI6 and it_iu_len %d on port %d (guid=%pI6); pkey %#04x\n",
1934 " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d" 2062 req->initiator_port_id, req->target_port_id, it_iu_len,
1935 " (guid=0x%llx:0x%llx)\n", 2063 port_num, &sport->gid, be16_to_cpu(pkey));
1936 be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
1937 be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
1938 be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
1939 be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
1940 it_iu_len,
1941 param->port,
1942 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
1943 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
1944 2064
2065 nexus = srpt_get_nexus(sport, req->initiator_port_id,
2066 req->target_port_id);
2067 if (IS_ERR(nexus)) {
2068 ret = PTR_ERR(nexus);
2069 goto out;
2070 }
2071
2072 ret = -ENOMEM;
1945 rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); 2073 rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
1946 rej = kzalloc(sizeof(*rej), GFP_KERNEL); 2074 rej = kzalloc(sizeof(*rej), GFP_KERNEL);
1947 rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL); 2075 rep_param = kzalloc(sizeof(*rep_param), GFP_KERNEL);
1948 2076 if (!rsp || !rej || !rep_param)
1949 if (!rsp || !rej || !rep_param) {
1950 ret = -ENOMEM;
1951 goto out; 2077 goto out;
1952 }
1953 2078
2079 ret = -EINVAL;
1954 if (it_iu_len > srp_max_req_size || it_iu_len < 64) { 2080 if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
1955 rej->reason = cpu_to_be32( 2081 rej->reason = cpu_to_be32(
1956 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); 2082 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1957 ret = -EINVAL; 2083 pr_err("rejected SRP_LOGIN_REQ because its length (%d bytes) is out of range (%d .. %d)\n",
1958 pr_err("rejected SRP_LOGIN_REQ because its"
1959 " length (%d bytes) is out of range (%d .. %d)\n",
1960 it_iu_len, 64, srp_max_req_size); 2084 it_iu_len, 64, srp_max_req_size);
1961 goto reject; 2085 goto reject;
1962 } 2086 }
1963 2087
1964 if (!sport->enabled) { 2088 if (!sport->enabled) {
1965 rej->reason = cpu_to_be32( 2089 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1966 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2090 pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
1967 ret = -EINVAL; 2091 sport->sdev->device->name, port_num);
1968 pr_err("rejected SRP_LOGIN_REQ because the target port"
1969 " has not yet been enabled\n");
1970 goto reject; 2092 goto reject;
1971 } 2093 }
1972 2094
1973 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
1974 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1975
1976 mutex_lock(&sdev->mutex);
1977
1978 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1979 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1980 && !memcmp(ch->t_port_id, req->target_port_id, 16)
1981 && param->port == ch->sport->port
1982 && param->listen_id == ch->sport->sdev->cm_id
1983 && ch->cm_id) {
1984 if (srpt_disconnect_ch(ch) < 0)
1985 continue;
1986 pr_info("Relogin - closed existing channel %s\n",
1987 ch->sess_name);
1988 rsp->rsp_flags =
1989 SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1990 }
1991 }
1992
1993 mutex_unlock(&sdev->mutex);
1994
1995 } else
1996 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1997
1998 if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) 2095 if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
1999 || *(__be64 *)(req->target_port_id + 8) != 2096 || *(__be64 *)(req->target_port_id + 8) !=
2000 cpu_to_be64(srpt_service_guid)) { 2097 cpu_to_be64(srpt_service_guid)) {
2001 rej->reason = cpu_to_be32( 2098 rej->reason = cpu_to_be32(
2002 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); 2099 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
2003 ret = -ENOMEM; 2100 pr_err("rejected SRP_LOGIN_REQ because it has an invalid target port identifier.\n");
2004 pr_err("rejected SRP_LOGIN_REQ because it"
2005 " has an invalid target port identifier.\n");
2006 goto reject; 2101 goto reject;
2007 } 2102 }
2008 2103
2104 ret = -ENOMEM;
2009 ch = kzalloc(sizeof(*ch), GFP_KERNEL); 2105 ch = kzalloc(sizeof(*ch), GFP_KERNEL);
2010 if (!ch) { 2106 if (!ch) {
2011 rej->reason = cpu_to_be32( 2107 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2012 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2108 pr_err("rejected SRP_LOGIN_REQ because out of memory.\n");
2013 pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
2014 ret = -ENOMEM;
2015 goto reject; 2109 goto reject;
2016 } 2110 }
2017 2111
2018 kref_init(&ch->kref); 2112 kref_init(&ch->kref);
2113 ch->pkey = be16_to_cpu(pkey);
2114 ch->nexus = nexus;
2019 ch->zw_cqe.done = srpt_zerolength_write_done; 2115 ch->zw_cqe.done = srpt_zerolength_write_done;
2020 INIT_WORK(&ch->release_work, srpt_release_channel_work); 2116 INIT_WORK(&ch->release_work, srpt_release_channel_work);
2021 memcpy(ch->i_port_id, req->initiator_port_id, 16); 2117 ch->sport = sport;
2022 memcpy(ch->t_port_id, req->target_port_id, 16); 2118 ch->ib_cm.cm_id = cm_id;
2023 ch->sport = &sdev->port[param->port - 1];
2024 ch->cm_id = cm_id;
2025 cm_id->context = ch; 2119 cm_id->context = ch;
2026 /* 2120 /*
2027 * ch->rq_size should be at least as large as the initiator queue 2121 * ch->rq_size should be at least as large as the initiator queue
2028 * depth to prevent the initiator driver from having to report QUEUE_FULL 2122 * depth to prevent the initiator driver from having to report QUEUE_FULL
2029 * to the SCSI mid-layer. 2123 * to the SCSI mid-layer.
2030 */ 2124 */
2031 ch->rq_size = min(SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr); 2125 ch->rq_size = min(MAX_SRPT_RQ_SIZE, sdev->device->attrs.max_qp_wr);
2032 spin_lock_init(&ch->spinlock); 2126 spin_lock_init(&ch->spinlock);
2033 ch->state = CH_CONNECTING; 2127 ch->state = CH_CONNECTING;
2034 INIT_LIST_HEAD(&ch->cmd_wait_list); 2128 INIT_LIST_HEAD(&ch->cmd_wait_list);
2035 ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size; 2129 ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
2036 2130
2037 ch->ioctx_ring = (struct srpt_send_ioctx **) 2131 ch->ioctx_ring = (struct srpt_send_ioctx **)
2038 srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, 2132 srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
2039 sizeof(*ch->ioctx_ring[0]), 2133 sizeof(*ch->ioctx_ring[0]),
2040 ch->rsp_size, DMA_TO_DEVICE); 2134 ch->max_rsp_size, DMA_TO_DEVICE);
2041 if (!ch->ioctx_ring) 2135 if (!ch->ioctx_ring) {
2136 pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
2137 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2042 goto free_ch; 2138 goto free_ch;
2139 }
2043 2140
2044 INIT_LIST_HEAD(&ch->free_list); 2141 INIT_LIST_HEAD(&ch->free_list);
2045 for (i = 0; i < ch->rq_size; i++) { 2142 for (i = 0; i < ch->rq_size; i++) {
@@ -2058,59 +2155,88 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2058 cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2155 cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2059 goto free_ring; 2156 goto free_ring;
2060 } 2157 }
2158 for (i = 0; i < ch->rq_size; i++)
2159 INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list);
2061 } 2160 }
2062 2161
2063 ret = srpt_create_ch_ib(ch); 2162 ret = srpt_create_ch_ib(ch);
2064 if (ret) { 2163 if (ret) {
2065 rej->reason = cpu_to_be32(
2066 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2067 pr_err("rejected SRP_LOGIN_REQ because creating"
2068 " a new RDMA channel failed.\n");
2069 goto free_recv_ring;
2070 }
2071
2072 ret = srpt_ch_qp_rtr(ch, ch->qp);
2073 if (ret) {
2074 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2164 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2075 pr_err("rejected SRP_LOGIN_REQ because enabling" 2165 pr_err("rejected SRP_LOGIN_REQ because creating a new RDMA channel failed.\n");
2076 " RTR failed (error code = %d)\n", ret); 2166 goto free_recv_ring;
2077 goto destroy_ib;
2078 } 2167 }
2079 2168
2080 guid = (__be16 *)&param->primary_path->dgid.global.interface_id; 2169 strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name));
2081 snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x", 2170 snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx",
2082 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]), 2171 be64_to_cpu(*(__be64 *)nexus->i_port_id),
2083 be16_to_cpu(guid[2]), be16_to_cpu(guid[3])); 2172 be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8)));
2084 snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
2085 be64_to_cpu(*(__be64 *)ch->i_port_id),
2086 be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
2087 2173
2088 pr_debug("registering session %s\n", ch->sess_name); 2174 pr_debug("registering session %s\n", ch->sess_name);
2089 2175
2090 if (sport->port_guid_tpg.se_tpg_wwn) 2176 if (sport->port_guid_tpg.se_tpg_wwn)
2091 ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0, 2177 ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
2092 TARGET_PROT_NORMAL, 2178 TARGET_PROT_NORMAL,
2093 ch->ini_guid, ch, NULL); 2179 ch->sess_name, ch, NULL);
2094 if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) 2180 if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
2095 ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0, 2181 ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
2096 TARGET_PROT_NORMAL, ch->sess_name, ch, 2182 TARGET_PROT_NORMAL, i_port_id, ch,
2097 NULL); 2183 NULL);
2098 /* Retry without leading "0x" */ 2184 /* Retry without leading "0x" */
2099 if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess)) 2185 if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
2100 ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0, 2186 ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
2101 TARGET_PROT_NORMAL, 2187 TARGET_PROT_NORMAL,
2102 ch->sess_name + 2, ch, NULL); 2188 i_port_id + 2, ch, NULL);
2103 if (IS_ERR_OR_NULL(ch->sess)) { 2189 if (IS_ERR_OR_NULL(ch->sess)) {
2104 pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n", 2190 ret = PTR_ERR(ch->sess);
2105 ch->sess_name); 2191 pr_info("Rejected login for initiator %s: ret = %d.\n",
2106 rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ? 2192 ch->sess_name, ret);
2193 rej->reason = cpu_to_be32(ret == -ENOMEM ?
2107 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : 2194 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
2108 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); 2195 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
2196 goto reject;
2197 }
2198
2199 mutex_lock(&sport->mutex);
2200
2201 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
2202 struct srpt_rdma_ch *ch2;
2203
2204 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
2205
2206 list_for_each_entry(ch2, &nexus->ch_list, list) {
2207 if (srpt_disconnect_ch(ch2) < 0)
2208 continue;
2209 pr_info("Relogin - closed existing channel %s\n",
2210 ch2->sess_name);
2211 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
2212 }
2213 } else {
2214 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
2215 }
2216
2217 list_add_tail_rcu(&ch->list, &nexus->ch_list);
2218
2219 if (!sport->enabled) {
2220 rej->reason = cpu_to_be32(
2221 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2222 pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
2223 sdev->device->name, port_num);
2224 mutex_unlock(&sport->mutex);
2225 goto reject;
2226 }
2227
2228 mutex_unlock(&sport->mutex);
2229
2230 ret = srpt_ch_qp_rtr(ch, ch->qp);
2231 if (ret) {
2232 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2233 pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
2234 ret);
2109 goto destroy_ib; 2235 goto destroy_ib;
2110 } 2236 }
2111 2237
2112 pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess, 2238 pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess,
2113 ch->sess_name, ch->cm_id); 2239 ch->sess_name, ch);
2114 2240
2115 /* create srp_login_response */ 2241 /* create srp_login_response */
2116 rsp->opcode = SRP_LOGIN_RSP; 2242 rsp->opcode = SRP_LOGIN_RSP;
@@ -2118,8 +2244,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2118 rsp->max_it_iu_len = req->req_it_iu_len; 2244 rsp->max_it_iu_len = req->req_it_iu_len;
2119 rsp->max_ti_iu_len = req->req_it_iu_len; 2245 rsp->max_ti_iu_len = req->req_it_iu_len;
2120 ch->max_ti_iu_len = it_iu_len; 2246 ch->max_ti_iu_len = it_iu_len;
2121 rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT 2247 rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
2122 | SRP_BUF_FORMAT_INDIRECT); 2248 SRP_BUF_FORMAT_INDIRECT);
2123 rsp->req_lim_delta = cpu_to_be32(ch->rq_size); 2249 rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
2124 atomic_set(&ch->req_lim, ch->rq_size); 2250 atomic_set(&ch->req_lim, ch->rq_size);
2125 atomic_set(&ch->req_lim_delta, 0); 2251 atomic_set(&ch->req_lim_delta, 0);
@@ -2135,25 +2261,31 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2135 rep_param->responder_resources = 4; 2261 rep_param->responder_resources = 4;
2136 rep_param->initiator_depth = 4; 2262 rep_param->initiator_depth = 4;
2137 2263
2138 ret = ib_send_cm_rep(cm_id, rep_param); 2264 /*
2139 if (ret) { 2265 * Hold the sport mutex while accepting a connection to prevent
2140 pr_err("sending SRP_LOGIN_REQ response failed" 2266 * srpt_disconnect_ch() from being invoked concurrently with this code.
2141 " (error code = %d)\n", ret); 2267 */
2142 goto release_channel; 2268 mutex_lock(&sport->mutex);
2143 } 2269 if (sport->enabled && ch->state == CH_CONNECTING)
2270 ret = ib_send_cm_rep(cm_id, rep_param);
2271 else
2272 ret = -EINVAL;
2273 mutex_unlock(&sport->mutex);
2144 2274
2145 mutex_lock(&sdev->mutex); 2275 switch (ret) {
2146 list_add_tail(&ch->list, &sdev->rch_list); 2276 case 0:
2147 mutex_unlock(&sdev->mutex); 2277 break;
2278 case -EINVAL:
2279 goto reject;
2280 default:
2281 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2282 pr_err("sending SRP_LOGIN_REQ response failed (error code = %d)\n",
2283 ret);
2284 goto reject;
2285 }
2148 2286
2149 goto out; 2287 goto out;
2150 2288
2151release_channel:
2152 srpt_disconnect_ch(ch);
2153 transport_deregister_session_configfs(ch->sess);
2154 transport_deregister_session(ch->sess);
2155 ch->sess = NULL;
2156
2157destroy_ib: 2289destroy_ib:
2158 srpt_destroy_ch_ib(ch); 2290 srpt_destroy_ch_ib(ch);
2159 2291
@@ -2165,15 +2297,20 @@ free_recv_ring:
2165free_ring: 2297free_ring:
2166 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, 2298 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
2167 ch->sport->sdev, ch->rq_size, 2299 ch->sport->sdev, ch->rq_size,
2168 ch->rsp_size, DMA_TO_DEVICE); 2300 ch->max_rsp_size, DMA_TO_DEVICE);
2169free_ch: 2301free_ch:
2302 cm_id->context = NULL;
2170 kfree(ch); 2303 kfree(ch);
2304 ch = NULL;
2305
2306 WARN_ON_ONCE(ret == 0);
2171 2307
2172reject: 2308reject:
2309 pr_info("Rejecting login with reason %#x\n", be32_to_cpu(rej->reason));
2173 rej->opcode = SRP_LOGIN_REJ; 2310 rej->opcode = SRP_LOGIN_REJ;
2174 rej->tag = req->tag; 2311 rej->tag = req->tag;
2175 rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT 2312 rej->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
2176 | SRP_BUF_FORMAT_INDIRECT); 2313 SRP_BUF_FORMAT_INDIRECT);
2177 2314
2178 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, 2315 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2179 (void *)rej, sizeof(*rej)); 2316 (void *)rej, sizeof(*rej));
@@ -2186,6 +2323,19 @@ out:
2186 return ret; 2323 return ret;
2187} 2324}
2188 2325
2326static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
2327 struct ib_cm_req_event_param *param,
2328 void *private_data)
2329{
2330 char sguid[40];
2331
2332 srpt_format_guid(sguid, sizeof(sguid),
2333 &param->primary_path->dgid.global.interface_id);
2334
2335 return srpt_cm_req_recv(cm_id, param->port, param->primary_path->pkey,
2336 private_data, sguid);
2337}
2338
2189static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch, 2339static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
2190 enum ib_cm_rej_reason reason, 2340 enum ib_cm_rej_reason reason,
2191 const u8 *private_data, 2341 const u8 *private_data,
@@ -2206,7 +2356,8 @@ static void srpt_cm_rej_recv(struct srpt_rdma_ch *ch,
2206} 2356}
2207 2357
2208/** 2358/**
2209 * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event. 2359 * srpt_cm_rtu_recv - process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event
2360 * @ch: SRPT RDMA channel.
2210 * 2361 *
2211 * An IB_CM_RTU_RECEIVED message indicates that the connection is established 2362 * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2212 * and that the recipient may begin transmitting (RTU = ready to use). 2363 * and that the recipient may begin transmitting (RTU = ready to use).
@@ -2215,21 +2366,34 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
2215{ 2366{
2216 int ret; 2367 int ret;
2217 2368
2218 if (srpt_set_ch_state(ch, CH_LIVE)) { 2369 ret = srpt_ch_qp_rts(ch, ch->qp);
2219 ret = srpt_ch_qp_rts(ch, ch->qp); 2370 if (ret < 0) {
2371 pr_err("%s-%d: QP transition to RTS failed\n", ch->sess_name,
2372 ch->qp->qp_num);
2373 srpt_close_ch(ch);
2374 return;
2375 }
2220 2376
2221 if (ret == 0) { 2377 /*
2222 /* Trigger wait list processing. */ 2378 * Note: calling srpt_close_ch() if the transition to the LIVE state
2223 ret = srpt_zerolength_write(ch); 2379 * fails is not necessary since that means that that function has
2224 WARN_ONCE(ret < 0, "%d\n", ret); 2380 * already been invoked from another thread.
2225 } else { 2381 */
2226 srpt_close_ch(ch); 2382 if (!srpt_set_ch_state(ch, CH_LIVE)) {
2227 } 2383 pr_err("%s-%d: channel transition to LIVE state failed\n",
2384 ch->sess_name, ch->qp->qp_num);
2385 return;
2228 } 2386 }
2387
2388 /* Trigger wait list processing. */
2389 ret = srpt_zerolength_write(ch);
2390 WARN_ONCE(ret < 0, "%d\n", ret);
2229} 2391}
2230 2392
2231/** 2393/**
2232 * srpt_cm_handler() - IB connection manager callback function. 2394 * srpt_cm_handler - IB connection manager callback function
2395 * @cm_id: IB/CM connection identifier.
2396 * @event: IB/CM event.
2233 * 2397 *
2234 * A non-zero return value will cause the caller to destroy the CM ID. 2398 * A non-zero return value will cause the caller to destroy the CM ID.
2235 * 2399 *
@@ -2246,8 +2410,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2246 ret = 0; 2410 ret = 0;
2247 switch (event->event) { 2411 switch (event->event) {
2248 case IB_CM_REQ_RECEIVED: 2412 case IB_CM_REQ_RECEIVED:
2249 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd, 2413 ret = srpt_ib_cm_req_recv(cm_id, &event->param.req_rcvd,
2250 event->private_data); 2414 event->private_data);
2251 break; 2415 break;
2252 case IB_CM_REJ_RECEIVED: 2416 case IB_CM_REJ_RECEIVED:
2253 srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason, 2417 srpt_cm_rej_recv(ch, event->param.rej_rcvd.reason,
@@ -2294,11 +2458,11 @@ static int srpt_write_pending_status(struct se_cmd *se_cmd)
2294 struct srpt_send_ioctx *ioctx; 2458 struct srpt_send_ioctx *ioctx;
2295 2459
2296 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); 2460 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2297 return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA; 2461 return ioctx->state == SRPT_STATE_NEED_DATA;
2298} 2462}
2299 2463
2300/* 2464/*
2301 * srpt_write_pending() - Start data transfer from initiator to target (write). 2465 * srpt_write_pending - Start data transfer from initiator to target (write).
2302 */ 2466 */
2303static int srpt_write_pending(struct se_cmd *se_cmd) 2467static int srpt_write_pending(struct se_cmd *se_cmd)
2304{ 2468{
@@ -2355,7 +2519,8 @@ static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
2355} 2519}
2356 2520
2357/** 2521/**
2358 * srpt_queue_response() - Transmits the response to a SCSI command. 2522 * srpt_queue_response - transmit the response to a SCSI command
2523 * @cmd: SCSI target command.
2359 * 2524 *
2360 * Callback function called by the TCM core. Must not block since it can be 2525 * Callback function called by the TCM core. Must not block since it can be
2361 * invoked on the context of the IB completion handler. 2526 * invoked on the context of the IB completion handler.
@@ -2369,13 +2534,11 @@ static void srpt_queue_response(struct se_cmd *cmd)
2369 struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; 2534 struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr;
2370 struct ib_sge sge; 2535 struct ib_sge sge;
2371 enum srpt_command_state state; 2536 enum srpt_command_state state;
2372 unsigned long flags;
2373 int resp_len, ret, i; 2537 int resp_len, ret, i;
2374 u8 srp_tm_status; 2538 u8 srp_tm_status;
2375 2539
2376 BUG_ON(!ch); 2540 BUG_ON(!ch);
2377 2541
2378 spin_lock_irqsave(&ioctx->spinlock, flags);
2379 state = ioctx->state; 2542 state = ioctx->state;
2380 switch (state) { 2543 switch (state) {
2381 case SRPT_STATE_NEW: 2544 case SRPT_STATE_NEW:
@@ -2390,7 +2553,6 @@ static void srpt_queue_response(struct se_cmd *cmd)
2390 ch, ioctx->ioctx.index, ioctx->state); 2553 ch, ioctx->ioctx.index, ioctx->state);
2391 break; 2554 break;
2392 } 2555 }
2393 spin_unlock_irqrestore(&ioctx->spinlock, flags);
2394 2556
2395 if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) 2557 if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT)))
2396 return; 2558 return;
@@ -2494,26 +2656,56 @@ static void srpt_refresh_port_work(struct work_struct *work)
2494 srpt_refresh_port(sport); 2656 srpt_refresh_port(sport);
2495} 2657}
2496 2658
2659static bool srpt_ch_list_empty(struct srpt_port *sport)
2660{
2661 struct srpt_nexus *nexus;
2662 bool res = true;
2663
2664 rcu_read_lock();
2665 list_for_each_entry(nexus, &sport->nexus_list, entry)
2666 if (!list_empty(&nexus->ch_list))
2667 res = false;
2668 rcu_read_unlock();
2669
2670 return res;
2671}
2672
2497/** 2673/**
2498 * srpt_release_sdev() - Free the channel resources associated with a target. 2674 * srpt_release_sport - disable login and wait for associated channels
2675 * @sport: SRPT HCA port.
2499 */ 2676 */
2500static int srpt_release_sdev(struct srpt_device *sdev) 2677static int srpt_release_sport(struct srpt_port *sport)
2501{ 2678{
2502 int i, res; 2679 struct srpt_nexus *nexus, *next_n;
2680 struct srpt_rdma_ch *ch;
2503 2681
2504 WARN_ON_ONCE(irqs_disabled()); 2682 WARN_ON_ONCE(irqs_disabled());
2505 2683
2506 BUG_ON(!sdev); 2684 mutex_lock(&sport->mutex);
2507 2685 srpt_set_enabled(sport, false);
2508 mutex_lock(&sdev->mutex); 2686 mutex_unlock(&sport->mutex);
2509 for (i = 0; i < ARRAY_SIZE(sdev->port); i++) 2687
2510 srpt_set_enabled(&sdev->port[i], false); 2688 while (wait_event_timeout(sport->ch_releaseQ,
2511 mutex_unlock(&sdev->mutex); 2689 srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
2690 pr_info("%s_%d: waiting for session unregistration ...\n",
2691 sport->sdev->device->name, sport->port);
2692 rcu_read_lock();
2693 list_for_each_entry(nexus, &sport->nexus_list, entry) {
2694 list_for_each_entry(ch, &nexus->ch_list, list) {
2695 pr_info("%s-%d: state %s\n",
2696 ch->sess_name, ch->qp->qp_num,
2697 get_ch_state_name(ch->state));
2698 }
2699 }
2700 rcu_read_unlock();
2701 }
2512 2702
2513 res = wait_event_interruptible(sdev->ch_releaseQ, 2703 mutex_lock(&sport->mutex);
2514 list_empty_careful(&sdev->rch_list)); 2704 list_for_each_entry_safe(nexus, next_n, &sport->nexus_list, entry) {
2515 if (res) 2705 list_del(&nexus->entry);
2516 pr_err("%s: interrupted.\n", __func__); 2706 kfree_rcu(nexus, rcu);
2707 }
2708 mutex_unlock(&sport->mutex);
2517 2709
2518 return 0; 2710 return 0;
2519} 2711}
@@ -2600,8 +2792,10 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
2600 sdev->use_srq = true; 2792 sdev->use_srq = true;
2601 sdev->srq = srq; 2793 sdev->srq = srq;
2602 2794
2603 for (i = 0; i < sdev->srq_size; ++i) 2795 for (i = 0; i < sdev->srq_size; ++i) {
2796 INIT_LIST_HEAD(&sdev->ioctx_ring[i]->wait_list);
2604 srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]); 2797 srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]);
2798 }
2605 2799
2606 return 0; 2800 return 0;
2607} 2801}
@@ -2623,7 +2817,8 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
2623} 2817}
2624 2818
2625/** 2819/**
2626 * srpt_add_one() - Infiniband device addition callback function. 2820 * srpt_add_one - InfiniBand device addition callback function
2821 * @device: Describes a HCA.
2627 */ 2822 */
2628static void srpt_add_one(struct ib_device *device) 2823static void srpt_add_one(struct ib_device *device)
2629{ 2824{
@@ -2638,9 +2833,7 @@ static void srpt_add_one(struct ib_device *device)
2638 goto err; 2833 goto err;
2639 2834
2640 sdev->device = device; 2835 sdev->device = device;
2641 INIT_LIST_HEAD(&sdev->rch_list); 2836 mutex_init(&sdev->sdev_mutex);
2642 init_waitqueue_head(&sdev->ch_releaseQ);
2643 mutex_init(&sdev->mutex);
2644 2837
2645 sdev->pd = ib_alloc_pd(device, 0); 2838 sdev->pd = ib_alloc_pd(device, 0);
2646 if (IS_ERR(sdev->pd)) 2839 if (IS_ERR(sdev->pd))
@@ -2681,6 +2874,9 @@ static void srpt_add_one(struct ib_device *device)
2681 2874
2682 for (i = 1; i <= sdev->device->phys_port_cnt; i++) { 2875 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2683 sport = &sdev->port[i - 1]; 2876 sport = &sdev->port[i - 1];
2877 INIT_LIST_HEAD(&sport->nexus_list);
2878 init_waitqueue_head(&sport->ch_releaseQ);
2879 mutex_init(&sport->mutex);
2684 sport->sdev = sdev; 2880 sport->sdev = sdev;
2685 sport->port = i; 2881 sport->port = i;
2686 sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; 2882 sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
@@ -2721,7 +2917,9 @@ err:
2721} 2917}
2722 2918
2723/** 2919/**
2724 * srpt_remove_one() - InfiniBand device removal callback function. 2920 * srpt_remove_one - InfiniBand device removal callback function
2921 * @device: Describes a HCA.
2922 * @client_data: The value passed as the third argument to ib_set_client_data().
2725 */ 2923 */
2726static void srpt_remove_one(struct ib_device *device, void *client_data) 2924static void srpt_remove_one(struct ib_device *device, void *client_data)
2727{ 2925{
@@ -2751,7 +2949,9 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
2751 spin_lock(&srpt_dev_lock); 2949 spin_lock(&srpt_dev_lock);
2752 list_del(&sdev->list); 2950 list_del(&sdev->list);
2753 spin_unlock(&srpt_dev_lock); 2951 spin_unlock(&srpt_dev_lock);
2754 srpt_release_sdev(sdev); 2952
2953 for (i = 0; i < sdev->device->phys_port_cnt; i++)
2954 srpt_release_sport(&sdev->port[i]);
2755 2955
2756 srpt_free_srq(sdev); 2956 srpt_free_srq(sdev);
2757 2957
@@ -2827,7 +3027,8 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
2827} 3027}
2828 3028
2829/** 3029/**
2830 * srpt_close_session() - Forcibly close a session. 3030 * srpt_close_session - forcibly close a session
3031 * @se_sess: SCSI target session.
2831 * 3032 *
2832 * Callback function invoked by the TCM core to clean up sessions associated 3033 * Callback function invoked by the TCM core to clean up sessions associated
2833 * with a node ACL when the user invokes 3034 * with a node ACL when the user invokes
@@ -2836,15 +3037,13 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
2836static void srpt_close_session(struct se_session *se_sess) 3037static void srpt_close_session(struct se_session *se_sess)
2837{ 3038{
2838 struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr; 3039 struct srpt_rdma_ch *ch = se_sess->fabric_sess_ptr;
2839 struct srpt_device *sdev = ch->sport->sdev;
2840 3040
2841 mutex_lock(&sdev->mutex);
2842 srpt_disconnect_ch_sync(ch); 3041 srpt_disconnect_ch_sync(ch);
2843 mutex_unlock(&sdev->mutex);
2844} 3042}
2845 3043
2846/** 3044/**
2847 * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB). 3045 * srpt_sess_get_index - return the value of scsiAttIntrPortIndex (SCSI-MIB)
3046 * @se_sess: SCSI target session.
2848 * 3047 *
2849 * A quote from RFC 4455 (SCSI-MIB) about this MIB object: 3048 * A quote from RFC 4455 (SCSI-MIB) about this MIB object:
2850 * This object represents an arbitrary integer used to uniquely identify a 3049 * This object represents an arbitrary integer used to uniquely identify a
@@ -2866,7 +3065,7 @@ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
2866 struct srpt_send_ioctx *ioctx; 3065 struct srpt_send_ioctx *ioctx;
2867 3066
2868 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); 3067 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2869 return srpt_get_cmd_state(ioctx); 3068 return ioctx->state;
2870} 3069}
2871 3070
2872static int srpt_parse_guid(u64 *guid, const char *name) 3071static int srpt_parse_guid(u64 *guid, const char *name)
@@ -2883,7 +3082,7 @@ out:
2883} 3082}
2884 3083
2885/** 3084/**
2886 * srpt_parse_i_port_id() - Parse an initiator port ID. 3085 * srpt_parse_i_port_id - parse an initiator port ID
2887 * @name: ASCII representation of a 128-bit initiator port ID. 3086 * @name: ASCII representation of a 128-bit initiator port ID.
2888 * @i_port_id: Binary 128-bit port ID. 3087 * @i_port_id: Binary 128-bit port ID.
2889 */ 3088 */
@@ -3064,18 +3263,24 @@ static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
3064 if (val != !!val) 3263 if (val != !!val)
3065 return -EINVAL; 3264 return -EINVAL;
3066 3265
3067 ret = mutex_lock_interruptible(&sdev->mutex); 3266 ret = mutex_lock_interruptible(&sdev->sdev_mutex);
3068 if (ret < 0) 3267 if (ret < 0)
3069 return ret; 3268 return ret;
3269 ret = mutex_lock_interruptible(&sport->mutex);
3270 if (ret < 0)
3271 goto unlock_sdev;
3070 enabled = sport->enabled; 3272 enabled = sport->enabled;
3071 /* Log out all initiator systems before changing 'use_srq'. */ 3273 /* Log out all initiator systems before changing 'use_srq'. */
3072 srpt_set_enabled(sport, false); 3274 srpt_set_enabled(sport, false);
3073 sport->port_attrib.use_srq = val; 3275 sport->port_attrib.use_srq = val;
3074 srpt_use_srq(sdev, sport->port_attrib.use_srq); 3276 srpt_use_srq(sdev, sport->port_attrib.use_srq);
3075 srpt_set_enabled(sport, enabled); 3277 srpt_set_enabled(sport, enabled);
3076 mutex_unlock(&sdev->mutex); 3278 ret = count;
3279 mutex_unlock(&sport->mutex);
3280unlock_sdev:
3281 mutex_unlock(&sdev->sdev_mutex);
3077 3282
3078 return count; 3283 return ret;
3079} 3284}
3080 3285
3081CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size); 3286CONFIGFS_ATTR(srpt_tpg_attrib_, srp_max_rdma_size);
@@ -3104,7 +3309,6 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
3104{ 3309{
3105 struct se_portal_group *se_tpg = to_tpg(item); 3310 struct se_portal_group *se_tpg = to_tpg(item);
3106 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg); 3311 struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
3107 struct srpt_device *sdev = sport->sdev;
3108 unsigned long tmp; 3312 unsigned long tmp;
3109 int ret; 3313 int ret;
3110 3314
@@ -3119,9 +3323,9 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
3119 return -EINVAL; 3323 return -EINVAL;
3120 } 3324 }
3121 3325
3122 mutex_lock(&sdev->mutex); 3326 mutex_lock(&sport->mutex);
3123 srpt_set_enabled(sport, tmp); 3327 srpt_set_enabled(sport, tmp);
3124 mutex_unlock(&sdev->mutex); 3328 mutex_unlock(&sport->mutex);
3125 3329
3126 return count; 3330 return count;
3127} 3331}
@@ -3134,8 +3338,10 @@ static struct configfs_attribute *srpt_tpg_attrs[] = {
3134}; 3338};
3135 3339
3136/** 3340/**
3137 * configfs callback invoked for 3341 * srpt_make_tpg - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port/$tpg
3138 * mkdir /sys/kernel/config/target/$driver/$port/$tpg 3342 * @wwn: Corresponds to $driver/$port.
3343 * @group: Not used.
3344 * @name: $tpg.
3139 */ 3345 */
3140static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, 3346static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
3141 struct config_group *group, 3347 struct config_group *group,
@@ -3157,8 +3363,8 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
3157} 3363}
3158 3364
3159/** 3365/**
3160 * configfs callback invoked for 3366 * srpt_drop_tpg - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port/$tpg
3161 * rmdir /sys/kernel/config/target/$driver/$port/$tpg 3367 * @tpg: Target portal group to deregister.
3162 */ 3368 */
3163static void srpt_drop_tpg(struct se_portal_group *tpg) 3369static void srpt_drop_tpg(struct se_portal_group *tpg)
3164{ 3370{
@@ -3169,8 +3375,10 @@ static void srpt_drop_tpg(struct se_portal_group *tpg)
3169} 3375}
3170 3376
3171/** 3377/**
3172 * configfs callback invoked for 3378 * srpt_make_tport - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port
3173 * mkdir /sys/kernel/config/target/$driver/$port 3379 * @tf: Not used.
3380 * @group: Not used.
3381 * @name: $port.
3174 */ 3382 */
3175static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, 3383static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
3176 struct config_group *group, 3384 struct config_group *group,
@@ -3180,8 +3388,8 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
3180} 3388}
3181 3389
3182/** 3390/**
3183 * configfs callback invoked for 3391 * srpt_drop_tport - configfs callback invoked for rmdir /sys/kernel/config/target/$driver/$port
3184 * rmdir /sys/kernel/config/target/$driver/$port 3392 * @wwn: $port.
3185 */ 3393 */
3186static void srpt_drop_tport(struct se_wwn *wwn) 3394static void srpt_drop_tport(struct se_wwn *wwn)
3187{ 3395{
@@ -3239,7 +3447,7 @@ static const struct target_core_fabric_ops srpt_template = {
3239}; 3447};
3240 3448
3241/** 3449/**
3242 * srpt_init_module() - Kernel module initialization. 3450 * srpt_init_module - kernel module initialization
3243 * 3451 *
3244 * Note: Since ib_register_client() registers callback functions, and since at 3452 * Note: Since ib_register_client() registers callback functions, and since at
3245 * least one of these callback functions (srpt_add_one()) calls target core 3453 * least one of these callback functions (srpt_add_one()) calls target core
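/*
 * Illustrative sketch (not taken verbatim from the patch above) of the
 * lookup-or-create pattern that the new srpt_get_nexus() uses: allocate
 * outside the lock, search under the lock, publish the new entry only if
 * no match was found, then free the unused spare.  The demo_* identifiers
 * are hypothetical; only the list/mutex/kzalloc usage mirrors the driver.
 */
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

struct demo_nexus {
	struct list_head entry;
	u8 i_port_id[16];
	u8 t_port_id[16];
};

static LIST_HEAD(demo_nexus_list);
static DEFINE_MUTEX(demo_mutex);	/* protects demo_nexus_list writers */

static struct demo_nexus *demo_get_nexus(const u8 i_port_id[16],
					 const u8 t_port_id[16])
{
	struct demo_nexus *nexus = NULL, *tmp_nexus = NULL, *n;

	for (;;) {
		mutex_lock(&demo_mutex);
		list_for_each_entry(n, &demo_nexus_list, entry) {
			if (memcmp(n->i_port_id, i_port_id, 16) == 0 &&
			    memcmp(n->t_port_id, t_port_id, 16) == 0) {
				nexus = n;
				break;
			}
		}
		if (!nexus && tmp_nexus) {
			/* Publish the entry allocated in the previous pass. */
			list_add_tail(&tmp_nexus->entry, &demo_nexus_list);
			swap(nexus, tmp_nexus);
		}
		mutex_unlock(&demo_mutex);

		if (nexus)
			break;
		/* Allocate outside the lock and retry the lookup. */
		tmp_nexus = kzalloc(sizeof(*tmp_nexus), GFP_KERNEL);
		if (!tmp_nexus) {
			nexus = ERR_PTR(-ENOMEM);
			break;
		}
		memcpy(tmp_nexus->i_port_id, i_port_id, 16);
		memcpy(tmp_nexus->t_port_id, t_port_id, 16);
	}

	/* Frees the unused spare, or NULL. */
	kfree(tmp_nexus);
	return nexus;
}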
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 673387d365a3..4d9199fd00dc 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -54,6 +54,8 @@
54 */ 54 */
55#define SRP_SERVICE_NAME_PREFIX "SRP.T10:" 55#define SRP_SERVICE_NAME_PREFIX "SRP.T10:"
56 56
57struct srpt_nexus;
58
57enum { 59enum {
58 /* 60 /*
59 * SRP IOControllerProfile attributes for SRP target ports that have 61 * SRP IOControllerProfile attributes for SRP target ports that have
@@ -114,7 +116,7 @@ enum {
114 116
115 MIN_SRPT_SQ_SIZE = 16, 117 MIN_SRPT_SQ_SIZE = 16,
116 DEF_SRPT_SQ_SIZE = 4096, 118 DEF_SRPT_SQ_SIZE = 4096,
117 SRPT_RQ_SIZE = 128, 119 MAX_SRPT_RQ_SIZE = 128,
118 MIN_SRPT_SRQ_SIZE = 4, 120 MIN_SRPT_SRQ_SIZE = 4,
119 DEFAULT_SRPT_SRQ_SIZE = 4095, 121 DEFAULT_SRPT_SRQ_SIZE = 4095,
120 MAX_SRPT_SRQ_SIZE = 65535, 122 MAX_SRPT_SRQ_SIZE = 65535,
@@ -134,7 +136,7 @@ enum {
134}; 136};
135 137
136/** 138/**
137 * enum srpt_command_state - SCSI command state managed by SRPT. 139 * enum srpt_command_state - SCSI command state managed by SRPT
138 * @SRPT_STATE_NEW: New command arrived and is being processed. 140 * @SRPT_STATE_NEW: New command arrived and is being processed.
139 * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting 141 * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting
140 * for data arrival. 142 * for data arrival.
@@ -158,7 +160,8 @@ enum srpt_command_state {
158}; 160};
159 161
160/** 162/**
161 * struct srpt_ioctx - Shared SRPT I/O context information. 163 * struct srpt_ioctx - shared SRPT I/O context information
164 * @cqe: Completion queue element.
162 * @buf: Pointer to the buffer. 165 * @buf: Pointer to the buffer.
163 * @dma: DMA address of the buffer. 166 * @dma: DMA address of the buffer.
164 * @index: Index of the I/O context in its ioctx_ring array. 167 * @index: Index of the I/O context in its ioctx_ring array.
@@ -171,7 +174,7 @@ struct srpt_ioctx {
171}; 174};
172 175
173/** 176/**
174 * struct srpt_recv_ioctx - SRPT receive I/O context. 177 * struct srpt_recv_ioctx - SRPT receive I/O context
175 * @ioctx: See above. 178 * @ioctx: See above.
176 * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. 179 * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
177 */ 180 */
@@ -187,13 +190,20 @@ struct srpt_rw_ctx {
187}; 190};
188 191
189/** 192/**
190 * struct srpt_send_ioctx - SRPT send I/O context. 193 * struct srpt_send_ioctx - SRPT send I/O context
191 * @ioctx: See above. 194 * @ioctx: See above.
192 * @ch: Channel pointer. 195 * @ch: Channel pointer.
193 * @spinlock: Protects 'state'. 196 * @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed.
197 * @rw_ctxs: RDMA read/write contexts.
198 * @rdma_cqe: RDMA completion queue element.
199 * @free_list: Node in srpt_rdma_ch.free_list.
194 * @state: I/O context state. 200 * @state: I/O context state.
195 * @cmd: Target core command data structure. 201 * @cmd: Target core command data structure.
196 * @sense_data: SCSI sense data. 202 * @sense_data: SCSI sense data.
203 * @n_rdma: Number of work requests needed to transfer this ioctx.
204 * @n_rw_ctx: Size of rw_ctxs array.
205 * @queue_status_only: Send a SCSI status back to the initiator but no data.
206 * @sense_data: Sense data to be sent to the initiator.
197 */ 207 */
198struct srpt_send_ioctx { 208struct srpt_send_ioctx {
199 struct srpt_ioctx ioctx; 209 struct srpt_ioctx ioctx;
@@ -204,10 +214,8 @@ struct srpt_send_ioctx {
204 214
205 struct ib_cqe rdma_cqe; 215 struct ib_cqe rdma_cqe;
206 struct list_head free_list; 216 struct list_head free_list;
207 spinlock_t spinlock;
208 enum srpt_command_state state; 217 enum srpt_command_state state;
209 struct se_cmd cmd; 218 struct se_cmd cmd;
210 struct completion tx_done;
211 u8 n_rdma; 219 u8 n_rdma;
212 u8 n_rw_ctx; 220 u8 n_rw_ctx;
213 bool queue_status_only; 221 bool queue_status_only;
@@ -215,7 +223,7 @@ struct srpt_send_ioctx {
215}; 223};
216 224
217/** 225/**
218 * enum rdma_ch_state - SRP channel state. 226 * enum rdma_ch_state - SRP channel state
219 * @CH_CONNECTING: QP is in RTR state; waiting for RTU. 227 * @CH_CONNECTING: QP is in RTR state; waiting for RTU.
220 * @CH_LIVE: QP is in RTS state. 228 * @CH_LIVE: QP is in RTS state.
221 * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has 229 * @CH_DISCONNECTING: DREQ has been sent and waiting for DREP or DREQ has
@@ -233,17 +241,19 @@ enum rdma_ch_state {
233}; 241};
234 242
235/** 243/**
236 * struct srpt_rdma_ch - RDMA channel. 244 * struct srpt_rdma_ch - RDMA channel
237 * @cm_id: IB CM ID associated with the channel. 245 * @nexus: I_T nexus this channel is associated with.
238 * @qp: IB queue pair used for communicating over this channel. 246 * @qp: IB queue pair used for communicating over this channel.
247 * @cm_id: IB CM ID associated with the channel.
239 * @cq: IB completion queue for this channel. 248 * @cq: IB completion queue for this channel.
249 * @zw_cqe: Zero-length write CQE.
250 * @rcu: RCU head.
251 * @kref: kref for this channel.
240 * @rq_size: IB receive queue size. 252 * @rq_size: IB receive queue size.
241 * @rsp_size IB response message size in bytes. 253 * @max_rsp_size: Maximum size of an RSP response message in bytes.
242 * @sq_wr_avail: number of work requests available in the send queue. 254 * @sq_wr_avail: number of work requests available in the send queue.
243 * @sport: pointer to the information of the HCA port used by this 255 * @sport: pointer to the information of the HCA port used by this
244 * channel. 256 * channel.
245 * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
246 * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
247 * @max_ti_iu_len: maximum target-to-initiator information unit length. 257 * @max_ti_iu_len: maximum target-to-initiator information unit length.
248 * @req_lim: request limit: maximum number of requests that may be sent 258 * @req_lim: request limit: maximum number of requests that may be sent
249 * by the initiator without having received a response. 259 * by the initiator without having received a response.
@@ -251,30 +261,34 @@ enum rdma_ch_state {
251 * @spinlock: Protects free_list and state. 261 * @spinlock: Protects free_list and state.
252 * @free_list: Head of list with free send I/O contexts. 262 * @free_list: Head of list with free send I/O contexts.
253 * @state: channel state. See also enum rdma_ch_state. 263 * @state: channel state. See also enum rdma_ch_state.
264 * @processing_wait_list: Whether or not cmd_wait_list is being processed.
254 * @ioctx_ring: Send ring. 265 * @ioctx_ring: Send ring.
255 * @ioctx_recv_ring: Receive I/O context ring. 266 * @ioctx_recv_ring: Receive I/O context ring.
256 * @list: Node for insertion in the srpt_device.rch_list list. 267 * @list: Node in srpt_nexus.ch_list.
257 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This 268 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
258 * list contains struct srpt_ioctx elements and is protected 269 * list contains struct srpt_ioctx elements and is protected
259 * against concurrent modification by the cm_id spinlock. 270 * against concurrent modification by the cm_id spinlock.
271 * @pkey: P_Key of the IB partition for this SRP channel.
260 * @sess: Session information associated with this SRP channel. 272 * @sess: Session information associated with this SRP channel.
261 * @sess_name: Session name. 273 * @sess_name: Session name.
262 * @ini_guid: Initiator port GUID.
263 * @release_work: Allows scheduling of srpt_release_channel(). 274 * @release_work: Allows scheduling of srpt_release_channel().
264 * @release_done: Enables waiting for srpt_release_channel() completion.
265 */ 275 */
266struct srpt_rdma_ch { 276struct srpt_rdma_ch {
267 struct ib_cm_id *cm_id; 277 struct srpt_nexus *nexus;
268 struct ib_qp *qp; 278 struct ib_qp *qp;
279 union {
280 struct {
281 struct ib_cm_id *cm_id;
282 } ib_cm;
283 };
269 struct ib_cq *cq; 284 struct ib_cq *cq;
270 struct ib_cqe zw_cqe; 285 struct ib_cqe zw_cqe;
286 struct rcu_head rcu;
271 struct kref kref; 287 struct kref kref;
272 int rq_size; 288 int rq_size;
273 u32 rsp_size; 289 u32 max_rsp_size;
274 atomic_t sq_wr_avail; 290 atomic_t sq_wr_avail;
275 struct srpt_port *sport; 291 struct srpt_port *sport;
276 u8 i_port_id[16];
277 u8 t_port_id[16];
278 int max_ti_iu_len; 292 int max_ti_iu_len;
279 atomic_t req_lim; 293 atomic_t req_lim;
280 atomic_t req_lim_delta; 294 atomic_t req_lim_delta;
@@ -285,15 +299,31 @@ struct srpt_rdma_ch {
285 struct srpt_recv_ioctx **ioctx_recv_ring; 299 struct srpt_recv_ioctx **ioctx_recv_ring;
286 struct list_head list; 300 struct list_head list;
287 struct list_head cmd_wait_list; 301 struct list_head cmd_wait_list;
302 uint16_t pkey;
303 bool processing_wait_list;
288 struct se_session *sess; 304 struct se_session *sess;
289 u8 sess_name[36]; 305 u8 sess_name[24];
290 u8 ini_guid[24];
291 struct work_struct release_work; 306 struct work_struct release_work;
292 struct completion *release_done;
293}; 307};
294 308
295/** 309/**
296 * struct srpt_port_attib - Attributes for SRPT port 310 * struct srpt_nexus - I_T nexus
311 * @rcu: RCU head for this data structure.
312 * @entry: srpt_port.nexus_list list node.
313 * @ch_list: struct srpt_rdma_ch list. Protected by srpt_port.mutex.
314 * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
315 * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
316 */
317struct srpt_nexus {
318 struct rcu_head rcu;
319 struct list_head entry;
320 struct list_head ch_list;
321 u8 i_port_id[16];
322 u8 t_port_id[16];
323};
324
325/**
326 * struct srpt_port_attib - attributes for SRPT port
297 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. 327 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
298 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. 328 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
299 * @srp_sq_size: Shared receive queue (SRQ) size. 329 * @srp_sq_size: Shared receive queue (SRQ) size.
@@ -307,7 +337,7 @@ struct srpt_port_attrib {
307}; 337};
308 338
309/** 339/**
310 * struct srpt_port - Information associated by SRPT with a single IB port. 340 * struct srpt_port - information associated by SRPT with a single IB port
311 * @sdev: backpointer to the HCA information. 341 * @sdev: backpointer to the HCA information.
312 * @mad_agent: per-port management datagram processing information. 342 * @mad_agent: per-port management datagram processing information.
313 * @enabled: Whether or not this target port is enabled. 343 * @enabled: Whether or not this target port is enabled.
@@ -323,7 +353,10 @@ struct srpt_port_attrib {
323 * @port_guid_wwn: WWN associated with target port GUID. 353 * @port_guid_wwn: WWN associated with target port GUID.
324 * @port_gid_tpg: TPG associated with target port GID. 354 * @port_gid_tpg: TPG associated with target port GID.
325 * @port_gid_wwn: WWN associated with target port GID. 355 * @port_gid_wwn: WWN associated with target port GID.
326 * @port_acl_list: Head of the list with all node ACLs for this port. 356 * @port_attrib: Port attributes that can be accessed through configfs.
357 * @ch_releaseQ: Enables waiting for removal from nexus_list.
358 * @mutex: Protects nexus_list.
359 * @nexus_list: Nexus list. See also srpt_nexus.entry.
327 */ 360 */
328struct srpt_port { 361struct srpt_port {
329 struct srpt_device *sdev; 362 struct srpt_device *sdev;
@@ -341,21 +374,22 @@ struct srpt_port {
341 struct se_portal_group port_gid_tpg; 374 struct se_portal_group port_gid_tpg;
342 struct se_wwn port_gid_wwn; 375 struct se_wwn port_gid_wwn;
343 struct srpt_port_attrib port_attrib; 376 struct srpt_port_attrib port_attrib;
377 wait_queue_head_t ch_releaseQ;
378 struct mutex mutex;
379 struct list_head nexus_list;
344}; 380};
345 381
346/** 382/**
347 * struct srpt_device - Information associated by SRPT with a single HCA. 383 * struct srpt_device - information associated by SRPT with a single HCA
348 * @device: Backpointer to the struct ib_device managed by the IB core. 384 * @device: Backpointer to the struct ib_device managed by the IB core.
349 * @pd: IB protection domain. 385 * @pd: IB protection domain.
350 * @lkey: L_Key (local key) with write access to all local memory. 386 * @lkey: L_Key (local key) with write access to all local memory.
351 * @srq: Per-HCA SRQ (shared receive queue). 387 * @srq: Per-HCA SRQ (shared receive queue).
352 * @cm_id: Connection identifier. 388 * @cm_id: Connection identifier.
353 * @srq_size: SRQ size. 389 * @srq_size: SRQ size.
390 * @sdev_mutex: Serializes use_srq changes.
354 * @use_srq: Whether or not to use SRQ. 391 * @use_srq: Whether or not to use SRQ.
355 * @ioctx_ring: Per-HCA SRQ. 392 * @ioctx_ring: Per-HCA SRQ.
356 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
357 * @ch_releaseQ: Enables waiting for removal from rch_list.
358 * @mutex: Protects rch_list.
359 * @port: Information about the ports owned by this HCA. 393 * @port: Information about the ports owned by this HCA.
360 * @event_handler: Per-HCA asynchronous IB event handler. 394 * @event_handler: Per-HCA asynchronous IB event handler.
361 * @list: Node in srpt_dev_list. 395 * @list: Node in srpt_dev_list.
@@ -367,11 +401,9 @@ struct srpt_device {
367 struct ib_srq *srq; 401 struct ib_srq *srq;
368 struct ib_cm_id *cm_id; 402 struct ib_cm_id *cm_id;
369 int srq_size; 403 int srq_size;
404 struct mutex sdev_mutex;
370 bool use_srq; 405 bool use_srq;
371 struct srpt_recv_ioctx **ioctx_ring; 406 struct srpt_recv_ioctx **ioctx_ring;
372 struct list_head rch_list;
373 wait_queue_head_t ch_releaseQ;
374 struct mutex mutex;
375 struct srpt_port port[2]; 407 struct srpt_port port[2];
376 struct ib_event_handler event_handler; 408 struct ib_event_handler event_handler;
377 struct list_head list; 409 struct list_head list;
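/*
 * Illustrative sketch of the lifetime rules the reworked header implies for
 * srpt_nexus.ch_list: writers take the per-port mutex, readers only hold
 * rcu_read_lock(), freed objects go through kfree_rcu(), and teardown waits
 * on a wait queue until the list drains.  The demo_* names are made up for
 * this sketch and are not part of the driver.
 */
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/printk.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>

struct demo_ch {
	struct rcu_head rcu;
	struct list_head list;
	int qp_num;
};

static LIST_HEAD(demo_ch_list);
static DEFINE_MUTEX(demo_ch_mutex);		/* protects demo_ch_list writers */
static DECLARE_WAIT_QUEUE_HEAD(demo_releaseQ);

static bool demo_ch_list_empty(void)
{
	bool empty;

	rcu_read_lock();
	empty = list_empty(&demo_ch_list);
	rcu_read_unlock();
	return empty;
}

static void demo_remove_ch(struct demo_ch *ch)
{
	mutex_lock(&demo_ch_mutex);
	list_del_rcu(&ch->list);
	mutex_unlock(&demo_ch_mutex);

	wake_up(&demo_releaseQ);
	kfree_rcu(ch, rcu);		/* RCU readers may still be walking the list */
}

static void demo_drain(void)
{
	/* Poll every five seconds and log, as srpt_release_sport() does. */
	while (wait_event_timeout(demo_releaseQ, demo_ch_list_empty(),
				  5 * HZ) == 0)
		pr_info("demo: still waiting for channels to go away\n");
}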
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 769598f7b6c8..3aaf4bad6c5a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -287,6 +287,9 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
287 u64 in_param = 0; 287 u64 in_param = 0;
288 int err; 288 int err;
289 289
290 if (!cnt)
291 return;
292
290 if (mlx4_is_mfunc(dev)) { 293 if (mlx4_is_mfunc(dev)) {
291 set_param_l(&in_param, base_qpn); 294 set_param_l(&in_param, base_qpn);
292 set_param_h(&in_param, cnt); 295 set_param_h(&in_param, cnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index e7e7cef2bde4..14d57828945d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -417,7 +417,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
417 cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff; 417 cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
418 mlx5_cq_completion(dev, cqn); 418 mlx5_cq_completion(dev, cqn);
419 break; 419 break;
420 420 case MLX5_EVENT_TYPE_DCT_DRAINED:
421 rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
422 rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
423 mlx5_rsc_event(dev, rsn, eqe->type);
424 break;
421 case MLX5_EVENT_TYPE_PATH_MIG: 425 case MLX5_EVENT_TYPE_PATH_MIG:
422 case MLX5_EVENT_TYPE_COMM_EST: 426 case MLX5_EVENT_TYPE_COMM_EST:
423 case MLX5_EVENT_TYPE_SQ_DRAINED: 427 case MLX5_EVENT_TYPE_SQ_DRAINED:
@@ -715,6 +719,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
715 719
716 if (MLX5_CAP_GEN(dev, fpga)) 720 if (MLX5_CAP_GEN(dev, fpga))
717 async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR); 721 async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR);
722 if (MLX5_CAP_GEN_MAX(dev, dct))
723 async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
724
718 725
719 err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, 726 err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
720 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, 727 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index c4392f741c5f..e6175f8ac0e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -688,7 +688,7 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
688 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 688 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
689 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 689 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
690 MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); 690 MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
691 MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); 691 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
692 MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); 692 MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn);
693 MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); 693 MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
694 MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); 694 MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
@@ -727,7 +727,7 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
727 MLX5_SET(qpc, qpc, next_rcv_psn, 727 MLX5_SET(qpc, qpc, next_rcv_psn,
728 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); 728 MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn));
729 MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); 729 MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX);
730 MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); 730 MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM);
731 ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), 731 ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
732 MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); 732 MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32));
733 MLX5_SET(qpc, qpc, primary_address_path.udp_sport, 733 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
@@ -888,7 +888,8 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
888 err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 888 err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index,
889 MLX5_ROCE_VERSION_2, 889 MLX5_ROCE_VERSION_2,
890 MLX5_ROCE_L3_TYPE_IPV6, 890 MLX5_ROCE_L3_TYPE_IPV6,
891 remote_ip, remote_mac, true, 0); 891 remote_ip, remote_mac, true, 0,
892 MLX5_FPGA_PORT_NUM);
892 if (err) { 893 if (err) {
893 mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); 894 mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err);
894 ret = ERR_PTR(err); 895 ret = ERR_PTR(err);
@@ -954,7 +955,7 @@ err_cq:
954 mlx5_fpga_conn_destroy_cq(conn); 955 mlx5_fpga_conn_destroy_cq(conn);
955err_gid: 956err_gid:
956 mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, 957 mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL,
957 NULL, false, 0); 958 NULL, false, 0, MLX5_FPGA_PORT_NUM);
958err_rsvd_gid: 959err_rsvd_gid:
959 mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); 960 mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index);
960err: 961err:
@@ -982,7 +983,7 @@ void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
982 mlx5_fpga_conn_destroy_cq(conn); 983 mlx5_fpga_conn_destroy_cq(conn);
983 984
984 mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, 985 mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0,
985 NULL, NULL, false, 0); 986 NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM);
986 mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); 987 mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index);
987 kfree(conn); 988 kfree(conn);
988} 989}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 5ef1b56b6a96..9d11e92fb541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -195,12 +195,20 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
195 return 0; 195 return 0;
196} 196}
197 197
198int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) 198int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
199{ 199{
200 u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; 200 u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
201 u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; 201 u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0};
202 int i;
202 203
203 MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); 204 MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
205
206 if (MLX5_CAP_GEN(dev, sw_owner_id)) {
207 for (i = 0; i < 4; i++)
208 MLX5_ARRAY_SET(init_hca_in, in, sw_owner_id, i,
209 sw_owner_id[i]);
210 }
211
204 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 212 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
205} 213}
206 214
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index ee2f378c5030..a281d95ce17c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -189,7 +189,7 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp
189 MLX5_QP_ENHANCED_ULP_STATELESS_MODE); 189 MLX5_QP_ENHANCED_ULP_STATELESS_MODE);
190 190
191 addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 191 addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
192 MLX5_SET(ads, addr_path, port, 1); 192 MLX5_SET(ads, addr_path, vhca_port_num, 1);
193 MLX5_SET(ads, addr_path, grh, 1); 193 MLX5_SET(ads, addr_path, grh, 1);
194 194
195 ret = mlx5_core_create_qp(mdev, qp, in, inlen); 195 ret = mlx5_core_create_qp(mdev, qp, in, inlen);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 5701f125e99c..e159243e0fcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -31,6 +31,8 @@
31 */ 31 */
32 32
33#include <linux/clocksource.h> 33#include <linux/clocksource.h>
34#include <linux/highmem.h>
35#include <rdma/mlx5-abi.h>
34#include "en.h" 36#include "en.h"
35 37
36enum { 38enum {
@@ -71,6 +73,28 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
71 return mlx5_read_internal_timer(mdev) & cc->mask; 73 return mlx5_read_internal_timer(mdev) & cc->mask;
72} 74}
73 75
76static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
77{
78 struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
79 struct mlx5_clock *clock = &mdev->clock;
80 u32 sign;
81
82 if (!clock_info)
83 return;
84
85 sign = smp_load_acquire(&clock_info->sign);
86 smp_store_mb(clock_info->sign,
87 sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
88
89 clock_info->cycles = clock->tc.cycle_last;
90 clock_info->mult = clock->cycles.mult;
91 clock_info->nsec = clock->tc.nsec;
92 clock_info->frac = clock->tc.frac;
93
94 smp_store_release(&clock_info->sign,
95 sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
96}
97
74static void mlx5_pps_out(struct work_struct *work) 98static void mlx5_pps_out(struct work_struct *work)
75{ 99{
76 struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, 100 struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
@@ -109,6 +133,7 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
109 133
110 write_lock_irqsave(&clock->lock, flags); 134 write_lock_irqsave(&clock->lock, flags);
111 timecounter_read(&clock->tc); 135 timecounter_read(&clock->tc);
136 mlx5_update_clock_info_page(clock->mdev);
112 write_unlock_irqrestore(&clock->lock, flags); 137 write_unlock_irqrestore(&clock->lock, flags);
113 schedule_delayed_work(&clock->overflow_work, clock->overflow_period); 138 schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
114} 139}
@@ -123,6 +148,7 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
123 148
124 write_lock_irqsave(&clock->lock, flags); 149 write_lock_irqsave(&clock->lock, flags);
125 timecounter_init(&clock->tc, &clock->cycles, ns); 150 timecounter_init(&clock->tc, &clock->cycles, ns);
151 mlx5_update_clock_info_page(clock->mdev);
126 write_unlock_irqrestore(&clock->lock, flags); 152 write_unlock_irqrestore(&clock->lock, flags);
127 153
128 return 0; 154 return 0;
@@ -152,6 +178,7 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
152 178
153 write_lock_irqsave(&clock->lock, flags); 179 write_lock_irqsave(&clock->lock, flags);
154 timecounter_adjtime(&clock->tc, delta); 180 timecounter_adjtime(&clock->tc, delta);
181 mlx5_update_clock_info_page(clock->mdev);
155 write_unlock_irqrestore(&clock->lock, flags); 182 write_unlock_irqrestore(&clock->lock, flags);
156 183
157 return 0; 184 return 0;
@@ -179,6 +206,7 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
179 timecounter_read(&clock->tc); 206 timecounter_read(&clock->tc);
180 clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : 207 clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
181 clock->nominal_c_mult + diff; 208 clock->nominal_c_mult + diff;
209 mlx5_update_clock_info_page(clock->mdev);
182 write_unlock_irqrestore(&clock->lock, flags); 210 write_unlock_irqrestore(&clock->lock, flags);
183 211
184 return 0; 212 return 0;
@@ -474,6 +502,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
474 clock->cycles.shift); 502 clock->cycles.shift);
475 clock->nominal_c_mult = clock->cycles.mult; 503 clock->nominal_c_mult = clock->cycles.mult;
476 clock->cycles.mask = CLOCKSOURCE_MASK(41); 504 clock->cycles.mask = CLOCKSOURCE_MASK(41);
505 clock->mdev = mdev;
477 506
478 timecounter_init(&clock->tc, &clock->cycles, 507 timecounter_init(&clock->tc, &clock->cycles,
479 ktime_to_ns(ktime_get_real())); 508 ktime_to_ns(ktime_get_real()));
@@ -486,6 +515,25 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
486 do_div(ns, NSEC_PER_SEC / 2 / HZ); 515 do_div(ns, NSEC_PER_SEC / 2 / HZ);
487 clock->overflow_period = ns; 516 clock->overflow_period = ns;
488 517
518 mdev->clock_info_page = alloc_page(GFP_KERNEL);
519 if (mdev->clock_info_page) {
520 mdev->clock_info = kmap(mdev->clock_info_page);
521 if (!mdev->clock_info) {
522 __free_page(mdev->clock_info_page);
523 mlx5_core_warn(mdev, "failed to map clock page\n");
524 } else {
525 mdev->clock_info->sign = 0;
526 mdev->clock_info->nsec = clock->tc.nsec;
527 mdev->clock_info->cycles = clock->tc.cycle_last;
528 mdev->clock_info->mask = clock->cycles.mask;
529 mdev->clock_info->mult = clock->nominal_c_mult;
530 mdev->clock_info->shift = clock->cycles.shift;
531 mdev->clock_info->frac = clock->tc.frac;
532 mdev->clock_info->overflow_period =
533 clock->overflow_period;
534 }
535 }
536
489 INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); 537 INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
490 INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); 538 INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
491 if (clock->overflow_period) 539 if (clock->overflow_period)
@@ -525,5 +573,12 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
525 573
526 cancel_work_sync(&clock->pps_info.out_work); 574 cancel_work_sync(&clock->pps_info.out_work);
527 cancel_delayed_work_sync(&clock->overflow_work); 575 cancel_delayed_work_sync(&clock->overflow_work);
576
577 if (mdev->clock_info) {
578 kunmap(mdev->clock_info_page);
579 __free_page(mdev->clock_info_page);
580 mdev->clock_info = NULL;
581 }
582
528 kfree(clock->ptp_info.pin_config); 583 kfree(clock->ptp_info.pin_config);
529} 584}
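
The clock.c hunks above publish the driver's timecounter state to a shared struct mlx5_ib_clock_info page so verbs consumers can turn raw completion timestamps into wall-clock time. The sign field acts as a sequence marker: the updater sets MLX5_IB_CLOCK_INFO_KERNEL_UPDATING before rewriting the fields and advances sign past it afterwards. Below is a minimal reader sketch under that assumption; the function name is hypothetical and real consumers (rdma-core via mmap) may use a slightly different retry convention.

#include <linux/types.h>
#include <rdma/mlx5-abi.h>

/*
 * Hypothetical reader (sketch only). A snapshot is consistent only if
 * 'sign' was stable and the UPDATING bit was clear across the copy,
 * mirroring mlx5_update_clock_info_page() above.
 */
static void example_clock_info_snapshot(const struct mlx5_ib_clock_info *ci,
                                        struct mlx5_ib_clock_info *snap)
{
        u32 sign;

        do {
                sign = smp_load_acquire(&ci->sign);
                *snap = *ci;
                smp_rmb();      /* copy completes before sign is re-checked */
        } while ((sign & MLX5_IB_CLOCK_INFO_KERNEL_UPDATING) ||
                 sign != READ_ONCE(ci->sign));
}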
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index 573f59f46d41..7722a3f9bb68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count);
121 121
122int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, 122int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
123 u8 roce_version, u8 roce_l3_type, const u8 *gid, 123 u8 roce_version, u8 roce_l3_type, const u8 *gid,
124 const u8 *mac, bool vlan, u16 vlan_id) 124 const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
125{ 125{
126#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) 126#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
127 u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; 127 u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
@@ -148,6 +148,9 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
148 memcpy(addr_l3_addr, gid, gidsz); 148 memcpy(addr_l3_addr, gid, gidsz);
149 } 149 }
150 150
151 if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0)
152 MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num);
153
151 MLX5_SET(set_roce_address_in, in, roce_address_index, index); 154 MLX5_SET(set_roce_address_in, in, roce_address_index, index);
152 MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); 155 MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
153 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 156 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0f88fd30a09a..2ef641c91c26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -75,6 +75,8 @@ static unsigned int prof_sel = MLX5_DEFAULT_PROF;
75module_param_named(prof_sel, prof_sel, uint, 0444); 75module_param_named(prof_sel, prof_sel, uint, 0444);
76MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); 76MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
77 77
78static u32 sw_owner_id[4];
79
78enum { 80enum {
79 MLX5_ATOMIC_REQ_MODE_BE = 0x0, 81 MLX5_ATOMIC_REQ_MODE_BE = 0x0,
80 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, 82 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
@@ -551,6 +553,15 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
551 cache_line_128byte, 553 cache_line_128byte,
552 cache_line_size() == 128 ? 1 : 0); 554 cache_line_size() == 128 ? 1 : 0);
553 555
556 if (MLX5_CAP_GEN_MAX(dev, dct))
557 MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1);
558
559 if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
560 MLX5_SET(cmd_hca_cap,
561 set_hca_cap,
562 num_vhca_ports,
563 MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
564
554 err = set_caps(dev, set_ctx, set_sz, 565 err = set_caps(dev, set_ctx, set_sz,
555 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); 566 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
556 567
@@ -1107,7 +1118,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
1107 goto reclaim_boot_pages; 1118 goto reclaim_boot_pages;
1108 } 1119 }
1109 1120
1110 err = mlx5_cmd_init_hca(dev); 1121 err = mlx5_cmd_init_hca(dev, sw_owner_id);
1111 if (err) { 1122 if (err) {
1112 dev_err(&pdev->dev, "init hca failed\n"); 1123 dev_err(&pdev->dev, "init hca failed\n");
1113 goto err_pagealloc_stop; 1124 goto err_pagealloc_stop;
@@ -1643,6 +1654,8 @@ static int __init init(void)
1643{ 1654{
1644 int err; 1655 int err;
1645 1656
1657 get_random_bytes(&sw_owner_id, sizeof(sw_owner_id));
1658
1646 mlx5_core_verify_params(); 1659 mlx5_core_verify_params();
1647 mlx5_register_debugfs(); 1660 mlx5_register_debugfs();
1648 1661
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index ff4a0b889a6f..b05868728da7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -86,7 +86,7 @@ enum {
86 86
87int mlx5_query_hca_caps(struct mlx5_core_dev *dev); 87int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
88int mlx5_query_board_id(struct mlx5_core_dev *dev); 88int mlx5_query_board_id(struct mlx5_core_dev *dev);
89int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); 89int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
90int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); 90int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
91int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); 91int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
92void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, 92void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 889130edb715..02d6c5b5d502 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -98,6 +98,11 @@ static u64 sq_allowed_event_types(void)
98 return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR); 98 return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
99} 99}
100 100
101static u64 dct_allowed_event_types(void)
102{
103 return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
104}
105
101static bool is_event_type_allowed(int rsc_type, int event_type) 106static bool is_event_type_allowed(int rsc_type, int event_type)
102{ 107{
103 switch (rsc_type) { 108 switch (rsc_type) {
@@ -107,6 +112,8 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
107 return BIT(event_type) & rq_allowed_event_types(); 112 return BIT(event_type) & rq_allowed_event_types();
108 case MLX5_EVENT_QUEUE_TYPE_SQ: 113 case MLX5_EVENT_QUEUE_TYPE_SQ:
109 return BIT(event_type) & sq_allowed_event_types(); 114 return BIT(event_type) & sq_allowed_event_types();
115 case MLX5_EVENT_QUEUE_TYPE_DCT:
116 return BIT(event_type) & dct_allowed_event_types();
110 default: 117 default:
111 WARN(1, "Event arrived for unknown resource type"); 118 WARN(1, "Event arrived for unknown resource type");
112 return false; 119 return false;
@@ -116,6 +123,7 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
116void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) 123void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
117{ 124{
118 struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn); 125 struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
126 struct mlx5_core_dct *dct;
119 struct mlx5_core_qp *qp; 127 struct mlx5_core_qp *qp;
120 128
121 if (!common) 129 if (!common)
@@ -134,7 +142,11 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
134 qp = (struct mlx5_core_qp *)common; 142 qp = (struct mlx5_core_qp *)common;
135 qp->event(qp, event_type); 143 qp->event(qp, event_type);
136 break; 144 break;
137 145 case MLX5_RES_DCT:
146 dct = (struct mlx5_core_dct *)common;
147 if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
148 complete(&dct->drained);
149 break;
138 default: 150 default:
139 mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); 151 mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
140 } 152 }
@@ -142,9 +154,9 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
142 mlx5_core_put_rsc(common); 154 mlx5_core_put_rsc(common);
143} 155}
144 156
145static int create_qprqsq_common(struct mlx5_core_dev *dev, 157static int create_resource_common(struct mlx5_core_dev *dev,
146 struct mlx5_core_qp *qp, 158 struct mlx5_core_qp *qp,
147 int rsc_type) 159 int rsc_type)
148{ 160{
149 struct mlx5_qp_table *table = &dev->priv.qp_table; 161 struct mlx5_qp_table *table = &dev->priv.qp_table;
150 int err; 162 int err;
@@ -165,8 +177,8 @@ static int create_qprqsq_common(struct mlx5_core_dev *dev,
165 return 0; 177 return 0;
166} 178}
167 179
168static void destroy_qprqsq_common(struct mlx5_core_dev *dev, 180static void destroy_resource_common(struct mlx5_core_dev *dev,
169 struct mlx5_core_qp *qp) 181 struct mlx5_core_qp *qp)
170{ 182{
171 struct mlx5_qp_table *table = &dev->priv.qp_table; 183 struct mlx5_qp_table *table = &dev->priv.qp_table;
172 unsigned long flags; 184 unsigned long flags;
@@ -179,6 +191,40 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
179 wait_for_completion(&qp->common.free); 191 wait_for_completion(&qp->common.free);
180} 192}
181 193
194int mlx5_core_create_dct(struct mlx5_core_dev *dev,
195 struct mlx5_core_dct *dct,
196 u32 *in, int inlen)
197{
198 u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
199 u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
200 u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
201 struct mlx5_core_qp *qp = &dct->mqp;
202 int err;
203
204 init_completion(&dct->drained);
205 MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
206
207 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
208 if (err) {
209 mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
210 return err;
211 }
212
213 qp->qpn = MLX5_GET(create_dct_out, out, dctn);
214 err = create_resource_common(dev, qp, MLX5_RES_DCT);
215 if (err)
216 goto err_cmd;
217
218 return 0;
219err_cmd:
220 MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
221 MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
222 mlx5_cmd_exec(dev, (void *)&din, sizeof(din),
223 (void *)&dout, sizeof(dout));
224 return err;
225}
226EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
227
182int mlx5_core_create_qp(struct mlx5_core_dev *dev, 228int mlx5_core_create_qp(struct mlx5_core_dev *dev,
183 struct mlx5_core_qp *qp, 229 struct mlx5_core_qp *qp,
184 u32 *in, int inlen) 230 u32 *in, int inlen)
@@ -197,7 +243,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
197 qp->qpn = MLX5_GET(create_qp_out, out, qpn); 243 qp->qpn = MLX5_GET(create_qp_out, out, qpn);
198 mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); 244 mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
199 245
200 err = create_qprqsq_common(dev, qp, MLX5_RES_QP); 246 err = create_resource_common(dev, qp, MLX5_RES_QP);
201 if (err) 247 if (err)
202 goto err_cmd; 248 goto err_cmd;
203 249
@@ -220,6 +266,47 @@ err_cmd:
220} 266}
221EXPORT_SYMBOL_GPL(mlx5_core_create_qp); 267EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
222 268
269static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
270 struct mlx5_core_dct *dct)
271{
272 u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
273 u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0};
274 struct mlx5_core_qp *qp = &dct->mqp;
275
276 MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
277 MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
278 return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
279 (void *)&out, sizeof(out));
280}
281
282int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
283 struct mlx5_core_dct *dct)
284{
285 u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
286 u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
287 struct mlx5_core_qp *qp = &dct->mqp;
288 int err;
289
290 err = mlx5_core_drain_dct(dev, dct);
291 if (err) {
292 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
293 goto destroy;
294 } else {
295 mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err);
296 return err;
297 }
298 }
299 wait_for_completion(&dct->drained);
300destroy:
301 destroy_resource_common(dev, &dct->mqp);
302 MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
303 MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
304 err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
305 (void *)&out, sizeof(out));
306 return err;
307}
308EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
309
223int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, 310int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
224 struct mlx5_core_qp *qp) 311 struct mlx5_core_qp *qp)
225{ 312{
@@ -229,7 +316,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
229 316
230 mlx5_debug_qp_remove(dev, qp); 317 mlx5_debug_qp_remove(dev, qp);
231 318
232 destroy_qprqsq_common(dev, qp); 319 destroy_resource_common(dev, qp);
233 320
234 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 321 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
235 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); 322 MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
@@ -405,6 +492,20 @@ int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
405} 492}
406EXPORT_SYMBOL_GPL(mlx5_core_qp_query); 493EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
407 494
495int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
496 u32 *out, int outlen)
497{
498 u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
499 struct mlx5_core_qp *qp = &dct->mqp;
500
501 MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
502 MLX5_SET(query_dct_in, in, dctn, qp->qpn);
503
504 return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
505 (void *)out, outlen);
506}
507EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
508
408int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) 509int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
409{ 510{
410 u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; 511 u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
@@ -441,7 +542,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
441 return err; 542 return err;
442 543
443 rq->qpn = rqn; 544 rq->qpn = rqn;
444 err = create_qprqsq_common(dev, rq, MLX5_RES_RQ); 545 err = create_resource_common(dev, rq, MLX5_RES_RQ);
445 if (err) 546 if (err)
446 goto err_destroy_rq; 547 goto err_destroy_rq;
447 548
@@ -457,7 +558,7 @@ EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
457void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, 558void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
458 struct mlx5_core_qp *rq) 559 struct mlx5_core_qp *rq)
459{ 560{
460 destroy_qprqsq_common(dev, rq); 561 destroy_resource_common(dev, rq);
461 mlx5_core_destroy_rq(dev, rq->qpn); 562 mlx5_core_destroy_rq(dev, rq->qpn);
462} 563}
463EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); 564EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
@@ -473,7 +574,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
473 return err; 574 return err;
474 575
475 sq->qpn = sqn; 576 sq->qpn = sqn;
476 err = create_qprqsq_common(dev, sq, MLX5_RES_SQ); 577 err = create_resource_common(dev, sq, MLX5_RES_SQ);
477 if (err) 578 if (err)
478 goto err_destroy_sq; 579 goto err_destroy_sq;
479 580
@@ -489,7 +590,7 @@ EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
489void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, 590void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
490 struct mlx5_core_qp *sq) 591 struct mlx5_core_qp *sq)
491{ 592{
492 destroy_qprqsq_common(dev, sq); 593 destroy_resource_common(dev, sq);
493 mlx5_core_destroy_sq(dev, sq->qpn); 594 mlx5_core_destroy_sq(dev, sq->qpn);
494} 595}
495EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); 596EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
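
The qp.c changes generalize the QP/RQ/SQ tracking helpers into create/destroy_resource_common() and build DCT support on top of them: destroy first issues DRAIN_DCT and sleeps until the MLX5_EVENT_TYPE_DCT_DRAINED completion arrives through mlx5_rsc_event(). A minimal caller sketch follows; the function name is hypothetical and the dctc context setup is elided.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/qp.h>

/*
 * Sketch only: create a DCT and tear it down again. A real caller
 * (e.g. mlx5_ib behind the new IB_QPT_DRIVER type) fills the dctc
 * context with PD, CQ, SRQ, access key and so on before the create.
 */
static int example_dct_lifecycle(struct mlx5_core_dev *mdev,
                                 struct mlx5_core_dct *dct)
{
        u32 in[MLX5_ST_SZ_DW(create_dct_in)] = {0};
        int err;

        /* ... MLX5_SET() the DCT context fields here ... */

        err = mlx5_core_create_dct(mdev, dct, in, sizeof(in));
        if (err)
                return err;

        /*
         * Drains the DCT and waits on dct->drained, which is completed
         * from the MLX5_EVENT_TYPE_DCT_DRAINED handler added to
         * mlx5_rsc_event() above, before issuing DESTROY_DCT.
         */
        return mlx5_core_destroy_dct(mdev, dct);
}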
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index a1296a62497d..dfe36cf6fbea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -36,6 +36,9 @@
36#include <linux/mlx5/vport.h> 36#include <linux/mlx5/vport.h>
37#include "mlx5_core.h" 37#include "mlx5_core.h"
38 38
39/* Mutex to hold while enabling or disabling RoCE */
40static DEFINE_MUTEX(mlx5_roce_en_lock);
41
39static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, 42static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
40 u16 vport, u32 *out, int outlen) 43 u16 vport, u32 *out, int outlen)
41{ 44{
@@ -998,17 +1001,35 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
998 1001
999int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) 1002int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
1000{ 1003{
1001 if (atomic_inc_return(&mdev->roce.roce_en) != 1) 1004 int err = 0;
1002 return 0; 1005
1003 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); 1006 mutex_lock(&mlx5_roce_en_lock);
1007 if (!mdev->roce.roce_en)
1008 err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
1009
1010 if (!err)
1011 mdev->roce.roce_en++;
1012 mutex_unlock(&mlx5_roce_en_lock);
1013
1014 return err;
1004} 1015}
1005EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); 1016EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
1006 1017
1007int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) 1018int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
1008{ 1019{
1009 if (atomic_dec_return(&mdev->roce.roce_en) != 0) 1020 int err = 0;
1010 return 0; 1021
1011 return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); 1022 mutex_lock(&mlx5_roce_en_lock);
1023 if (mdev->roce.roce_en) {
1024 mdev->roce.roce_en--;
1025 if (mdev->roce.roce_en == 0)
1026 err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
1027
1028 if (err)
1029 mdev->roce.roce_en++;
1030 }
1031 mutex_unlock(&mlx5_roce_en_lock);
1032 return err;
1012} 1033}
1013EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); 1034EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
1014 1035
@@ -1110,3 +1131,61 @@ ex:
1110 return err; 1131 return err;
1111} 1132}
1112EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); 1133EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context);
1134
1135int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
1136 struct mlx5_core_dev *port_mdev)
1137{
1138 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
1139 void *in;
1140 int err;
1141
1142 in = kvzalloc(inlen, GFP_KERNEL);
1143 if (!in)
1144 return -ENOMEM;
1145
1146 err = mlx5_nic_vport_enable_roce(port_mdev);
1147 if (err)
1148 goto free;
1149
1150 MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
1151 MLX5_SET(modify_nic_vport_context_in, in,
1152 nic_vport_context.affiliated_vhca_id,
1153 MLX5_CAP_GEN(master_mdev, vhca_id));
1154 MLX5_SET(modify_nic_vport_context_in, in,
1155 nic_vport_context.affiliation_criteria,
1156 MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
1157
1158 err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
1159 if (err)
1160 mlx5_nic_vport_disable_roce(port_mdev);
1161
1162free:
1163 kvfree(in);
1164 return err;
1165}
1166EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport);
1167
1168int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
1169{
1170 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
1171 void *in;
1172 int err;
1173
1174 in = kvzalloc(inlen, GFP_KERNEL);
1175 if (!in)
1176 return -ENOMEM;
1177
1178 MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1);
1179 MLX5_SET(modify_nic_vport_context_in, in,
1180 nic_vport_context.affiliated_vhca_id, 0);
1181 MLX5_SET(modify_nic_vport_context_in, in,
1182 nic_vport_context.affiliation_criteria, 0);
1183
1184 err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
1185 if (!err)
1186 mlx5_nic_vport_disable_roce(port_mdev);
1187
1188 kvfree(in);
1189 return err;
1190}
1191EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
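
The vport.c changes replace the atomic RoCE enable counter with a mutex-protected refcount and add the NIC vport affiliation commands used for dual-port RoCE. A sketch of the intended pairing is below, using the mlx5_core_is_mp_master()/_slave() helpers added to mlx5/driver.h later in this diff; the wrapper function and policy are hypothetical.

#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>

/*
 * Sketch: affiliate a slave PCI function with its master so RoCE
 * traffic on the slave's vport flows through the master vhca, then
 * undo it again.
 */
static int example_bind_dual_port(struct mlx5_core_dev *master,
                                  struct mlx5_core_dev *slave)
{
        int err;

        if (!mlx5_core_is_mp_master(master) || !mlx5_core_is_mp_slave(slave))
                return -EINVAL;

        /* Enables RoCE on the slave and sets affiliated_vhca_id. */
        err = mlx5_nic_vport_affiliate_multiport(master, slave);
        if (err)
                return err;

        /* ... dual-port RoCE in use ... */

        /* Clears the affiliation and drops the RoCE reference. */
        return mlx5_nic_vport_unaffiliate_multiport(slave);
}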
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 409ffb14298a..e5258ee4e38b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -79,6 +79,11 @@
79 << __mlx5_dw_bit_off(typ, fld))); \ 79 << __mlx5_dw_bit_off(typ, fld))); \
80} while (0) 80} while (0)
81 81
82#define MLX5_ARRAY_SET(typ, p, fld, idx, v) do { \
83 BUILD_BUG_ON(__mlx5_bit_off(typ, fld) % 32); \
84 MLX5_SET(typ, p, fld[idx], v); \
85} while (0)
86
82#define MLX5_SET_TO_ONES(typ, p, fld) do { \ 87#define MLX5_SET_TO_ONES(typ, p, fld) do { \
83 BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \ 88 BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 32); \
84 *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \ 89 *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
@@ -244,6 +249,8 @@ enum {
244 MLX5_NON_FP_BFREGS_PER_UAR, 249 MLX5_NON_FP_BFREGS_PER_UAR,
245 MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, 250 MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
246 MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE, 251 MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE,
252 MLX5_MIN_DYN_BFREGS = 512,
253 MLX5_MAX_DYN_BFREGS = 1024,
247}; 254};
248 255
249enum { 256enum {
@@ -284,6 +291,7 @@ enum {
284 MLX5_EVENT_QUEUE_TYPE_QP = 0, 291 MLX5_EVENT_QUEUE_TYPE_QP = 0,
285 MLX5_EVENT_QUEUE_TYPE_RQ = 1, 292 MLX5_EVENT_QUEUE_TYPE_RQ = 1,
286 MLX5_EVENT_QUEUE_TYPE_SQ = 2, 293 MLX5_EVENT_QUEUE_TYPE_SQ = 2,
294 MLX5_EVENT_QUEUE_TYPE_DCT = 6,
287}; 295};
288 296
289enum mlx5_event { 297enum mlx5_event {
@@ -319,6 +327,8 @@ enum mlx5_event {
319 MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, 327 MLX5_EVENT_TYPE_PAGE_FAULT = 0xc,
320 MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, 328 MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd,
321 329
330 MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c,
331
322 MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, 332 MLX5_EVENT_TYPE_FPGA_ERROR = 0x20,
323}; 333};
324 334
@@ -611,6 +621,11 @@ struct mlx5_eqe_pps {
611 u8 rsvd2[12]; 621 u8 rsvd2[12];
612} __packed; 622} __packed;
613 623
624struct mlx5_eqe_dct {
625 __be32 reserved[6];
626 __be32 dctn;
627};
628
614union ev_data { 629union ev_data {
615 __be32 raw[7]; 630 __be32 raw[7];
616 struct mlx5_eqe_cmd cmd; 631 struct mlx5_eqe_cmd cmd;
@@ -626,6 +641,7 @@ union ev_data {
626 struct mlx5_eqe_vport_change vport_change; 641 struct mlx5_eqe_vport_change vport_change;
627 struct mlx5_eqe_port_module port_module; 642 struct mlx5_eqe_port_module port_module;
628 struct mlx5_eqe_pps pps; 643 struct mlx5_eqe_pps pps;
644 struct mlx5_eqe_dct dct;
629} __packed; 645} __packed;
630 646
631struct mlx5_eqe { 647struct mlx5_eqe {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a0610427e168..fb7e8b205eb9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -155,6 +155,13 @@ enum mlx5_dcbx_oper_mode {
155 MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, 155 MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3,
156}; 156};
157 157
158enum mlx5_dct_atomic_mode {
159 MLX5_ATOMIC_MODE_DCT_OFF = 20,
160 MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
161 MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
162 MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
163};
164
158enum { 165enum {
159 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, 166 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
160 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, 167 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
@@ -231,6 +238,9 @@ struct mlx5_bfreg_info {
231 u32 ver; 238 u32 ver;
232 bool lib_uar_4k; 239 bool lib_uar_4k;
233 u32 num_sys_pages; 240 u32 num_sys_pages;
241 u32 num_static_sys_pages;
242 u32 total_num_bfregs;
243 u32 num_dyn_bfregs;
234}; 244};
235 245
236struct mlx5_cmd_first { 246struct mlx5_cmd_first {
@@ -430,6 +440,7 @@ enum mlx5_res_type {
430 MLX5_RES_SRQ = 3, 440 MLX5_RES_SRQ = 3,
431 MLX5_RES_XSRQ = 4, 441 MLX5_RES_XSRQ = 4,
432 MLX5_RES_XRQ = 5, 442 MLX5_RES_XRQ = 5,
443 MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT,
433}; 444};
434 445
435struct mlx5_core_rsc_common { 446struct mlx5_core_rsc_common {
@@ -788,6 +799,7 @@ struct mlx5_clock {
788 u32 nominal_c_mult; 799 u32 nominal_c_mult;
789 unsigned long overflow_period; 800 unsigned long overflow_period;
790 struct delayed_work overflow_work; 801 struct delayed_work overflow_work;
802 struct mlx5_core_dev *mdev;
791 struct ptp_clock *ptp; 803 struct ptp_clock *ptp;
792 struct ptp_clock_info ptp_info; 804 struct ptp_clock_info ptp_info;
793 struct mlx5_pps pps_info; 805 struct mlx5_pps pps_info;
@@ -826,7 +838,7 @@ struct mlx5_core_dev {
826 struct mlx5e_resources mlx5e_res; 838 struct mlx5e_resources mlx5e_res;
827 struct { 839 struct {
828 struct mlx5_rsvd_gids reserved_gids; 840 struct mlx5_rsvd_gids reserved_gids;
829 atomic_t roce_en; 841 u32 roce_en;
830 } roce; 842 } roce;
831#ifdef CONFIG_MLX5_FPGA 843#ifdef CONFIG_MLX5_FPGA
832 struct mlx5_fpga_device *fpga; 844 struct mlx5_fpga_device *fpga;
@@ -835,6 +847,8 @@ struct mlx5_core_dev {
835 struct cpu_rmap *rmap; 847 struct cpu_rmap *rmap;
836#endif 848#endif
837 struct mlx5_clock clock; 849 struct mlx5_clock clock;
850 struct mlx5_ib_clock_info *clock_info;
851 struct page *clock_info_page;
838}; 852};
839 853
840struct mlx5_db { 854struct mlx5_db {
@@ -1103,7 +1117,7 @@ void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
1103unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); 1117unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
1104int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, 1118int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
1105 u8 roce_version, u8 roce_l3_type, const u8 *gid, 1119 u8 roce_version, u8 roce_l3_type, const u8 *gid,
1106 const u8 *mac, bool vlan, u16 vlan_id); 1120 const u8 *mac, bool vlan, u16 vlan_id, u8 port_num);
1107 1121
1108static inline int fw_initializing(struct mlx5_core_dev *dev) 1122static inline int fw_initializing(struct mlx5_core_dev *dev)
1109{ 1123{
@@ -1225,6 +1239,31 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
1225 return !!(dev->priv.rl_table.max_size); 1239 return !!(dev->priv.rl_table.max_size);
1226} 1240}
1227 1241
1242static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev)
1243{
1244 return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) &&
1245 MLX5_CAP_GEN(dev, num_vhca_ports) <= 1;
1246}
1247
1248static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev)
1249{
1250 return MLX5_CAP_GEN(dev, num_vhca_ports) > 1;
1251}
1252
1253static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev)
1254{
1255 return mlx5_core_is_mp_slave(dev) ||
1256 mlx5_core_is_mp_master(dev);
1257}
1258
1259static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
1260{
1261 if (!mlx5_core_mp_enabled(dev))
1262 return 1;
1263
1264 return MLX5_CAP_GEN(dev, native_port_num);
1265}
1266
1228enum { 1267enum {
1229 MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, 1268 MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
1230}; 1269};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1391a82da98e..7ac7bd76c7af 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -502,7 +502,7 @@ struct mlx5_ifc_ads_bits {
502 u8 dei_cfi[0x1]; 502 u8 dei_cfi[0x1];
503 u8 eth_prio[0x3]; 503 u8 eth_prio[0x3];
504 u8 sl[0x4]; 504 u8 sl[0x4];
505 u8 port[0x8]; 505 u8 vhca_port_num[0x8];
506 u8 rmac_47_32[0x10]; 506 u8 rmac_47_32[0x10];
507 507
508 u8 rmac_31_0[0x20]; 508 u8 rmac_31_0[0x20];
@@ -794,7 +794,10 @@ enum {
794}; 794};
795 795
796struct mlx5_ifc_cmd_hca_cap_bits { 796struct mlx5_ifc_cmd_hca_cap_bits {
797 u8 reserved_at_0[0x80]; 797 u8 reserved_at_0[0x30];
798 u8 vhca_id[0x10];
799
800 u8 reserved_at_40[0x40];
798 801
799 u8 log_max_srq_sz[0x8]; 802 u8 log_max_srq_sz[0x8];
800 u8 log_max_qp_sz[0x8]; 803 u8 log_max_qp_sz[0x8];
@@ -1067,7 +1070,12 @@ struct mlx5_ifc_cmd_hca_cap_bits {
1067 u8 reserved_at_5f8[0x3]; 1070 u8 reserved_at_5f8[0x3];
1068 u8 log_max_xrq[0x5]; 1071 u8 log_max_xrq[0x5];
1069 1072
1070 u8 reserved_at_600[0x200]; 1073 u8 affiliate_nic_vport_criteria[0x8];
1074 u8 native_port_num[0x8];
1075 u8 num_vhca_ports[0x8];
1076 u8 reserved_at_618[0x6];
1077 u8 sw_owner_id[0x1];
1078 u8 reserved_at_61f[0x1e1];
1071}; 1079};
1072 1080
1073enum mlx5_flow_destination_type { 1081enum mlx5_flow_destination_type {
@@ -2616,7 +2624,12 @@ struct mlx5_ifc_nic_vport_context_bits {
2616 u8 event_on_mc_address_change[0x1]; 2624 u8 event_on_mc_address_change[0x1];
2617 u8 event_on_uc_address_change[0x1]; 2625 u8 event_on_uc_address_change[0x1];
2618 2626
2619 u8 reserved_at_40[0xf0]; 2627 u8 reserved_at_40[0xc];
2628
2629 u8 affiliation_criteria[0x4];
2630 u8 affiliated_vhca_id[0x10];
2631
2632 u8 reserved_at_60[0xd0];
2620 2633
2621 u8 mtu[0x10]; 2634 u8 mtu[0x10];
2622 2635
@@ -3259,7 +3272,8 @@ struct mlx5_ifc_set_roce_address_in_bits {
3259 u8 op_mod[0x10]; 3272 u8 op_mod[0x10];
3260 3273
3261 u8 roce_address_index[0x10]; 3274 u8 roce_address_index[0x10];
3262 u8 reserved_at_50[0x10]; 3275 u8 reserved_at_50[0xc];
3276 u8 vhca_port_num[0x4];
3263 3277
3264 u8 reserved_at_60[0x20]; 3278 u8 reserved_at_60[0x20];
3265 3279
@@ -3879,7 +3893,8 @@ struct mlx5_ifc_query_roce_address_in_bits {
3879 u8 op_mod[0x10]; 3893 u8 op_mod[0x10];
3880 3894
3881 u8 roce_address_index[0x10]; 3895 u8 roce_address_index[0x10];
3882 u8 reserved_at_50[0x10]; 3896 u8 reserved_at_50[0xc];
3897 u8 vhca_port_num[0x4];
3883 3898
3884 u8 reserved_at_60[0x20]; 3899 u8 reserved_at_60[0x20];
3885}; 3900};
@@ -5311,7 +5326,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
5311}; 5326};
5312 5327
5313struct mlx5_ifc_modify_nic_vport_field_select_bits { 5328struct mlx5_ifc_modify_nic_vport_field_select_bits {
5314 u8 reserved_at_0[0x14]; 5329 u8 reserved_at_0[0x12];
5330 u8 affiliation[0x1];
5331 u8 reserved_at_e[0x1];
5315 u8 disable_uc_local_lb[0x1]; 5332 u8 disable_uc_local_lb[0x1];
5316 u8 disable_mc_local_lb[0x1]; 5333 u8 disable_mc_local_lb[0x1];
5317 u8 node_guid[0x1]; 5334 u8 node_guid[0x1];
@@ -5532,6 +5549,7 @@ struct mlx5_ifc_init_hca_in_bits {
5532 u8 op_mod[0x10]; 5549 u8 op_mod[0x10];
5533 5550
5534 u8 reserved_at_40[0x40]; 5551 u8 reserved_at_40[0x40];
5552 u8 sw_owner_id[4][0x20];
5535}; 5553};
5536 5554
5537struct mlx5_ifc_init2rtr_qp_out_bits { 5555struct mlx5_ifc_init2rtr_qp_out_bits {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 62af7512dabb..4778d41085d4 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -473,6 +473,11 @@ struct mlx5_core_qp {
473 int pid; 473 int pid;
474}; 474};
475 475
476struct mlx5_core_dct {
477 struct mlx5_core_qp mqp;
478 struct completion drained;
479};
480
476struct mlx5_qp_path { 481struct mlx5_qp_path {
477 u8 fl_free_ar; 482 u8 fl_free_ar;
478 u8 rsvd3; 483 u8 rsvd3;
@@ -549,6 +554,9 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev,
549 return radix_tree_lookup(&dev->priv.mkey_table.tree, key); 554 return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
550} 555}
551 556
557int mlx5_core_create_dct(struct mlx5_core_dev *dev,
558 struct mlx5_core_dct *qp,
559 u32 *in, int inlen);
552int mlx5_core_create_qp(struct mlx5_core_dev *dev, 560int mlx5_core_create_qp(struct mlx5_core_dev *dev,
553 struct mlx5_core_qp *qp, 561 struct mlx5_core_qp *qp,
554 u32 *in, 562 u32 *in,
@@ -558,8 +566,12 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
558 struct mlx5_core_qp *qp); 566 struct mlx5_core_qp *qp);
559int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, 567int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
560 struct mlx5_core_qp *qp); 568 struct mlx5_core_qp *qp);
569int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
570 struct mlx5_core_dct *dct);
561int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, 571int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
562 u32 *out, int outlen); 572 u32 *out, int outlen);
573int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
574 u32 *out, int outlen);
563 575
564int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, 576int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
565 u32 timeout_usec); 577 u32 timeout_usec);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aaa0bb9e7655..64e193e87394 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -116,4 +116,8 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
116 struct mlx5_hca_vport_context *req); 116 struct mlx5_hca_vport_context *req);
117int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable); 117int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable);
118int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status); 118int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
119
120int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
121 struct mlx5_core_dev *port_mdev);
122int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
119#endif /* __MLX5_VPORT_H__ */ 123#endif /* __MLX5_VPORT_H__ */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 18c564f60e93..d656809f1217 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -94,7 +94,7 @@ struct rdma_dev_addr {
94 * The dev_addr->net field must be initialized. 94 * The dev_addr->net field must be initialized.
95 */ 95 */
96int rdma_translate_ip(const struct sockaddr *addr, 96int rdma_translate_ip(const struct sockaddr *addr,
97 struct rdma_dev_addr *dev_addr, u16 *vlan_id); 97 struct rdma_dev_addr *dev_addr);
98 98
99/** 99/**
100 * rdma_resolve_ip - Resolve source and destination IP addresses to 100 * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -131,10 +131,9 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
131 131
132int rdma_addr_size(struct sockaddr *addr); 132int rdma_addr_size(struct sockaddr *addr);
133 133
134int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
135int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 134int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
136 const union ib_gid *dgid, 135 const union ib_gid *dgid,
137 u8 *smac, u16 *vlan_id, int *if_index, 136 u8 *dmac, const struct net_device *ndev,
138 int *hoplimit); 137 int *hoplimit);
139 138
140static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) 139static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
@@ -198,34 +197,15 @@ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid)
198 } 197 }
199} 198}
200 199
201static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, 200/*
202 union ib_gid *gid) 201 * rdma_get/set_sgid/dgid() APIs are applicable to IB and iWarp.
203{ 202 * They are not applicable to RoCE.
204 struct net_device *dev; 203 * RoCE GIDs are derived from the IP addresses.
205 struct in_device *ip4; 204 */
206
207 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
208 if (dev) {
209 ip4 = in_dev_get(dev);
210 if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
211 ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
212 (struct in6_addr *)gid);
213
214 if (ip4)
215 in_dev_put(ip4);
216
217 dev_put(dev);
218 }
219}
220
221static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) 205static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
222{ 206{
223 if (dev_addr->transport == RDMA_TRANSPORT_IB && 207 memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr),
224 dev_addr->dev_type != ARPHRD_INFINIBAND) 208 sizeof(*gid));
225 iboe_addr_get_sgid(dev_addr, gid);
226 else
227 memcpy(gid, dev_addr->src_dev_addr +
228 rdma_addr_gid_offset(dev_addr), sizeof *gid);
229} 209}
230 210
231static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) 211static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 1f7f604db5aa..811cfcfcbe3d 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -549,12 +549,12 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
549 struct rdma_ah_attr *ah_attr); 549 struct rdma_ah_attr *ah_attr);
550 550
551/** 551/**
552 * ib_init_ah_from_path - Initialize address handle attributes based on an SA 552 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
553 * path record. 553 * an SA path record.
554 */ 554 */
555int ib_init_ah_from_path(struct ib_device *device, u8 port_num, 555int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
556 struct sa_path_rec *rec, 556 struct sa_path_rec *rec,
557 struct rdma_ah_attr *ah_attr); 557 struct rdma_ah_attr *ah_attr);
558 558
559/** 559/**
560 * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec 560
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fd84cda5ed7c..5263c86fd103 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -63,6 +63,7 @@
63#include <linux/uaccess.h> 63#include <linux/uaccess.h>
64#include <linux/cgroup_rdma.h> 64#include <linux/cgroup_rdma.h>
65#include <uapi/rdma/ib_user_verbs.h> 65#include <uapi/rdma/ib_user_verbs.h>
66#include <rdma/restrack.h>
66 67
67#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN 68#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
68 69
@@ -300,11 +301,6 @@ struct ib_tm_caps {
300 u32 max_sge; 301 u32 max_sge;
301}; 302};
302 303
303enum ib_cq_creation_flags {
304 IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
305 IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
306};
307
308struct ib_cq_init_attr { 304struct ib_cq_init_attr {
309 unsigned int cqe; 305 unsigned int cqe;
310 int comp_vector; 306 int comp_vector;
@@ -983,9 +979,9 @@ struct ib_wc {
983 u32 invalidate_rkey; 979 u32 invalidate_rkey;
984 } ex; 980 } ex;
985 u32 src_qp; 981 u32 src_qp;
982 u32 slid;
986 int wc_flags; 983 int wc_flags;
987 u16 pkey_index; 984 u16 pkey_index;
988 u32 slid;
989 u8 sl; 985 u8 sl;
990 u8 dlid_path_bits; 986 u8 dlid_path_bits;
991 u8 port_num; /* valid only for DR SMPs on switches */ 987 u8 port_num; /* valid only for DR SMPs on switches */
@@ -1082,6 +1078,7 @@ enum ib_qp_type {
1082 IB_QPT_XRC_INI = 9, 1078 IB_QPT_XRC_INI = 9,
1083 IB_QPT_XRC_TGT, 1079 IB_QPT_XRC_TGT,
1084 IB_QPT_MAX, 1080 IB_QPT_MAX,
1081 IB_QPT_DRIVER = 0xFF,
1085 /* Reserve a range for qp types internal to the low level driver. 1082 /* Reserve a range for qp types internal to the low level driver.
1086 * These qp types will not be visible at the IB core layer, so the 1083 * These qp types will not be visible at the IB core layer, so the
1087 * IB_QPT_MAX usages should not be affected in the core layer 1084 * IB_QPT_MAX usages should not be affected in the core layer
@@ -1529,6 +1526,7 @@ struct ib_pd {
1529 * Implementation details of the RDMA core, don't use in drivers: 1526 * Implementation details of the RDMA core, don't use in drivers:
1530 */ 1527 */
1531 struct ib_mr *__internal_mr; 1528 struct ib_mr *__internal_mr;
1529 struct rdma_restrack_entry res;
1532}; 1530};
1533 1531
1534struct ib_xrcd { 1532struct ib_xrcd {
@@ -1538,6 +1536,10 @@ struct ib_xrcd {
1538 1536
1539 struct mutex tgt_qp_mutex; 1537 struct mutex tgt_qp_mutex;
1540 struct list_head tgt_qp_list; 1538 struct list_head tgt_qp_list;
1539 /*
1540 * Implementation details of the RDMA core, don't use in drivers:
1541 */
1542 struct rdma_restrack_entry res;
1541}; 1543};
1542 1544
1543struct ib_ah { 1545struct ib_ah {
@@ -1569,6 +1571,10 @@ struct ib_cq {
1569 struct irq_poll iop; 1571 struct irq_poll iop;
1570 struct work_struct work; 1572 struct work_struct work;
1571 }; 1573 };
1574 /*
1575 * Implementation details of the RDMA core, don't use in drivers:
1576 */
1577 struct rdma_restrack_entry res;
1572}; 1578};
1573 1579
1574struct ib_srq { 1580struct ib_srq {
@@ -1745,6 +1751,11 @@ struct ib_qp {
1745 struct ib_rwq_ind_table *rwq_ind_tbl; 1751 struct ib_rwq_ind_table *rwq_ind_tbl;
1746 struct ib_qp_security *qp_sec; 1752 struct ib_qp_security *qp_sec;
1747 u8 port; 1753 u8 port;
1754
1755 /*
1756 * Implementation details of the RDMA core, don't use in drivers:
1757 */
1758 struct rdma_restrack_entry res;
1748}; 1759};
1749 1760
1750struct ib_mr { 1761struct ib_mr {
@@ -2351,6 +2362,10 @@ struct ib_device {
2351#endif 2362#endif
2352 2363
2353 u32 index; 2364 u32 index;
2365 /*
2366 * Implementation details of the RDMA core, don't use in drivers
2367 */
2368 struct rdma_restrack_root res;
2354 2369
2355 /** 2370 /**
2356 * The following mandatory functions are used only at device 2371 * The following mandatory functions are used only at device
@@ -2836,8 +2851,7 @@ int ib_modify_port(struct ib_device *device,
2836 struct ib_port_modify *port_modify); 2851 struct ib_port_modify *port_modify);
2837 2852
2838int ib_find_gid(struct ib_device *device, union ib_gid *gid, 2853int ib_find_gid(struct ib_device *device, union ib_gid *gid,
2839 enum ib_gid_type gid_type, struct net_device *ndev, 2854 struct net_device *ndev, u8 *port_num, u16 *index);
2840 u8 *port_num, u16 *index);
2841 2855
2842int ib_find_pkey(struct ib_device *device, 2856int ib_find_pkey(struct ib_device *device,
2843 u8 port_num, u16 pkey, u16 *index); 2857 u8 port_num, u16 pkey, u16 *index);
@@ -2858,7 +2872,7 @@ enum ib_pd_flags {
2858struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, 2872struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
2859 const char *caller); 2873 const char *caller);
2860#define ib_alloc_pd(device, flags) \ 2874#define ib_alloc_pd(device, flags) \
2861 __ib_alloc_pd((device), (flags), __func__) 2875 __ib_alloc_pd((device), (flags), KBUILD_MODNAME)
2862void ib_dealloc_pd(struct ib_pd *pd); 2876void ib_dealloc_pd(struct ib_pd *pd);
2863 2877
2864/** 2878/**
@@ -2905,7 +2919,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
2905int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); 2919int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
2906 2920
2907/** 2921/**
2908 * ib_init_ah_from_wc - Initializes address handle attributes from a 2922 * ib_init_ah_attr_from_wc - Initializes address handle attributes from a
2909 * work completion. 2923 * work completion.
2910 * @device: Device on which the received message arrived. 2924 * @device: Device on which the received message arrived.
2911 * @port_num: Port on which the received message arrived. 2925 * @port_num: Port on which the received message arrived.
@@ -2915,9 +2929,9 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
2915 * @ah_attr: Returned attributes that can be used when creating an address 2929 * @ah_attr: Returned attributes that can be used when creating an address
2916 * handle for replying to the message. 2930 * handle for replying to the message.
2917 */ 2931 */
2918int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 2932int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
2919 const struct ib_wc *wc, const struct ib_grh *grh, 2933 const struct ib_wc *wc, const struct ib_grh *grh,
2920 struct rdma_ah_attr *ah_attr); 2934 struct rdma_ah_attr *ah_attr);
2921 2935
2922/** 2936/**
2923 * ib_create_ah_from_wc - Creates an address handle associated with the 2937 * ib_create_ah_from_wc - Creates an address handle associated with the
@@ -3135,8 +3149,12 @@ static inline int ib_post_recv(struct ib_qp *qp,
3135 return qp->device->post_recv(qp, recv_wr, bad_recv_wr); 3149 return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
3136} 3150}
3137 3151
3138struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, 3152struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
3139 int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); 3153 int nr_cqe, int comp_vector,
3154 enum ib_poll_context poll_ctx, const char *caller);
3155#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \
3156 __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME)
3157
3140void ib_free_cq(struct ib_cq *cq); 3158void ib_free_cq(struct ib_cq *cq);
3141int ib_process_cq_direct(struct ib_cq *cq, int budget); 3159int ib_process_cq_direct(struct ib_cq *cq, int budget);
3142 3160
@@ -3560,8 +3578,11 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
3560/** 3578/**
3561 * ib_alloc_xrcd - Allocates an XRC domain. 3579 * ib_alloc_xrcd - Allocates an XRC domain.
3562 * @device: The device on which to allocate the XRC domain. 3580 * @device: The device on which to allocate the XRC domain.
3581 * @caller: Module name for kernel consumers
3563 */ 3582 */
3564struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); 3583struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
3584#define ib_alloc_xrcd(device) \
3585 __ib_alloc_xrcd((device), KBUILD_MODNAME)
3565 3586
3566/** 3587/**
3567 * ib_dealloc_xrcd - Deallocates an XRC domain. 3588 * ib_dealloc_xrcd - Deallocates an XRC domain.
@@ -3793,8 +3814,7 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
3793static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, 3814static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
3794 u32 port_num) 3815 u32 port_num)
3795{ 3816{
3796 if ((rdma_protocol_roce(dev, port_num)) || 3817 if (rdma_protocol_roce(dev, port_num))
3797 (rdma_protocol_iwarp(dev, port_num)))
3798 return RDMA_AH_ATTR_TYPE_ROCE; 3818 return RDMA_AH_ATTR_TYPE_ROCE;
3799 else if ((rdma_protocol_ib(dev, port_num)) && 3819 else if ((rdma_protocol_ib(dev, port_num)) &&
3800 (rdma_cap_opa_ah(dev, port_num))) 3820 (rdma_cap_opa_ah(dev, port_num)))
@@ -3850,4 +3870,12 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
3850 3870
3851} 3871}
3852 3872
3873/**
3874 * rdma_roce_rescan_device - Rescan all of the network devices in the system
3875 * and add their GIDs, as needed, to the relevant RoCE devices.
3876 *
3877 * @ibdev: the rdma device
3878 */
3879void rdma_roce_rescan_device(struct ib_device *ibdev);
3880
3853#endif /* IB_VERBS_H */ 3881#endif /* IB_VERBS_H */
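The PD, CQ, and XRCD allocators above now route through double-underscore variants that take a caller string, with the wrapper macros supplying KBUILD_MODNAME so the restrack/nldev code can attribute kernel-owned objects to a module. A minimal sketch of an in-kernel consumer after this change; the ULP code itself is unchanged, and the my_ulp_* names are illustrative only:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

struct my_ulp_ctx {			/* hypothetical ULP-private state */
	struct ib_pd *pd;
	struct ib_cq *cq;
};

static int my_ulp_setup(struct ib_device *ibdev, struct my_ulp_ctx *ctx)
{
	/* Expands to __ib_alloc_pd(ibdev, 0, KBUILD_MODNAME). */
	ctx->pd = ib_alloc_pd(ibdev, 0);
	if (IS_ERR(ctx->pd))
		return PTR_ERR(ctx->pd);

	/* Likewise expands to __ib_alloc_cq(..., KBUILD_MODNAME). */
	ctx->cq = ib_alloc_cq(ibdev, ctx, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(ctx->cq)) {
		ib_dealloc_pd(ctx->pd);
		return PTR_ERR(ctx->cq);
	}
	return 0;
}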
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index f68fca296631..2bbb7a67e643 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -114,4 +114,20 @@ static inline u32 opa_get_mcast_base(u32 nr_top_bits)
114 return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); 114 return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits));
115} 115}
116 116
117/* Check for a valid unicast LID for non-SM traffic types */
118static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
119{
120 if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
121 if (!rdma_ah_get_dlid(attr) ||
122 rdma_ah_get_dlid(attr) >=
123 be32_to_cpu(IB_MULTICAST_LID_BASE))
124 return false;
125 } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
126 if (!rdma_ah_get_dlid(attr) ||
127 rdma_ah_get_dlid(attr) >=
128 opa_get_mcast_base(OPA_MCAST_NR))
129 return false;
130 }
131 return true;
132}
117#endif /* OPA_ADDR_H */ 133#endif /* OPA_ADDR_H */
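rdma_is_valid_unicast_lid() above rejects a zero DLID and anything at or above the IB multicast base (or the OPA multicast base for OPA address handles). A hedged sketch of screening an address handle attribute before use; my_check_av is an illustrative helper, not an in-tree function:

#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>

static int my_check_av(struct rdma_ah_attr *ah_attr)
{
	/* Non-SM traffic must target a valid unicast LID. */
	if (!rdma_is_valid_unicast_lid(ah_attr))
		return -EINVAL;
	return 0;
}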
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 3d2eed3c4e75..6538a5cc27b6 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -413,4 +413,23 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
413const void *rdma_consumer_reject_data(struct rdma_cm_id *id, 413const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
414 struct rdma_cm_event *ev, u8 *data_len); 414 struct rdma_cm_event *ev, u8 *data_len);
415 415
416/**
417 * rdma_read_gids - Return the SGID and DGID used for establishing
418 * connection. This can be used after rdma_resolve_addr()
419 * on client side. This can be use on new connection
420 * on server side. This is applicable to IB, RoCE, iWarp.
421 * If cm_id is not bound yet to the RDMA device, it doesn't
422 * copy and SGID or DGID to the given pointers.
423 * @id: Communication identifier whose GIDs are queried.
424 * @sgid: Pointer to SGID where SGID will be returned. It is optional.
425 * @dgid: Pointer to DGID where DGID will be returned. It is optional.
426 * Note: This API should not be used by any new ULPs or new code.
427 * Instead, users interested in the GIDs should query the path record
428 * of the rdma_cm_id.
429 * This API is provided only for compatibility with existing users.
430 */
431
432void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
433 union ib_gid *dgid);
434
416#endif /* RDMA_CM_H */ 435#endif /* RDMA_CM_H */
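rdma_read_gids() replaces open-coded digging through cm_id->route.addr.dev_addr; the net/rds hunk at the end of this series is the reference conversion. A small sketch of the call, where either output pointer may be NULL and my_log_gids is only illustrative:

#include <linux/printk.h>
#include <rdma/rdma_cm.h>

static void my_log_gids(struct rdma_cm_id *cm_id)
{
	union ib_gid sgid, dgid;

	/* Valid on the client after rdma_resolve_addr() and on a new
	 * server-side connection; a GID is left untouched if the cm_id
	 * is not yet bound to a device. */
	rdma_read_gids(cm_id, &sgid, &dgid);
	pr_debug("sgid %pI6 dgid %pI6\n", sgid.raw, dgid.raw);
}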
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
index 6947a6ba2557..6a69d71a21a5 100644
--- a/include/rdma/rdma_cm_ib.h
+++ b/include/rdma/rdma_cm_ib.h
@@ -36,17 +36,17 @@
36#include <rdma/rdma_cm.h> 36#include <rdma/rdma_cm.h>
37 37
38/** 38/**
39 * rdma_set_ib_paths - Manually sets the path records used to establish a 39 * rdma_set_ib_path - Manually sets the path record used to establish a
40 * connection. 40 * connection.
41 * @id: Connection identifier associated with the request. 41 * @id: Connection identifier associated with the request.
42 * @path_rec: Reference to the path record 42 * @path_rec: Reference to the path record
43 * 43 *
44 * This call permits a user to specify routing information for rdma_cm_id's 44 * This call permits a user to specify routing information for rdma_cm_id's
45 * bound to Infiniband devices. It is called on the client side of a 45 * bound to InfiniBand devices. It is called on the client side of a
46 * connection and replaces the call to rdma_resolve_route. 46 * connection and replaces the call to rdma_resolve_route.
47 */ 47 */
48int rdma_set_ib_paths(struct rdma_cm_id *id, 48int rdma_set_ib_path(struct rdma_cm_id *id,
49 struct sa_path_rec *path_rec, int num_paths); 49 struct sa_path_rec *path_rec);
50 50
51/* Global qkey for UDP QPs and multicast groups. */ 51/* Global qkey for UDP QPs and multicast groups. */
52#define RDMA_UDP_QKEY 0x01234567 52#define RDMA_UDP_QKEY 0x01234567
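With the rename, rdma_set_ib_path() takes exactly one sa_path_rec rather than an array plus count. A hedged sketch of a client-side caller that substitutes a cached or externally obtained path record for rdma_resolve_route(); my_use_cached_path and the origin of 'rec' are assumptions:

#include <rdma/rdma_cm_ib.h>

static int my_use_cached_path(struct rdma_cm_id *id, struct sa_path_rec *rec)
{
	/* Supplies routing information directly, replacing rdma_resolve_route(). */
	return rdma_set_ib_path(id, rec);
}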
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 1ba84a78f1c5..4118324a0310 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -228,13 +228,6 @@ struct rvt_driver_provided {
228 int (*port_callback)(struct ib_device *, u8, struct kobject *); 228 int (*port_callback)(struct ib_device *, u8, struct kobject *);
229 229
230 /* 230 /*
231 * Returns a string to represent the device for which is being
232 * registered. This is primarily used for error and debug messages on
233 * the console.
234 */
235 const char * (*get_card_name)(struct rvt_dev_info *rdi);
236
237 /*
238 * Returns a pointer to the underlying hardware's PCI device. This is 231 * Returns a pointer to the underlying hardware's PCI device. This is
239 * used to display information as to what hardware is being referenced 232 * used to display information as to what hardware is being referenced
240 * in an output message 233 * in an output message
@@ -419,6 +412,30 @@ struct rvt_dev_info {
419 412
420}; 413};
421 414
415/**
416 * rvt_set_ibdev_name - Craft an IB device name from client info
417 * @rdi: pointer to the client rvt_dev_info structure
418 * @name: client specific name
419 * @unit: client specific unit number.
420 */
421static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
422 const char *fmt, const char *name,
423 const int unit)
424{
425 snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit);
426}
427
428/**
429 * rvt_get_ibdev_name - return the IB name
430 * @rdi: rdmavt device
431 *
432 * Return the registered name of the device.
433 */
434static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
435{
436 return rdi->ibdev.name;
437}
438
422static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) 439static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
423{ 440{
424 return container_of(ibpd, struct rvt_pd, ibpd); 441 return container_of(ibpd, struct rvt_pd, ibpd);
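These two inlines replace the removed get_card_name() callback: the driver names its ib_device once at setup and the rest of rdmavt reads it back. A sketch under the assumption of a hypothetical rdmavt-based driver called mydrv:

#include <linux/printk.h>
#include <rdma/rdma_vt.h>

static void mydrv_name_device(struct rvt_dev_info *rdi, int unit)
{
	/* Writes e.g. "mydrv_0" into rdi->ibdev.name via snprintf(). */
	rvt_set_ibdev_name(rdi, "%s_%d", "mydrv", unit);

	pr_info("registering %s\n", rvt_get_ibdev_name(rdi));
}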
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
new file mode 100644
index 000000000000..c2d81167c858
--- /dev/null
+++ b/include/rdma/restrack.h
@@ -0,0 +1,157 @@
1/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
2/*
3 * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
4 */
5
6#ifndef _RDMA_RESTRACK_H_
7#define _RDMA_RESTRACK_H_
8
9#include <linux/typecheck.h>
10#include <linux/rwsem.h>
11#include <linux/sched.h>
12#include <linux/kref.h>
13#include <linux/completion.h>
14
15/**
16 * enum rdma_restrack_type - HW objects to track
17 */
18enum rdma_restrack_type {
19 /**
20 * @RDMA_RESTRACK_PD: Protection domain (PD)
21 */
22 RDMA_RESTRACK_PD,
23 /**
24 * @RDMA_RESTRACK_CQ: Completion queue (CQ)
25 */
26 RDMA_RESTRACK_CQ,
27 /**
28 * @RDMA_RESTRACK_QP: Queue pair (QP)
29 */
30 RDMA_RESTRACK_QP,
31 /**
32 * @RDMA_RESTRACK_XRCD: XRC domain (XRCD)
33 */
34 RDMA_RESTRACK_XRCD,
35 /**
36 * @RDMA_RESTRACK_MAX: Last entry, used for array declarations
37 */
38 RDMA_RESTRACK_MAX
39};
40
41#define RDMA_RESTRACK_HASH_BITS 8
42/**
43 * struct rdma_restrack_root - main resource tracking management
44 * entity, per-device
45 */
46struct rdma_restrack_root {
47 /*
48 * @rwsem: Read/write lock to protect lists
49 */
50 struct rw_semaphore rwsem;
51 /**
52 * @hash: global database for all resources per-device
53 */
54 DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
55};
56
57/**
58 * struct rdma_restrack_entry - metadata per-entry
59 */
60struct rdma_restrack_entry {
61 /**
62 * @valid: validity indicator
63 *
64 * The entries are filled during rdma_restrack_add and
65 * freeing them is attempted during rdma_restrack_del.
66 *
67 * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
68 */
69 bool valid;
70 /*
71 * @kref: Protects destruction of the resource
72 */
73 struct kref kref;
74 /*
75 * @comp: Signals that all consumers of the resource have completed their work
76 */
77 struct completion comp;
78 /**
79 * @task: owner of resource tracking entity
80 *
81 * There are two types of entities: created by user and created
82 * by kernel.
83 *
84 * This is relevant for the entities created by users.
85 * For the entities created by kernel, this pointer will be NULL.
86 */
87 struct task_struct *task;
88 /**
89 * @kern_name: name of owner for the kernel created entities.
90 */
91 const char *kern_name;
92 /**
93 * @node: hash table entry
94 */
95 struct hlist_node node;
96 /**
97 * @type: various objects in restrack database
98 */
99 enum rdma_restrack_type type;
100};
101
102/**
103 * rdma_restrack_init() - initialize resource tracking
104 * @res: resource tracking root
105 */
106void rdma_restrack_init(struct rdma_restrack_root *res);
107
108/**
109 * rdma_restrack_clean() - clean resource tracking
110 * @res: resource tracking root
111 */
112void rdma_restrack_clean(struct rdma_restrack_root *res);
113
114/**
115 * rdma_restrack_count() - the current usage of specific object
116 * @res: resource tracking root
117 * @type: actual type of object to operate
118 * @ns: PID namespace
119 */
120int rdma_restrack_count(struct rdma_restrack_root *res,
121 enum rdma_restrack_type type,
122 struct pid_namespace *ns);
123
124/**
125 * rdma_restrack_add() - add object to the resource tracking database
126 * @res: resource entry
127 */
128void rdma_restrack_add(struct rdma_restrack_entry *res);
129
130/**
131 * rdma_restrack_del() - delete object from the resource tracking database
132 * @res: resource entry
133 * @type: actual type of object to operate
134 */
135void rdma_restrack_del(struct rdma_restrack_entry *res);
136
137/**
138 * rdma_is_kernel_res() - check whether the resource is owned by the kernel
139 * @res: resource entry
140 */
141static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res)
142{
143 return !res->task;
144}
145
146/**
147 * rdma_restrack_get() - grab to protect resource from release
148 * @res: resource entry
149 */
150int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
151
152/**
153 * rdma_restrack_put() - release resource
154 * @res: resource entry
155 */
156int rdma_restrack_put(struct rdma_restrack_entry *res);
157#endif /* _RDMA_RESTRACK_H_ */
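The get/put pair pins an entry while another context (for example the nldev dumpers) inspects it, and rdma_restrack_del() waits on @comp until the last reference drops. A hedged sketch of a reader, assuming the usual kref convention that rdma_restrack_get() returns non-zero on success; my_dump_owner is illustrative:

#include <linux/printk.h>
#include <linux/sched.h>
#include <rdma/restrack.h>

static void my_dump_owner(struct rdma_restrack_entry *res)
{
	/* Pin the entry so a concurrent rdma_restrack_del() cannot release
	 * it underneath us. */
	if (!rdma_restrack_get(res))
		return;

	if (rdma_is_kernel_res(res))
		pr_debug("owner: kernel consumer %s\n", res->kern_name);
	else
		pr_debug("owner: pid %d\n", task_pid_vnr(res->task));

	rdma_restrack_put(res);		/* the final put completes @comp */
}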
diff --git a/include/scsi/srp.h b/include/scsi/srp.h
index 5be834de491a..c16a3c9a4d9b 100644
--- a/include/scsi/srp.h
+++ b/include/scsi/srp.h
@@ -129,6 +129,23 @@ struct srp_login_req {
129 u8 target_port_id[16]; 129 u8 target_port_id[16];
130}; 130};
131 131
132/**
133 * struct srp_login_req_rdma - RDMA/CM login parameters.
134 *
135 * RDMA/CM over InfiniBand can only carry 92 - 36 = 56 bytes of private
136 * data. The %srp_login_req_rdma structure contains the same information as
137 * %srp_login_req but with the reserved data removed.
138 */
139struct srp_login_req_rdma {
140 u64 tag;
141 __be16 req_buf_fmt;
142 u8 req_flags;
143 u8 opcode;
144 __be32 req_it_iu_len;
145 u8 initiator_port_id[16];
146 u8 target_port_id[16];
147};
148
132/* 149/*
133 * The SRP spec defines the size of the LOGIN_RSP structure to be 52 150 * The SRP spec defines the size of the LOGIN_RSP structure to be 52
134 * bytes, so it needs to be packed to avoid having it padded to 56 151 * bytes, so it needs to be packed to avoid having it padded to 56
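The 56-byte figure in the comment above is the hard constraint struct srp_login_req_rdma is built around, so a consumer may want a compile-time check that the layout stays within it. A minimal sketch (the 48-byte struct currently fits with room to spare); my_check_login_size is illustrative:

#include <linux/bug.h>
#include <scsi/srp.h>

static inline void my_check_login_size(void)
{
	/* 92 bytes of IB REQ private data minus 36 reserved bytes = 56 usable. */
	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) > 56);
}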
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index 398a514ee446..db54115be044 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -82,6 +82,15 @@ struct bnxt_re_qp_resp {
82 __u32 rsvd; 82 __u32 rsvd;
83}; 83};
84 84
85struct bnxt_re_srq_req {
86 __u64 srqva;
87 __u64 srq_handle;
88};
89
90struct bnxt_re_srq_resp {
91 __u32 srqid;
92};
93
85enum bnxt_re_shpg_offt { 94enum bnxt_re_shpg_offt {
86 BNXT_RE_BEG_RESV_OFFT = 0x00, 95 BNXT_RE_BEG_RESV_OFFT = 0x00,
87 BNXT_RE_AVID_OFFT = 0x10, 96 BNXT_RE_AVID_OFFT = 0x10,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 7e11bb8651b6..04d0e67b1312 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -402,13 +402,18 @@ struct ib_uverbs_create_cq {
402 __u64 driver_data[0]; 402 __u64 driver_data[0];
403}; 403};
404 404
405enum ib_uverbs_ex_create_cq_flags {
406 IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
407 IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
408};
409
405struct ib_uverbs_ex_create_cq { 410struct ib_uverbs_ex_create_cq {
406 __u64 user_handle; 411 __u64 user_handle;
407 __u32 cqe; 412 __u32 cqe;
408 __u32 comp_vector; 413 __u32 comp_vector;
409 __s32 comp_channel; 414 __s32 comp_channel;
410 __u32 comp_mask; 415 __u32 comp_mask;
411 __u32 flags; 416 __u32 flags; /* bitmask of ib_uverbs_ex_create_cq_flags */
412 __u32 reserved; 417 __u32 reserved;
413}; 418};
414 419
@@ -449,7 +454,7 @@ struct ib_uverbs_wc {
449 __u32 vendor_err; 454 __u32 vendor_err;
450 __u32 byte_len; 455 __u32 byte_len;
451 union { 456 union {
452 __u32 imm_data; 457 __be32 imm_data;
453 __u32 invalidate_rkey; 458 __u32 invalidate_rkey;
454 } ex; 459 } ex;
455 __u32 qp_num; 460 __u32 qp_num;
@@ -765,7 +770,7 @@ struct ib_uverbs_send_wr {
765 __u32 opcode; 770 __u32 opcode;
766 __u32 send_flags; 771 __u32 send_flags;
767 union { 772 union {
768 __u32 imm_data; 773 __be32 imm_data;
769 __u32 invalidate_rkey; 774 __u32 invalidate_rkey;
770 } ex; 775 } ex;
771 union { 776 union {
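Marking imm_data as __be32 documents that immediate data crosses the uverbs ABI in network byte order. A hedged userspace-style sketch of the conversion on the receive side, using the libibverbs ibv_wc layout that mirrors this structure:

#include <stdint.h>
#include <arpa/inet.h>
#include <infiniband/verbs.h>

static uint32_t my_get_imm(const struct ibv_wc *wc)
{
	/* Only meaningful when the completion actually carried immediate data. */
	if (!(wc->wc_flags & IBV_WC_WITH_IMM))
		return 0;

	/* Immediate data is big-endian on the wire and in the ABI;
	 * convert to host order before interpreting it. */
	return ntohl(wc->imm_data);
}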
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index 224b52b6279c..7f9c37346613 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -97,8 +97,8 @@ struct mlx4_ib_create_srq_resp {
97}; 97};
98 98
99struct mlx4_ib_create_qp_rss { 99struct mlx4_ib_create_qp_rss {
100 __u64 rx_hash_fields_mask; 100 __u64 rx_hash_fields_mask; /* Use enum mlx4_ib_rx_hash_fields */
101 __u8 rx_hash_function; 101 __u8 rx_hash_function; /* Use enum mlx4_ib_rx_hash_function_flags */
102 __u8 reserved[7]; 102 __u8 reserved[7];
103 __u8 rx_hash_key[40]; 103 __u8 rx_hash_key[40];
104 __u32 comp_mask; 104 __u32 comp_mask;
@@ -152,7 +152,8 @@ enum mlx4_ib_rx_hash_fields {
152 MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4, 152 MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4,
153 MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5, 153 MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5,
154 MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6, 154 MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6,
155 MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7 155 MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7,
156 MLX4_IB_RX_HASH_INNER = 1ULL << 31,
156}; 157};
157 158
158#endif /* MLX4_ABI_USER_H */ 159#endif /* MLX4_ABI_USER_H */
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index a33e0517d3fd..1111aa4e7c1e 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -41,6 +41,9 @@ enum {
41 MLX5_QP_FLAG_SIGNATURE = 1 << 0, 41 MLX5_QP_FLAG_SIGNATURE = 1 << 0,
42 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, 42 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
43 MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2, 43 MLX5_QP_FLAG_TUNNEL_OFFLOADS = 1 << 2,
44 MLX5_QP_FLAG_BFREG_INDEX = 1 << 3,
45 MLX5_QP_FLAG_TYPE_DCT = 1 << 4,
46 MLX5_QP_FLAG_TYPE_DCI = 1 << 5,
44}; 47};
45 48
46enum { 49enum {
@@ -121,10 +124,12 @@ struct mlx5_ib_alloc_ucontext_resp {
121 __u8 cqe_version; 124 __u8 cqe_version;
122 __u8 cmds_supp_uhw; 125 __u8 cmds_supp_uhw;
123 __u8 eth_min_inline; 126 __u8 eth_min_inline;
124 __u8 reserved2; 127 __u8 clock_info_versions;
125 __u64 hca_core_clock_offset; 128 __u64 hca_core_clock_offset;
126 __u32 log_uar_size; 129 __u32 log_uar_size;
127 __u32 num_uars_per_page; 130 __u32 num_uars_per_page;
131 __u32 num_dyn_bfregs;
132 __u32 reserved3;
128}; 133};
129 134
130struct mlx5_ib_alloc_pd_resp { 135struct mlx5_ib_alloc_pd_resp {
@@ -280,8 +285,11 @@ struct mlx5_ib_create_qp {
280 __u32 rq_wqe_shift; 285 __u32 rq_wqe_shift;
281 __u32 flags; 286 __u32 flags;
282 __u32 uidx; 287 __u32 uidx;
283 __u32 reserved0; 288 __u32 bfreg_index;
284 __u64 sq_buf_addr; 289 union {
290 __u64 sq_buf_addr;
291 __u64 access_key;
292 };
285}; 293};
286 294
287/* RX Hash function flags */ 295/* RX Hash function flags */
@@ -307,7 +315,7 @@ enum mlx5_rx_hash_fields {
307 MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, 315 MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6,
308 MLX5_RX_HASH_DST_PORT_UDP = 1 << 7, 316 MLX5_RX_HASH_DST_PORT_UDP = 1 << 7,
309 /* Save bits for future fields */ 317 /* Save bits for future fields */
310 MLX5_RX_HASH_INNER = 1 << 31 318 MLX5_RX_HASH_INNER = (1UL << 31),
311}; 319};
312 320
313struct mlx5_ib_create_qp_rss { 321struct mlx5_ib_create_qp_rss {
@@ -354,6 +362,11 @@ struct mlx5_ib_create_ah_resp {
354 __u8 reserved[6]; 362 __u8 reserved[6];
355}; 363};
356 364
365struct mlx5_ib_modify_qp_resp {
366 __u32 response_length;
367 __u32 dctn;
368};
369
357struct mlx5_ib_create_wq_resp { 370struct mlx5_ib_create_wq_resp {
358 __u32 response_length; 371 __u32 response_length;
359 __u32 reserved; 372 __u32 reserved;
@@ -368,4 +381,36 @@ struct mlx5_ib_modify_wq {
368 __u32 comp_mask; 381 __u32 comp_mask;
369 __u32 reserved; 382 __u32 reserved;
370}; 383};
384
385struct mlx5_ib_clock_info {
386 __u32 sign;
387 __u32 resv;
388 __u64 nsec;
389 __u64 cycles;
390 __u64 frac;
391 __u32 mult;
392 __u32 shift;
393 __u64 mask;
394 __u64 overflow_period;
395};
396
397enum mlx5_ib_mmap_cmd {
398 MLX5_IB_MMAP_REGULAR_PAGE = 0,
399 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
400 MLX5_IB_MMAP_WC_PAGE = 2,
401 MLX5_IB_MMAP_NC_PAGE = 3,
402 /* 5 is chosen in order to be compatible with old versions of libmlx5 */
403 MLX5_IB_MMAP_CORE_CLOCK = 5,
404 MLX5_IB_MMAP_ALLOC_WC = 6,
405 MLX5_IB_MMAP_CLOCK_INFO = 7,
406};
407
408enum {
409 MLX5_IB_CLOCK_INFO_KERNEL_UPDATING = 1,
410};
411
412/* Bit indexes for the mlx5_alloc_ucontext_resp.clock_info_versions bitmap */
413enum {
414 MLX5_IB_CLOCK_INFO_V1 = 0,
415};
371#endif /* MLX5_ABI_USER_H */ 416#endif /* MLX5_ABI_USER_H */
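struct mlx5_ib_clock_info is a page the kernel keeps updating; the sign field doubles as a sequence counter, with MLX5_IB_CLOCK_INFO_KERNEL_UPDATING set while an update is in flight. A heavily hedged userspace sketch of the retry pattern for taking a consistent snapshot; the mmap() that obtains 'page' (via the MLX5_IB_MMAP_CLOCK_INFO command) and any required memory barriers are assumed to be handled by the provider library:

#include <stdint.h>
#include <string.h>
#include <rdma/mlx5-abi.h>	/* installed copy of this uapi header */

static void my_snapshot_clock_info(const volatile struct mlx5_ib_clock_info *page,
				   struct mlx5_ib_clock_info *out)
{
	uint32_t start;

	for (;;) {
		start = page->sign;
		if (start & MLX5_IB_CLOCK_INFO_KERNEL_UPDATING)
			continue;	/* kernel is mid-update, try again */
		memcpy(out, (const void *)page, sizeof(*out));
		if (page->sign == start)
			return;		/* snapshot was consistent */
	}
}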
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index cc002e316d09..17e59bec169e 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -236,6 +236,10 @@ enum rdma_nldev_command {
236 RDMA_NLDEV_CMD_PORT_NEW, 236 RDMA_NLDEV_CMD_PORT_NEW,
237 RDMA_NLDEV_CMD_PORT_DEL, 237 RDMA_NLDEV_CMD_PORT_DEL,
238 238
239 RDMA_NLDEV_CMD_RES_GET, /* can dump */
240
241 RDMA_NLDEV_CMD_RES_QP_GET, /* can dump */
242
239 RDMA_NLDEV_NUM_OPS 243 RDMA_NLDEV_NUM_OPS
240}; 244};
241 245
@@ -303,6 +307,51 @@ enum rdma_nldev_attr {
303 307
304 RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */ 308 RDMA_NLDEV_ATTR_DEV_NODE_TYPE, /* u8 */
305 309
310 RDMA_NLDEV_ATTR_RES_SUMMARY, /* nested table */
311 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY, /* nested table */
312 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, /* string */
313 RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, /* u64 */
314
315 RDMA_NLDEV_ATTR_RES_QP, /* nested table */
316 RDMA_NLDEV_ATTR_RES_QP_ENTRY, /* nested table */
317 /*
318 * Local QPN
319 */
320 RDMA_NLDEV_ATTR_RES_LQPN, /* u32 */
321 /*
322 * Remote QPN,
323 * Applicable to RC and UC only; see IBTA 11.2.5.3 QUERY QUEUE PAIR
324 */
325 RDMA_NLDEV_ATTR_RES_RQPN, /* u32 */
326 /*
327 * Receive Queue PSN,
328 * Applicable to RC and UC only; see IBTA 11.2.5.3 QUERY QUEUE PAIR
329 */
330 RDMA_NLDEV_ATTR_RES_RQ_PSN, /* u32 */
331 /*
332 * Send Queue PSN
333 */
334 RDMA_NLDEV_ATTR_RES_SQ_PSN, /* u32 */
335 RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, /* u8 */
336 /*
337 * QP types as visible to RDMA/core; reserved QP types
338 * are not exported through this interface.
339 */
340 RDMA_NLDEV_ATTR_RES_TYPE, /* u8 */
341 RDMA_NLDEV_ATTR_RES_STATE, /* u8 */
342 /*
343 * Process ID of the object's creator;
344 * for objects of kernel origin, no PID exists.
345 */
346 RDMA_NLDEV_ATTR_RES_PID, /* u32 */
347 /*
348 * The name of the resource's creator.
349 * It exists only for kernel objects.
350 * For user-created objects, the user is expected
351 * to read the /proc/PID/comm file instead.
352 */
353 RDMA_NLDEV_ATTR_RES_KERN_NAME, /* string */
354
306 RDMA_NLDEV_ATTR_MAX 355 RDMA_NLDEV_ATTR_MAX
307}; 356};
308#endif /* _UAPI_RDMA_NETLINK_H */ 357#endif /* _UAPI_RDMA_NETLINK_H */
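The comments above imply a nested layout: RDMA_NLDEV_ATTR_RES_QP wraps a list of RDMA_NLDEV_ATTR_RES_QP_ENTRY tables, each holding the per-QP attributes. A hedged kernel-side sketch of emitting one entry with the standard netlink attribute helpers; my_fill_one_qp is illustrative and message setup/teardown is omitted:

#include <net/netlink.h>
#include <rdma/rdma_netlink.h>

static int my_fill_one_qp(struct sk_buff *msg, u32 lqpn, u32 rqpn, u8 qp_type)
{
	struct nlattr *entry;

	entry = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (!entry)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, lqpn) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, rqpn) ||	/* RC/UC only */
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp_type)) {
		nla_nest_cancel(msg, entry);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, entry);
	return 0;
}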
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
index aaa352f2f110..02ca0d0f1eb7 100644
--- a/include/uapi/rdma/vmw_pvrdma-abi.h
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -52,12 +52,14 @@
52#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */ 52#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */
53#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */ 53#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */
54#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */ 54#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */
55#define PVRDMA_UAR_QP_SEND BIT(30) /* Send bit. */ 55#define PVRDMA_UAR_QP_SEND (1 << 30) /* Send bit. */
56#define PVRDMA_UAR_QP_RECV BIT(31) /* Recv bit. */ 56#define PVRDMA_UAR_QP_RECV (1 << 31) /* Recv bit. */
57#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */ 57#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */
58#define PVRDMA_UAR_CQ_ARM_SOL BIT(29) /* Arm solicited bit. */ 58#define PVRDMA_UAR_CQ_ARM_SOL (1 << 29) /* Arm solicited bit. */
59#define PVRDMA_UAR_CQ_ARM BIT(30) /* Arm bit. */ 59#define PVRDMA_UAR_CQ_ARM (1 << 30) /* Arm bit. */
60#define PVRDMA_UAR_CQ_POLL BIT(31) /* Poll bit. */ 60#define PVRDMA_UAR_CQ_POLL (1 << 31) /* Poll bit. */
61#define PVRDMA_UAR_SRQ_OFFSET 8 /* SRQ doorbell. */
62#define PVRDMA_UAR_SRQ_RECV (1 << 30) /* Recv bit. */
61 63
62enum pvrdma_wr_opcode { 64enum pvrdma_wr_opcode {
63 PVRDMA_WR_RDMA_WRITE, 65 PVRDMA_WR_RDMA_WRITE,
diff --git a/lib/kobject.c b/lib/kobject.c
index 763d70a18941..06b849eee0ca 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -1039,6 +1039,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type)
1039 1039
1040 return ns; 1040 return ns;
1041} 1041}
1042EXPORT_SYMBOL_GPL(kobj_ns_grab_current);
1042 1043
1043const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) 1044const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
1044{ 1045{
@@ -1074,3 +1075,4 @@ void kobj_ns_drop(enum kobj_ns_type type, void *ns)
1074 kobj_ns_ops_tbl[type]->drop_ns(ns); 1075 kobj_ns_ops_tbl[type]->drop_ns(ns);
1075 spin_unlock(&kobj_ns_type_lock); 1076 spin_unlock(&kobj_ns_type_lock);
1076} 1077}
1078EXPORT_SYMBOL_GPL(kobj_ns_drop);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..b2a5067b4afe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,13 +301,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
301 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); 301 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
302 if (rds_conn_state(conn) == RDS_CONN_UP) { 302 if (rds_conn_state(conn) == RDS_CONN_UP) {
303 struct rds_ib_device *rds_ibdev; 303 struct rds_ib_device *rds_ibdev;
304 struct rdma_dev_addr *dev_addr;
305 304
306 ic = conn->c_transport_data; 305 ic = conn->c_transport_data;
307 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
308 306
309 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 307 rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
310 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 308 (union ib_gid *)&iinfo->dst_gid);
311 309
312 rds_ibdev = ic->rds_ibdev; 310 rds_ibdev = ic->rds_ibdev;
313 iinfo->max_send_wr = ic->i_send_ring.w_nr; 311 iinfo->max_send_wr = ic->i_send_ring.w_nr;