author     Linus Torvalds <torvalds@linux-foundation.org>  2018-08-17 15:44:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-08-17 15:44:48 -0400
commit     9bd553929f68921be0f2014dd06561e0c8249a0d (patch)
tree       720e556374e3500af9a0210178fabfc6bd0f754c
parent     022ff62c3d8c3758d15ccc6b58615fd8f257ba85 (diff)
parent     0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "This has been a large cycle for RDMA, with several major patch series
  reworking parts of the core code.

   - Rework the so-called 'gid cache' and internal APIs to use a kref'd
     pointer to a struct instead of copying, push this upwards into the
     callers and add more stuff to the struct. The new design avoids some
     ugly races the old one suffered with. This is part of the namespace
     enablement work as the new struct is learning to be namespace aware.

   - Various uapi cleanups, moving more stuff to include/uapi and fixing
     some long standing bugs that have recently been discovered.

   - Driver updates for mlx5, mlx4, i40iw, rxe, cxgb4, hfi1, usnic,
     pvrdma, and hns

   - Provide max_send_sge and max_recv_sge attributes to better support
     HW where these values are asymmetric.

   - mlx5 user API 'devx' allows sending commands directly to the device
     FW, instead of trying to cram every wild and niche feature into the
     common API. Sort of like what GPU does.

   - Major write() and ioctl() API rework to cleanly support PCI device
     hot unplug and advance the ioctl conversion work

   - Sparse and compile warning cleanups

   - Add 'const' to the ib_post_send() signature, and permit a NULL
     'bad_wr', which is the common use case

   - Various patches to avoid high order allocations across the stack

   - SRQ support for cxgb4, hns and qedr

   - Changes to IPoIB to better follow the netdev model for working with
     struct net_device lifetime"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (312 commits)
  Revert "net/smc: Replace ib_query_gid with rdma_get_gid_attr"
  RDMA/hns: Fix usage of bitmap allocation functions return values
  IB/core: Change filter function return type from int to bool
  IB/core: Update GID entries for netdevice whose mac address changes
  IB/core: Add default GIDs of the bond master netdev
  IB/core: Consider adding default GIDs of bond device
  IB/core: Delete lower netdevice default GID entries in bonding scenario
  IB/core: Avoid confusing del_netdev_default_ips
  IB/core: Add comment for change upper netevent handling
  qedr: Add user space support for SRQ
  qedr: Add support for kernel mode SRQ's
  qedr: Add wrapping generic structure for qpidr and adjust idr routines.
  IB/mlx5: Fix leaking stack memory to userspace
  Update the e-mail address of Bart Van Assche
  IB/ucm: Fix compiling ucm.c
  IB/uverbs: Do not check for device disassociation during ioctl
  IB/uverbs: Remove struct uverbs_root_spec and all supporting code
  IB/uverbs: Use uverbs_api to unmarshal ioctl commands
  IB/uverbs: Use uverbs_alloc for allocations
  IB/uverbs: Add a simple allocator to uverbs_attr_bundle
  ...
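As an illustration of the reworked GID cache described in the first bullet, the sketch below shows how a consumer might look up and then release a reference-counted GID attribute. rdma_get_gid_attr() and rdma_put_gid_attr() are the helpers exported by the drivers/infiniband/core/cache.c changes in this pull; the example_inspect_gid() wrapper, its caller context and the include set are hypothetical, intended only to show the get/put pairing that replaces the old copy-based ib_get_cached_gid() flow.

/*
 * Minimal sketch, not part of the patch: how a consumer might use the
 * kref'd GID attribute API added in this cycle.  rdma_get_gid_attr()
 * and rdma_put_gid_attr() come from the drivers/infiniband/core/cache.c
 * changes below; this caller and its include list are hypothetical.
 */
#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_cache.h>

static int example_inspect_gid(struct ib_device *device, u8 port_num,
			       int index)
{
	const struct ib_gid_attr *attr;

	/* Takes a reference on the cached entry instead of copying it. */
	attr = rdma_get_gid_attr(device, port_num, index);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	pr_info("gid %pI6 type %d on %s port %u\n",
		attr->gid.raw, attr->gid_type, device->name, port_num);

	/* Every successful lookup must be paired with a put. */
	rdma_put_gid_attr(attr);
	return 0;
}

Because the entry is reference counted rather than copied, the attribute stays valid for as long as the reference is held, which is what closes the races mentioned above.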
-rw-r--r--.mailmap2
-rw-r--r--MAINTAINERS16
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/core/Makefile5
-rw-r--r--drivers/infiniband/core/addr.c16
-rw-r--r--drivers/infiniband/core/cache.c724
-rw-r--r--drivers/infiniband/core/cm.c147
-rw-r--r--drivers/infiniband/core/cm_msgs.h7
-rw-r--r--drivers/infiniband/core/cma.c362
-rw-r--r--drivers/infiniband/core/core_priv.h4
-rw-r--r--drivers/infiniband/core/device.c23
-rw-r--r--drivers/infiniband/core/mad.c113
-rw-r--r--drivers/infiniband/core/mad_priv.h7
-rw-r--r--drivers/infiniband/core/multicast.c40
-rw-r--r--drivers/infiniband/core/nldev.c16
-rw-r--r--drivers/infiniband/core/rdma_core.c1018
-rw-r--r--drivers/infiniband/core/rdma_core.h96
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c306
-rw-r--r--drivers/infiniband/core/rw.c8
-rw-r--r--drivers/infiniband/core/sa_query.c138
-rw-r--r--drivers/infiniband/core/sysfs.c66
-rw-r--r--drivers/infiniband/core/ucm.c15
-rw-r--r--drivers/infiniband/core/umem.c62
-rw-r--r--drivers/infiniband/core/user_mad.c1
-rw-r--r--drivers/infiniband/core/uverbs.h34
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c599
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c709
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c664
-rw-r--r--drivers/infiniband/core/uverbs_main.c232
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c200
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c108
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c154
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c61
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c170
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c88
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c346
-rw-r--r--drivers/infiniband/core/verbs.c523
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c144
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h15
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c64
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c44
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h8
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c32
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c90
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c269
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c20
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h57
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c56
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c1051
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c51
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h164
-rw-r--r--drivers/infiniband/hw/cxgb4/t4fw_ri_api.h68
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c205
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h30
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c63
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c10
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h243
-rw-r--r--drivers/infiniband/hw/hfi1/init.c44
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c19
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c14
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c6
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h24
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c6
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c18
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c12
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c21
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h9
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h45
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c7
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c430
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c698
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h136
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c15
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c55
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c26
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c83
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c54
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c70
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c29
-rw-r--r--drivers/infiniband/hw/mlx4/main.c41
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h21
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c421
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c4
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c11
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h1
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c2
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c1119
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c252
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c8
-rw-r--r--drivers/infiniband/hw/mlx5/main.c570
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h85
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c34
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c294
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c8
-rw-r--r--drivers/infiniband/hw/nes/nes.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c8
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c6
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c74
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c32
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c26
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c50
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h12
-rw-r--r--drivers/infiniband/hw/qedr/main.c107
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h43
-rw-r--r--drivers/infiniband/hw/qedr/qedr_hsi_rdma.h11
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c12
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c37
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.h8
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c625
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h17
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h5
-rw-r--r--drivers/infiniband/hw/usnic/Kconfig2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c10
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h8
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c40
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c26
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c52
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c11
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h15
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c27
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h12
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c67
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c31
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c24
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c70
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h32
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c81
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c444
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c261
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c16
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c5
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c14
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c26
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c27
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c71
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c37
-rw-r--r--drivers/nvme/host/rdma.c13
-rw-r--r--drivers/nvme/target/rdma.c13
-rw-r--r--fs/cifs/smbdirect.c32
-rw-r--r--include/linux/idr.h11
-rw-r--r--include/linux/mlx5/driver.h3
-rw-r--r--include/linux/mlx5/fs.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h5
-rw-r--r--include/linux/overflow.h31
-rw-r--r--include/rdma/ib.h4
-rw-r--r--include/rdma/ib_addr.h6
-rw-r--r--include/rdma/ib_cache.h83
-rw-r--r--include/rdma/ib_cm.h18
-rw-r--r--include/rdma/ib_mad.h33
-rw-r--r--include/rdma/ib_sa.h49
-rw-r--r--include/rdma/ib_verbs.h257
-rw-r--r--include/rdma/opa_addr.h2
-rw-r--r--include/rdma/rdma_cm.h2
-rw-r--r--include/rdma/rdmavt_qp.h30
-rw-r--r--include/rdma/uverbs_ioctl.h627
-rw-r--r--include/rdma/uverbs_named_ioctl.h109
-rw-r--r--include/rdma/uverbs_std_types.h96
-rw-r--r--include/rdma/uverbs_types.h133
-rw-r--r--include/uapi/rdma/cxgb4-abi.h32
-rw-r--r--include/uapi/rdma/hns-abi.h1
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h7
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h58
-rw-r--r--include/uapi/rdma/ib_user_verbs.h5
-rw-r--r--include/uapi/rdma/mlx5-abi.h6
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h121
-rw-r--r--include/uapi/rdma/qedr-abi.h17
-rw-r--r--include/uapi/rdma/rdma_user_ioctl_cmds.h7
-rw-r--r--lib/test_overflow.c198
-rw-r--r--net/9p/trans_rdma.c8
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib_frmr.c11
-rw-r--r--net/rds/ib_recv.c6
-rw-r--r--net/rds/ib_send.c6
-rw-r--r--net/smc/smc_core.c1
-rw-r--r--net/smc/smc_ib.c1
-rw-r--r--net/smc/smc_tx.c3
-rw-r--r--net/smc/smc_wr.c9
-rw-r--r--net/smc/smc_wr.h3
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c7
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c5
233 files changed, 12380 insertions(+), 7059 deletions(-)
diff --git a/.mailmap b/.mailmap
index d96147eb1a68..2a6f685bf706 100644
--- a/.mailmap
+++ b/.mailmap
@@ -31,6 +31,8 @@ Arnaud Patard <arnaud.patard@rtp-net.org>
31Arnd Bergmann <arnd@arndb.de> 31Arnd Bergmann <arnd@arndb.de>
32Axel Dyks <xl@xlsigned.net> 32Axel Dyks <xl@xlsigned.net>
33Axel Lin <axel.lin@gmail.com> 33Axel Lin <axel.lin@gmail.com>
34Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
35Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
34Ben Gardner <bgardner@wabtec.com> 36Ben Gardner <bgardner@wabtec.com>
35Ben M Cahill <ben.m.cahill@intel.com> 37Ben M Cahill <ben.m.cahill@intel.com>
36Björn Steinbrink <B.Steinbrink@gmx.de> 38Björn Steinbrink <B.Steinbrink@gmx.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index 7378426b663c..b6dcaaa23adf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3536,7 +3536,6 @@ F: drivers/net/ethernet/cisco/enic/
3536 3536
3537CISCO VIC LOW LATENCY NIC DRIVER 3537CISCO VIC LOW LATENCY NIC DRIVER
3538M: Christian Benvenuti <benve@cisco.com> 3538M: Christian Benvenuti <benve@cisco.com>
3539M: Dave Goodell <dgoodell@cisco.com>
3540S: Supported 3539S: Supported
3541F: drivers/infiniband/hw/usnic/ 3540F: drivers/infiniband/hw/usnic/
3542 3541
@@ -7623,9 +7622,8 @@ S: Maintained
7623F: drivers/firmware/iscsi_ibft* 7622F: drivers/firmware/iscsi_ibft*
7624 7623
7625ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR 7624ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
7626M: Or Gerlitz <ogerlitz@mellanox.com>
7627M: Sagi Grimberg <sagi@grimberg.me> 7625M: Sagi Grimberg <sagi@grimberg.me>
7628M: Roi Dayan <roid@mellanox.com> 7626M: Max Gurtovoy <maxg@mellanox.com>
7629L: linux-rdma@vger.kernel.org 7627L: linux-rdma@vger.kernel.org
7630S: Supported 7628S: Supported
7631W: http://www.openfabrics.org 7629W: http://www.openfabrics.org
@@ -12754,15 +12752,21 @@ S: Maintained
12754F: drivers/scsi/sr* 12752F: drivers/scsi/sr*
12755 12753
12756SCSI RDMA PROTOCOL (SRP) INITIATOR 12754SCSI RDMA PROTOCOL (SRP) INITIATOR
12757M: Bart Van Assche <bart.vanassche@sandisk.com> 12755M: Bart Van Assche <bvanassche@acm.org>
12758L: linux-rdma@vger.kernel.org 12756L: linux-rdma@vger.kernel.org
12759S: Supported 12757S: Supported
12760W: http://www.openfabrics.org
12761Q: http://patchwork.kernel.org/project/linux-rdma/list/ 12758Q: http://patchwork.kernel.org/project/linux-rdma/list/
12762T: git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git
12763F: drivers/infiniband/ulp/srp/ 12759F: drivers/infiniband/ulp/srp/
12764F: include/scsi/srp.h 12760F: include/scsi/srp.h
12765 12761
12762SCSI RDMA PROTOCOL (SRP) TARGET
12763M: Bart Van Assche <bvanassche@acm.org>
12764L: linux-rdma@vger.kernel.org
12765L: target-devel@vger.kernel.org
12766S: Supported
12767Q: http://patchwork.kernel.org/project/linux-rdma/list/
12768F: drivers/infiniband/ulp/srpt/
12769
12766SCSI SG DRIVER 12770SCSI SG DRIVER
12767M: Doug Gilbert <dgilbert@interlog.com> 12771M: Doug Gilbert <dgilbert@interlog.com>
12768L: linux-scsi@vger.kernel.org 12772L: linux-scsi@vger.kernel.org
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b03af54367c0..d160d2d1f3a3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -37,7 +37,7 @@ config INFINIBAND_USER_ACCESS
37 37
38config INFINIBAND_USER_ACCESS_UCM 38config INFINIBAND_USER_ACCESS_UCM
39 bool "Userspace CM (UCM, DEPRECATED)" 39 bool "Userspace CM (UCM, DEPRECATED)"
40 depends on BROKEN 40 depends on BROKEN || COMPILE_TEST
41 depends on INFINIBAND_USER_ACCESS 41 depends on INFINIBAND_USER_ACCESS
42 help 42 help
43 The UCM module has known security flaws, which no one is 43 The UCM module has known security flaws, which no one is
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 61667705d746..867cee5e27b2 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -35,6 +35,7 @@ ib_ucm-y := ucm.o
35 35
36ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \ 36ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
37 rdma_core.o uverbs_std_types.o uverbs_ioctl.o \ 37 rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
38 uverbs_ioctl_merge.o uverbs_std_types_cq.o \ 38 uverbs_std_types_cq.o \
39 uverbs_std_types_flow_action.o uverbs_std_types_dm.o \ 39 uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
40 uverbs_std_types_mr.o uverbs_std_types_counters.o 40 uverbs_std_types_mr.o uverbs_std_types_counters.o \
41 uverbs_uapi.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 4f32c4062fb6..46b855a42884 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
188 return -ENODATA; 188 return -ENODATA;
189} 189}
190 190
191int rdma_addr_size(struct sockaddr *addr) 191int rdma_addr_size(const struct sockaddr *addr)
192{ 192{
193 switch (addr->sa_family) { 193 switch (addr->sa_family) {
194 case AF_INET: 194 case AF_INET:
@@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst,
315 int ret = 0; 315 int ret = 0;
316 316
317 n = dst_neigh_lookup(dst, daddr); 317 n = dst_neigh_lookup(dst, daddr);
318 if (!n)
319 return -ENODATA;
318 320
319 rcu_read_lock(); 321 if (!(n->nud_state & NUD_VALID)) {
320 if (!n || !(n->nud_state & NUD_VALID)) { 322 neigh_event_send(n, NULL);
321 if (n)
322 neigh_event_send(n, NULL);
323 ret = -ENODATA; 323 ret = -ENODATA;
324 } else { 324 } else {
325 rdma_copy_addr(dev_addr, dst->dev, n->ha); 325 rdma_copy_addr(dev_addr, dst->dev, n->ha);
326 } 326 }
327 rcu_read_unlock();
328 327
329 if (n) 328 neigh_release(n);
330 neigh_release(n);
331 329
332 return ret; 330 return ret;
333} 331}
@@ -587,7 +585,7 @@ static void process_one_req(struct work_struct *_work)
587 spin_unlock_bh(&lock); 585 spin_unlock_bh(&lock);
588} 586}
589 587
590int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, 588int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
591 struct rdma_dev_addr *addr, int timeout_ms, 589 struct rdma_dev_addr *addr, int timeout_ms,
592 void (*callback)(int status, struct sockaddr *src_addr, 590 void (*callback)(int status, struct sockaddr *src_addr,
593 struct rdma_dev_addr *addr, void *context), 591 struct rdma_dev_addr *addr, void *context),
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81d66f56e38f..0bee1f4b914e 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -66,20 +66,28 @@ enum gid_attr_find_mask {
66 GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, 66 GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
67}; 67};
68 68
69enum gid_table_entry_props { 69enum gid_table_entry_state {
70 GID_TABLE_ENTRY_INVALID = 1UL << 0, 70 GID_TABLE_ENTRY_INVALID = 1,
71 GID_TABLE_ENTRY_DEFAULT = 1UL << 1, 71 GID_TABLE_ENTRY_VALID = 2,
72 /*
73 * Indicates that entry is pending to be removed, there may
74 * be active users of this GID entry.
75 * When last user of the GID entry releases reference to it,
76 * GID entry is detached from the table.
77 */
78 GID_TABLE_ENTRY_PENDING_DEL = 3,
72}; 79};
73 80
74struct ib_gid_table_entry { 81struct ib_gid_table_entry {
75 unsigned long props; 82 struct kref kref;
76 union ib_gid gid; 83 struct work_struct del_work;
77 struct ib_gid_attr attr; 84 struct ib_gid_attr attr;
78 void *context; 85 void *context;
86 enum gid_table_entry_state state;
79}; 87};
80 88
81struct ib_gid_table { 89struct ib_gid_table {
82 int sz; 90 int sz;
83 /* In RoCE, adding a GID to the table requires: 91 /* In RoCE, adding a GID to the table requires:
84 * (a) Find if this GID is already exists. 92 * (a) Find if this GID is already exists.
85 * (b) Find a free space. 93 * (b) Find a free space.
@@ -91,13 +99,16 @@ struct ib_gid_table {
91 * 99 *
92 **/ 100 **/
93 /* Any writer to data_vec must hold this lock and the write side of 101 /* Any writer to data_vec must hold this lock and the write side of
94 * rwlock. readers must hold only rwlock. All writers must be in a 102 * rwlock. Readers must hold only rwlock. All writers must be in a
95 * sleepable context. 103 * sleepable context.
96 */ 104 */
97 struct mutex lock; 105 struct mutex lock;
98 /* rwlock protects data_vec[ix]->props. */ 106 /* rwlock protects data_vec[ix]->state and entry pointer.
99 rwlock_t rwlock; 107 */
100 struct ib_gid_table_entry *data_vec; 108 rwlock_t rwlock;
109 struct ib_gid_table_entry **data_vec;
110 /* bit field, each bit indicates the index of default GID */
111 u32 default_gid_indices;
101}; 112};
102 113
103static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) 114static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
@@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid)
135} 146}
136EXPORT_SYMBOL(rdma_is_zero_gid); 147EXPORT_SYMBOL(rdma_is_zero_gid);
137 148
149/** is_gid_index_default - Check if a given index belongs to
150 * reserved default GIDs or not.
151 * @table: GID table pointer
152 * @index: Index to check in GID table
153 * Returns true if index is one of the reserved default GID index otherwise
154 * returns false.
155 */
156static bool is_gid_index_default(const struct ib_gid_table *table,
157 unsigned int index)
158{
159 return index < 32 && (BIT(index) & table->default_gid_indices);
160}
161
138int ib_cache_gid_parse_type_str(const char *buf) 162int ib_cache_gid_parse_type_str(const char *buf)
139{ 163{
140 unsigned int i; 164 unsigned int i;
@@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
164 return device->cache.ports[port - rdma_start_port(device)].gid; 188 return device->cache.ports[port - rdma_start_port(device)].gid;
165} 189}
166 190
167static void del_roce_gid(struct ib_device *device, u8 port_num, 191static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
168 struct ib_gid_table *table, int ix) 192{
193 return !entry;
194}
195
196static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
197{
198 return entry && entry->state == GID_TABLE_ENTRY_VALID;
199}
200
201static void schedule_free_gid(struct kref *kref)
169{ 202{
203 struct ib_gid_table_entry *entry =
204 container_of(kref, struct ib_gid_table_entry, kref);
205
206 queue_work(ib_wq, &entry->del_work);
207}
208
209static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
210{
211 struct ib_device *device = entry->attr.device;
212 u8 port_num = entry->attr.port_num;
213 struct ib_gid_table *table = rdma_gid_table(device, port_num);
214
170 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 215 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
171 device->name, port_num, ix, 216 device->name, port_num, entry->attr.index,
172 table->data_vec[ix].gid.raw); 217 entry->attr.gid.raw);
218
219 if (rdma_cap_roce_gid_table(device, port_num) &&
220 entry->state != GID_TABLE_ENTRY_INVALID)
221 device->del_gid(&entry->attr, &entry->context);
222
223 write_lock_irq(&table->rwlock);
173 224
174 if (rdma_cap_roce_gid_table(device, port_num)) 225 /*
175 device->del_gid(&table->data_vec[ix].attr, 226 * The only way to avoid overwriting NULL in table is
176 &table->data_vec[ix].context); 227 * by comparing if it is same entry in table or not!
177 dev_put(table->data_vec[ix].attr.ndev); 228 * If new entry in table is added by the time we free here,
229 * don't overwrite the table entry.
230 */
231 if (entry == table->data_vec[entry->attr.index])
232 table->data_vec[entry->attr.index] = NULL;
233 /* Now this index is ready to be allocated */
234 write_unlock_irq(&table->rwlock);
235
236 if (entry->attr.ndev)
237 dev_put(entry->attr.ndev);
238 kfree(entry);
178} 239}
179 240
180static int add_roce_gid(struct ib_gid_table *table, 241static void free_gid_entry(struct kref *kref)
181 const union ib_gid *gid, 242{
182 const struct ib_gid_attr *attr) 243 struct ib_gid_table_entry *entry =
244 container_of(kref, struct ib_gid_table_entry, kref);
245
246 free_gid_entry_locked(entry);
247}
248
249/**
250 * free_gid_work - Release reference to the GID entry
251 * @work: Work structure to refer to GID entry which needs to be
252 * deleted.
253 *
254 * free_gid_work() frees the entry from the HCA's hardware table
255 * if provider supports it. It releases reference to netdevice.
256 */
257static void free_gid_work(struct work_struct *work)
258{
259 struct ib_gid_table_entry *entry =
260 container_of(work, struct ib_gid_table_entry, del_work);
261 struct ib_device *device = entry->attr.device;
262 u8 port_num = entry->attr.port_num;
263 struct ib_gid_table *table = rdma_gid_table(device, port_num);
264
265 mutex_lock(&table->lock);
266 free_gid_entry_locked(entry);
267 mutex_unlock(&table->lock);
268}
269
270static struct ib_gid_table_entry *
271alloc_gid_entry(const struct ib_gid_attr *attr)
183{ 272{
184 struct ib_gid_table_entry *entry; 273 struct ib_gid_table_entry *entry;
185 int ix = attr->index; 274
186 int ret = 0; 275 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
276 if (!entry)
277 return NULL;
278 kref_init(&entry->kref);
279 memcpy(&entry->attr, attr, sizeof(*attr));
280 if (entry->attr.ndev)
281 dev_hold(entry->attr.ndev);
282 INIT_WORK(&entry->del_work, free_gid_work);
283 entry->state = GID_TABLE_ENTRY_INVALID;
284 return entry;
285}
286
287static void store_gid_entry(struct ib_gid_table *table,
288 struct ib_gid_table_entry *entry)
289{
290 entry->state = GID_TABLE_ENTRY_VALID;
291
292 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
293 entry->attr.device->name, entry->attr.port_num,
294 entry->attr.index, entry->attr.gid.raw);
295
296 lockdep_assert_held(&table->lock);
297 write_lock_irq(&table->rwlock);
298 table->data_vec[entry->attr.index] = entry;
299 write_unlock_irq(&table->rwlock);
300}
301
302static void get_gid_entry(struct ib_gid_table_entry *entry)
303{
304 kref_get(&entry->kref);
305}
306
307static void put_gid_entry(struct ib_gid_table_entry *entry)
308{
309 kref_put(&entry->kref, schedule_free_gid);
310}
311
312static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
313{
314 kref_put(&entry->kref, free_gid_entry);
315}
316
317static int add_roce_gid(struct ib_gid_table_entry *entry)
318{
319 const struct ib_gid_attr *attr = &entry->attr;
320 int ret;
187 321
188 if (!attr->ndev) { 322 if (!attr->ndev) {
189 pr_err("%s NULL netdev device=%s port=%d index=%d\n", 323 pr_err("%s NULL netdev device=%s port=%d index=%d\n",
@@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table,
191 attr->index); 325 attr->index);
192 return -EINVAL; 326 return -EINVAL;
193 } 327 }
194
195 entry = &table->data_vec[ix];
196 if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
197 WARN(1, "GID table corruption device=%s port=%d index=%d\n",
198 attr->device->name, attr->port_num,
199 attr->index);
200 return -EINVAL;
201 }
202
203 if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { 328 if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
204 ret = attr->device->add_gid(gid, attr, &entry->context); 329 ret = attr->device->add_gid(attr, &entry->context);
205 if (ret) { 330 if (ret) {
206 pr_err("%s GID add failed device=%s port=%d index=%d\n", 331 pr_err("%s GID add failed device=%s port=%d index=%d\n",
207 __func__, attr->device->name, attr->port_num, 332 __func__, attr->device->name, attr->port_num,
208 attr->index); 333 attr->index);
209 goto add_err; 334 return ret;
210 } 335 }
211 } 336 }
212 dev_hold(attr->ndev); 337 return 0;
213
214add_err:
215 if (!ret)
216 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
217 attr->device->name, attr->port_num, ix, gid->raw);
218 return ret;
219} 338}
220 339
221/** 340/**
222 * add_modify_gid - Add or modify GID table entry 341 * add_modify_gid - Add or modify GID table entry
223 * 342 *
224 * @table: GID table in which GID to be added or modified 343 * @table: GID table in which GID to be added or modified
225 * @gid: GID content
226 * @attr: Attributes of the GID 344 * @attr: Attributes of the GID
227 * 345 *
228 * Returns 0 on success or appropriate error code. It accepts zero 346 * Returns 0 on success or appropriate error code. It accepts zero
@@ -230,34 +348,42 @@ add_err:
230 * GID. However such zero GIDs are not added to the cache. 348 * GID. However such zero GIDs are not added to the cache.
231 */ 349 */
232static int add_modify_gid(struct ib_gid_table *table, 350static int add_modify_gid(struct ib_gid_table *table,
233 const union ib_gid *gid,
234 const struct ib_gid_attr *attr) 351 const struct ib_gid_attr *attr)
235{ 352{
236 int ret; 353 struct ib_gid_table_entry *entry;
354 int ret = 0;
355
356 /*
357 * Invalidate any old entry in the table to make it safe to write to
358 * this index.
359 */
360 if (is_gid_entry_valid(table->data_vec[attr->index]))
361 put_gid_entry(table->data_vec[attr->index]);
362
363 /*
364 * Some HCA's report multiple GID entries with only one valid GID, and
365 * leave other unused entries as the zero GID. Convert zero GIDs to
366 * empty table entries instead of storing them.
367 */
368 if (rdma_is_zero_gid(&attr->gid))
369 return 0;
370
371 entry = alloc_gid_entry(attr);
372 if (!entry)
373 return -ENOMEM;
237 374
238 if (rdma_protocol_roce(attr->device, attr->port_num)) { 375 if (rdma_protocol_roce(attr->device, attr->port_num)) {
239 ret = add_roce_gid(table, gid, attr); 376 ret = add_roce_gid(entry);
240 if (ret) 377 if (ret)
241 return ret; 378 goto done;
242 } else {
243 /*
244 * Some HCA's report multiple GID entries with only one
245 * valid GID, but remaining as zero GID.
246 * So ignore such behavior for IB link layer and don't
247 * fail the call, but don't add such entry to GID cache.
248 */
249 if (rdma_is_zero_gid(gid))
250 return 0;
251 } 379 }
252 380
253 lockdep_assert_held(&table->lock); 381 store_gid_entry(table, entry);
254 memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
255 memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
256
257 write_lock_irq(&table->rwlock);
258 table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
259 write_unlock_irq(&table->rwlock);
260 return 0; 382 return 0;
383
384done:
385 put_gid_entry(entry);
386 return ret;
261} 387}
262 388
263/** 389/**
@@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table,
272static void del_gid(struct ib_device *ib_dev, u8 port, 398static void del_gid(struct ib_device *ib_dev, u8 port,
273 struct ib_gid_table *table, int ix) 399 struct ib_gid_table *table, int ix)
274{ 400{
401 struct ib_gid_table_entry *entry;
402
275 lockdep_assert_held(&table->lock); 403 lockdep_assert_held(&table->lock);
404
405 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
406 ib_dev->name, port, ix,
407 table->data_vec[ix]->attr.gid.raw);
408
276 write_lock_irq(&table->rwlock); 409 write_lock_irq(&table->rwlock);
277 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; 410 entry = table->data_vec[ix];
411 entry->state = GID_TABLE_ENTRY_PENDING_DEL;
412 /*
413 * For non RoCE protocol, GID entry slot is ready to use.
414 */
415 if (!rdma_protocol_roce(ib_dev, port))
416 table->data_vec[ix] = NULL;
278 write_unlock_irq(&table->rwlock); 417 write_unlock_irq(&table->rwlock);
279 418
280 if (rdma_protocol_roce(ib_dev, port)) 419 put_gid_entry_locked(entry);
281 del_roce_gid(ib_dev, port, table, ix);
282 memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
283 memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
284 table->data_vec[ix].context = NULL;
285} 420}
286 421
287/* rwlock should be read locked, or lock should be held */ 422/* rwlock should be read locked, or lock should be held */
@@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
294 int empty = pempty ? -1 : 0; 429 int empty = pempty ? -1 : 0;
295 430
296 while (i < table->sz && (found < 0 || empty < 0)) { 431 while (i < table->sz && (found < 0 || empty < 0)) {
297 struct ib_gid_table_entry *data = &table->data_vec[i]; 432 struct ib_gid_table_entry *data = table->data_vec[i];
298 struct ib_gid_attr *attr = &data->attr; 433 struct ib_gid_attr *attr;
299 int curr_index = i; 434 int curr_index = i;
300 435
301 i++; 436 i++;
@@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
306 * so lookup free slot only if requested. 441 * so lookup free slot only if requested.
307 */ 442 */
308 if (pempty && empty < 0) { 443 if (pempty && empty < 0) {
309 if (data->props & GID_TABLE_ENTRY_INVALID && 444 if (is_gid_entry_free(data) &&
310 (default_gid == 445 default_gid ==
311 !!(data->props & GID_TABLE_ENTRY_DEFAULT))) { 446 is_gid_index_default(table, curr_index)) {
312 /* 447 /*
313 * Found an invalid (free) entry; allocate it. 448 * Found an invalid (free) entry; allocate it.
314 * If default GID is requested, then our 449 * If default GID is requested, then our
@@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
323 458
324 /* 459 /*
325 * Additionally find_gid() is used to find valid entry during 460 * Additionally find_gid() is used to find valid entry during
326 * lookup operation, where validity needs to be checked. So 461 * lookup operation; so ignore the entries which are marked as
327 * find the empty entry first to continue to search for a free 462 * pending for removal and the entries which are marked as
328 * slot and ignore its INVALID flag. 463 * invalid.
329 */ 464 */
330 if (data->props & GID_TABLE_ENTRY_INVALID) 465 if (!is_gid_entry_valid(data))
331 continue; 466 continue;
332 467
333 if (found >= 0) 468 if (found >= 0)
334 continue; 469 continue;
335 470
471 attr = &data->attr;
336 if (mask & GID_ATTR_FIND_MASK_GID_TYPE && 472 if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
337 attr->gid_type != val->gid_type) 473 attr->gid_type != val->gid_type)
338 continue; 474 continue;
339 475
340 if (mask & GID_ATTR_FIND_MASK_GID && 476 if (mask & GID_ATTR_FIND_MASK_GID &&
341 memcmp(gid, &data->gid, sizeof(*gid))) 477 memcmp(gid, &data->attr.gid, sizeof(*gid)))
342 continue; 478 continue;
343 479
344 if (mask & GID_ATTR_FIND_MASK_NETDEV && 480 if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
346 continue; 482 continue;
347 483
348 if (mask & GID_ATTR_FIND_MASK_DEFAULT && 484 if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
349 !!(data->props & GID_TABLE_ENTRY_DEFAULT) != 485 is_gid_index_default(table, curr_index) != default_gid)
350 default_gid)
351 continue; 486 continue;
352 487
353 found = curr_index; 488 found = curr_index;
@@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
396 attr->device = ib_dev; 531 attr->device = ib_dev;
397 attr->index = empty; 532 attr->index = empty;
398 attr->port_num = port; 533 attr->port_num = port;
399 ret = add_modify_gid(table, gid, attr); 534 attr->gid = *gid;
535 ret = add_modify_gid(table, attr);
400 if (!ret) 536 if (!ret)
401 dispatch_gid_change_event(ib_dev, port); 537 dispatch_gid_change_event(ib_dev, port);
402 538
@@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
492 mutex_lock(&table->lock); 628 mutex_lock(&table->lock);
493 629
494 for (ix = 0; ix < table->sz; ix++) { 630 for (ix = 0; ix < table->sz; ix++) {
495 if (table->data_vec[ix].attr.ndev == ndev) { 631 if (is_gid_entry_valid(table->data_vec[ix]) &&
632 table->data_vec[ix]->attr.ndev == ndev) {
496 del_gid(ib_dev, port, table, ix); 633 del_gid(ib_dev, port, table, ix);
497 deleted = true; 634 deleted = true;
498 } 635 }
@@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
506 return 0; 643 return 0;
507} 644}
508 645
509static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
510 union ib_gid *gid, struct ib_gid_attr *attr)
511{
512 struct ib_gid_table *table;
513
514 table = rdma_gid_table(ib_dev, port);
515
516 if (index < 0 || index >= table->sz)
517 return -EINVAL;
518
519 if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
520 return -EINVAL;
521
522 memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
523 if (attr) {
524 memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
525 if (attr->ndev)
526 dev_hold(attr->ndev);
527 }
528
529 return 0;
530}
531
532static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
533 const union ib_gid *gid,
534 const struct ib_gid_attr *val,
535 unsigned long mask,
536 u8 *port, u16 *index)
537{
538 struct ib_gid_table *table;
539 u8 p;
540 int local_index;
541 unsigned long flags;
542
543 for (p = 0; p < ib_dev->phys_port_cnt; p++) {
544 table = ib_dev->cache.ports[p].gid;
545 read_lock_irqsave(&table->rwlock, flags);
546 local_index = find_gid(table, gid, val, false, mask, NULL);
547 if (local_index >= 0) {
548 if (index)
549 *index = local_index;
550 if (port)
551 *port = p + rdma_start_port(ib_dev);
552 read_unlock_irqrestore(&table->rwlock, flags);
553 return 0;
554 }
555 read_unlock_irqrestore(&table->rwlock, flags);
556 }
557
558 return -ENOENT;
559}
560
561static int ib_cache_gid_find(struct ib_device *ib_dev,
562 const union ib_gid *gid,
563 enum ib_gid_type gid_type,
564 struct net_device *ndev, u8 *port,
565 u16 *index)
566{
567 unsigned long mask = GID_ATTR_FIND_MASK_GID |
568 GID_ATTR_FIND_MASK_GID_TYPE;
569 struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
570
571 if (ndev)
572 mask |= GID_ATTR_FIND_MASK_NETDEV;
573
574 return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
575 mask, port, index);
576}
577
578/** 646/**
579 * ib_find_cached_gid_by_port - Returns the GID table index where a specified 647 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
580 * GID value occurs. It searches for the specified GID value in the local 648 * a valid GID entry for given search parameters. It searches for the specified
581 * software cache. 649 * GID value in the local software cache.
582 * @device: The device to query. 650 * @device: The device to query.
583 * @gid: The GID value to search for. 651 * @gid: The GID value to search for.
584 * @gid_type: The GID type to search for. 652 * @gid_type: The GID type to search for.
585 * @port_num: The port number of the device where the GID value should be 653 * @port_num: The port number of the device where the GID value should be
586 * searched. 654 * searched.
587 * @ndev: In RoCE, the net device of the device. Null means ignore. 655 * @ndev: In RoCE, the net device of the device. NULL means ignore.
588 * @index: The index into the cached GID table where the GID was found. This 656 *
589 * parameter may be NULL. 657 * Returns sgid attributes if the GID is found with valid reference or
658 * returns ERR_PTR for the error.
659 * The caller must invoke rdma_put_gid_attr() to release the reference.
590 */ 660 */
591int ib_find_cached_gid_by_port(struct ib_device *ib_dev, 661const struct ib_gid_attr *
592 const union ib_gid *gid, 662rdma_find_gid_by_port(struct ib_device *ib_dev,
593 enum ib_gid_type gid_type, 663 const union ib_gid *gid,
594 u8 port, struct net_device *ndev, 664 enum ib_gid_type gid_type,
595 u16 *index) 665 u8 port, struct net_device *ndev)
596{ 666{
597 int local_index; 667 int local_index;
598 struct ib_gid_table *table; 668 struct ib_gid_table *table;
599 unsigned long mask = GID_ATTR_FIND_MASK_GID | 669 unsigned long mask = GID_ATTR_FIND_MASK_GID |
600 GID_ATTR_FIND_MASK_GID_TYPE; 670 GID_ATTR_FIND_MASK_GID_TYPE;
601 struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; 671 struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
672 const struct ib_gid_attr *attr;
602 unsigned long flags; 673 unsigned long flags;
603 674
604 if (!rdma_is_port_valid(ib_dev, port)) 675 if (!rdma_is_port_valid(ib_dev, port))
605 return -ENOENT; 676 return ERR_PTR(-ENOENT);
606 677
607 table = rdma_gid_table(ib_dev, port); 678 table = rdma_gid_table(ib_dev, port);
608 679
@@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
612 read_lock_irqsave(&table->rwlock, flags); 683 read_lock_irqsave(&table->rwlock, flags);
613 local_index = find_gid(table, gid, &val, false, mask, NULL); 684 local_index = find_gid(table, gid, &val, false, mask, NULL);
614 if (local_index >= 0) { 685 if (local_index >= 0) {
615 if (index) 686 get_gid_entry(table->data_vec[local_index]);
616 *index = local_index; 687 attr = &table->data_vec[local_index]->attr;
617 read_unlock_irqrestore(&table->rwlock, flags); 688 read_unlock_irqrestore(&table->rwlock, flags);
618 return 0; 689 return attr;
619 } 690 }
620 691
621 read_unlock_irqrestore(&table->rwlock, flags); 692 read_unlock_irqrestore(&table->rwlock, flags);
622 return -ENOENT; 693 return ERR_PTR(-ENOENT);
623} 694}
624EXPORT_SYMBOL(ib_find_cached_gid_by_port); 695EXPORT_SYMBOL(rdma_find_gid_by_port);
625 696
626/** 697/**
627 * ib_cache_gid_find_by_filter - Returns the GID table index where a specified 698 * rdma_find_gid_by_filter - Returns the GID table attribute where a
628 * GID value occurs 699 * specified GID value occurs
629 * @device: The device to query. 700 * @device: The device to query.
630 * @gid: The GID value to search for. 701 * @gid: The GID value to search for.
631 * @port_num: The port number of the device where the GID value could be 702 * @port: The port number of the device where the GID value could be
632 * searched. 703 * searched.
633 * @filter: The filter function is executed on any matching GID in the table. 704 * @filter: The filter function is executed on any matching GID in the table.
634 * If the filter function returns true, the corresponding index is returned, 705 * If the filter function returns true, the corresponding index is returned,
635 * otherwise, we continue searching the GID table. It's guaranteed that 706 * otherwise, we continue searching the GID table. It's guaranteed that
636 * while filter is executed, ndev field is valid and the structure won't 707 * while filter is executed, ndev field is valid and the structure won't
637 * change. filter is executed in an atomic context. filter must not be NULL. 708 * change. filter is executed in an atomic context. filter must not be NULL.
638 * @index: The index into the cached GID table where the GID was found. This
639 * parameter may be NULL.
640 * 709 *
641 * ib_cache_gid_find_by_filter() searches for the specified GID value 710 * rdma_find_gid_by_filter() searches for the specified GID value
642 * of which the filter function returns true in the port's GID table. 711 * of which the filter function returns true in the port's GID table.
643 * This function is only supported on RoCE ports.
644 * 712 *
645 */ 713 */
646static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, 714const struct ib_gid_attr *rdma_find_gid_by_filter(
647 const union ib_gid *gid, 715 struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
648 u8 port, 716 bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
649 bool (*filter)(const union ib_gid *, 717 void *),
650 const struct ib_gid_attr *, 718 void *context)
651 void *),
652 void *context,
653 u16 *index)
654{ 719{
720 const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
655 struct ib_gid_table *table; 721 struct ib_gid_table *table;
656 unsigned int i;
657 unsigned long flags; 722 unsigned long flags;
658 bool found = false; 723 unsigned int i;
659
660 724
661 if (!rdma_is_port_valid(ib_dev, port) || 725 if (!rdma_is_port_valid(ib_dev, port))
662 !rdma_protocol_roce(ib_dev, port)) 726 return ERR_PTR(-EINVAL);
663 return -EPROTONOSUPPORT;
664 727
665 table = rdma_gid_table(ib_dev, port); 728 table = rdma_gid_table(ib_dev, port);
666 729
667 read_lock_irqsave(&table->rwlock, flags); 730 read_lock_irqsave(&table->rwlock, flags);
668 for (i = 0; i < table->sz; i++) { 731 for (i = 0; i < table->sz; i++) {
669 struct ib_gid_attr attr; 732 struct ib_gid_table_entry *entry = table->data_vec[i];
670 733
671 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 734 if (!is_gid_entry_valid(entry))
672 continue; 735 continue;
673 736
674 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) 737 if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
675 continue; 738 continue;
676 739
677 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); 740 if (filter(gid, &entry->attr, context)) {
678 741 get_gid_entry(entry);
679 if (filter(gid, &attr, context)) { 742 res = &entry->attr;
680 found = true;
681 if (index)
682 *index = i;
683 break; 743 break;
684 } 744 }
685 } 745 }
686 read_unlock_irqrestore(&table->rwlock, flags); 746 read_unlock_irqrestore(&table->rwlock, flags);
687 747 return res;
688 if (!found)
689 return -ENOENT;
690 return 0;
691} 748}
692 749
693static struct ib_gid_table *alloc_gid_table(int sz) 750static struct ib_gid_table *alloc_gid_table(int sz)
694{ 751{
695 struct ib_gid_table *table = 752 struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
696 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
697 int i;
698 753
699 if (!table) 754 if (!table)
700 return NULL; 755 return NULL;
@@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
707 762
708 table->sz = sz; 763 table->sz = sz;
709 rwlock_init(&table->rwlock); 764 rwlock_init(&table->rwlock);
710
711 /* Mark all entries as invalid so that allocator can allocate
712 * one of the invalid (free) entry.
713 */
714 for (i = 0; i < sz; i++)
715 table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
716 return table; 765 return table;
717 766
718err_free_table: 767err_free_table:
@@ -720,12 +769,30 @@ err_free_table:
720 return NULL; 769 return NULL;
721} 770}
722 771
723static void release_gid_table(struct ib_gid_table *table) 772static void release_gid_table(struct ib_device *device, u8 port,
773 struct ib_gid_table *table)
724{ 774{
725 if (table) { 775 bool leak = false;
726 kfree(table->data_vec); 776 int i;
727 kfree(table); 777
778 if (!table)
779 return;
780
781 for (i = 0; i < table->sz; i++) {
782 if (is_gid_entry_free(table->data_vec[i]))
783 continue;
784 if (kref_read(&table->data_vec[i]->kref) > 1) {
785 pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
786 device->name, i,
787 kref_read(&table->data_vec[i]->kref));
788 leak = true;
789 }
728 } 790 }
791 if (leak)
792 return;
793
794 kfree(table->data_vec);
795 kfree(table);
729} 796}
730 797
731static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, 798static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
@@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
739 806
740 mutex_lock(&table->lock); 807 mutex_lock(&table->lock);
741 for (i = 0; i < table->sz; ++i) { 808 for (i = 0; i < table->sz; ++i) {
742 if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { 809 if (is_gid_entry_valid(table->data_vec[i])) {
743 del_gid(ib_dev, port, table, i); 810 del_gid(ib_dev, port, table, i);
744 deleted = true; 811 deleted = true;
745 } 812 }
@@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
757{ 824{
758 union ib_gid gid = { }; 825 union ib_gid gid = { };
759 struct ib_gid_attr gid_attr; 826 struct ib_gid_attr gid_attr;
760 struct ib_gid_table *table;
761 unsigned int gid_type; 827 unsigned int gid_type;
762 unsigned long mask; 828 unsigned long mask;
763 829
764 table = rdma_gid_table(ib_dev, port);
765
766 mask = GID_ATTR_FIND_MASK_GID_TYPE | 830 mask = GID_ATTR_FIND_MASK_GID_TYPE |
767 GID_ATTR_FIND_MASK_DEFAULT | 831 GID_ATTR_FIND_MASK_DEFAULT |
768 GID_ATTR_FIND_MASK_NETDEV; 832 GID_ATTR_FIND_MASK_NETDEV;
@@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
792 unsigned int i; 856 unsigned int i;
793 unsigned long roce_gid_type_mask; 857 unsigned long roce_gid_type_mask;
794 unsigned int num_default_gids; 858 unsigned int num_default_gids;
795 unsigned int current_gid = 0;
796 859
797 roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 860 roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
798 num_default_gids = hweight_long(roce_gid_type_mask); 861 num_default_gids = hweight_long(roce_gid_type_mask);
799 for (i = 0; i < num_default_gids && i < table->sz; i++) { 862 /* Reserve starting indices for default GIDs */
800 struct ib_gid_table_entry *entry = &table->data_vec[i]; 863 for (i = 0; i < num_default_gids && i < table->sz; i++)
801 864 table->default_gid_indices |= BIT(i);
802 entry->props |= GID_TABLE_ENTRY_DEFAULT;
803 current_gid = find_next_bit(&roce_gid_type_mask,
804 BITS_PER_LONG,
805 current_gid);
806 entry->attr.gid_type = current_gid++;
807 }
808} 865}
809 866
810 867
@@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
815 872
816 for (port = 0; port < ib_dev->phys_port_cnt; port++) { 873 for (port = 0; port < ib_dev->phys_port_cnt; port++) {
817 table = ib_dev->cache.ports[port].gid; 874 table = ib_dev->cache.ports[port].gid;
818 release_gid_table(table); 875 release_gid_table(ib_dev, port, table);
819 ib_dev->cache.ports[port].gid = NULL; 876 ib_dev->cache.ports[port].gid = NULL;
820 } 877 }
821} 878}
@@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
869 return err; 926 return err;
870} 927}
871 928
872int ib_get_cached_gid(struct ib_device *device, 929/**
873 u8 port_num, 930 * rdma_query_gid - Read the GID content from the GID software cache
874 int index, 931 * @device: Device to query the GID
875 union ib_gid *gid, 932 * @port_num: Port number of the device
876 struct ib_gid_attr *gid_attr) 933 * @index: Index of the GID table entry to read
934 * @gid: Pointer to GID where to store the entry's GID
935 *
936 * rdma_query_gid() only reads the GID entry content for requested device,
937 * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
938 * hold any reference to the GID table entry in the HCA or software cache.
939 *
940 * Returns 0 on success or appropriate error code.
941 *
942 */
943int rdma_query_gid(struct ib_device *device, u8 port_num,
944 int index, union ib_gid *gid)
877{ 945{
878 int res;
879 unsigned long flags;
880 struct ib_gid_table *table; 946 struct ib_gid_table *table;
947 unsigned long flags;
948 int res = -EINVAL;
881 949
882 if (!rdma_is_port_valid(device, port_num)) 950 if (!rdma_is_port_valid(device, port_num))
883 return -EINVAL; 951 return -EINVAL;
884 952
885 table = rdma_gid_table(device, port_num); 953 table = rdma_gid_table(device, port_num);
886 read_lock_irqsave(&table->rwlock, flags); 954 read_lock_irqsave(&table->rwlock, flags);
887 res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
888 read_unlock_irqrestore(&table->rwlock, flags);
889 955
956 if (index < 0 || index >= table->sz ||
957 !is_gid_entry_valid(table->data_vec[index]))
958 goto done;
959
960 memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
961 res = 0;
962
963done:
964 read_unlock_irqrestore(&table->rwlock, flags);
890 return res; 965 return res;
891} 966}
892EXPORT_SYMBOL(ib_get_cached_gid); 967EXPORT_SYMBOL(rdma_query_gid);
893 968
894/** 969/**
895 * ib_find_cached_gid - Returns the port number and GID table index where 970 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
896 * a specified GID value occurs.
897 * @device: The device to query. 971 * @device: The device to query.
898 * @gid: The GID value to search for. 972 * @gid: The GID value to search for.
899 * @gid_type: The GID type to search for. 973 * @gid_type: The GID type to search for.
900 * @ndev: In RoCE, the net device of the device. NULL means ignore. 974 * @ndev: In RoCE, the net device of the device. NULL means ignore.
901 * @port_num: The port number of the device where the GID value was found.
902 * @index: The index into the cached GID table where the GID was found. This
903 * parameter may be NULL.
904 * 975 *
905 * ib_find_cached_gid() searches for the specified GID value in 976 * rdma_find_gid() searches for the specified GID value in the software cache.
906 * the local software cache. 977 *
978 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
979 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
980 *
907 */ 981 */
908int ib_find_cached_gid(struct ib_device *device, 982const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
909 const union ib_gid *gid, 983 const union ib_gid *gid,
910 enum ib_gid_type gid_type, 984 enum ib_gid_type gid_type,
911 struct net_device *ndev, 985 struct net_device *ndev)
912 u8 *port_num,
913 u16 *index)
914{
915 return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
916}
917EXPORT_SYMBOL(ib_find_cached_gid);
918
919int ib_find_gid_by_filter(struct ib_device *device,
920 const union ib_gid *gid,
921 u8 port_num,
922 bool (*filter)(const union ib_gid *gid,
923 const struct ib_gid_attr *,
924 void *),
925 void *context, u16 *index)
926{ 986{
927 /* Only RoCE GID table supports filter function */ 987 unsigned long mask = GID_ATTR_FIND_MASK_GID |
928 if (!rdma_protocol_roce(device, port_num) && filter) 988 GID_ATTR_FIND_MASK_GID_TYPE;
929 return -EPROTONOSUPPORT; 989 struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
990 u8 p;
991
992 if (ndev)
993 mask |= GID_ATTR_FIND_MASK_NETDEV;
994
995 for (p = 0; p < device->phys_port_cnt; p++) {
996 struct ib_gid_table *table;
997 unsigned long flags;
998 int index;
999
1000 table = device->cache.ports[p].gid;
1001 read_lock_irqsave(&table->rwlock, flags);
1002 index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
1003 if (index >= 0) {
1004 const struct ib_gid_attr *attr;
1005
1006 get_gid_entry(table->data_vec[index]);
1007 attr = &table->data_vec[index]->attr;
1008 read_unlock_irqrestore(&table->rwlock, flags);
1009 return attr;
1010 }
1011 read_unlock_irqrestore(&table->rwlock, flags);
1012 }
930 1013
931 return ib_cache_gid_find_by_filter(device, gid, 1014 return ERR_PTR(-ENOENT);
932 port_num, filter,
933 context, index);
934} 1015}
1016EXPORT_SYMBOL(rdma_find_gid);
935 1017
936int ib_get_cached_pkey(struct ib_device *device, 1018int ib_get_cached_pkey(struct ib_device *device,
937 u8 port_num, 1019 u8 port_num,
@@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device,
1089} 1171}
1090EXPORT_SYMBOL(ib_get_cached_port_state); 1172EXPORT_SYMBOL(ib_get_cached_port_state);
1091 1173
1174/**
1175 * rdma_get_gid_attr - Returns GID attributes for a port of a device
1176 * at a requested gid_index, if a valid GID entry exists.
1177 * @device: The device to query.
1178 * @port_num: The port number on the device where the GID value
1179 * is to be queried.
1180 * @index: Index of the GID table entry whose attributes are to
1181 * be queried.
1182 *
 1183 * rdma_get_gid_attr() acquires a reference to the GID attribute entry in the
 1184 * cached GID table. The caller must invoke rdma_put_gid_attr() to release the
 1185 * reference to the GID attribute, regardless of link layer.
1186 *
1187 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
1188 * code.
1189 */
1190const struct ib_gid_attr *
1191rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
1192{
1193 const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
1194 struct ib_gid_table *table;
1195 unsigned long flags;
1196
1197 if (!rdma_is_port_valid(device, port_num))
1198 return ERR_PTR(-EINVAL);
1199
1200 table = rdma_gid_table(device, port_num);
1201 if (index < 0 || index >= table->sz)
1202 return ERR_PTR(-EINVAL);
1203
1204 read_lock_irqsave(&table->rwlock, flags);
1205 if (!is_gid_entry_valid(table->data_vec[index]))
1206 goto done;
1207
1208 get_gid_entry(table->data_vec[index]);
1209 attr = &table->data_vec[index]->attr;
1210done:
1211 read_unlock_irqrestore(&table->rwlock, flags);
1212 return attr;
1213}
1214EXPORT_SYMBOL(rdma_get_gid_attr);
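The by-index variant follows the same get/put contract; a short sketch with illustrative names:

	static int example_read_entry(struct ib_device *device, u8 port_num,
				      int index)
	{
		const struct ib_gid_attr *attr;

		attr = rdma_get_gid_attr(device, port_num, index);
		if (IS_ERR(attr))
			return PTR_ERR(attr);

		/* attr->gid, attr->gid_type and attr->ndev may be read here */
		rdma_put_gid_attr(attr);
		return 0;
	}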
1215
1216/**
1217 * rdma_put_gid_attr - Release reference to the GID attribute
1218 * @attr: Pointer to the GID attribute whose reference
1219 * needs to be released.
1220 *
 1221 * rdma_put_gid_attr() must be used to release a reference that was
 1222 * acquired using rdma_get_gid_attr() or any other API that
 1223 * returns a pointer to an ib_gid_attr, regardless of whether the link
 1224 * layer is IB or RoCE.
1225 *
1226 */
1227void rdma_put_gid_attr(const struct ib_gid_attr *attr)
1228{
1229 struct ib_gid_table_entry *entry =
1230 container_of(attr, struct ib_gid_table_entry, attr);
1231
1232 put_gid_entry(entry);
1233}
1234EXPORT_SYMBOL(rdma_put_gid_attr);
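Consumers that stash a returned attribute pointer (as the cma code below now does with dev_addr.sgid_attr) drop it on their teardown path; a sketch, where ctx stands for any such hypothetical consumer:

	/* teardown of a consumer holding a stored sgid_attr reference */
	if (ctx->sgid_attr) {
		rdma_put_gid_attr(ctx->sgid_attr);
		ctx->sgid_attr = NULL;
	}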
1235
1236/**
1237 * rdma_hold_gid_attr - Get reference to existing GID attribute
1238 *
1239 * @attr: Pointer to the GID attribute whose reference
1240 * needs to be taken.
1241 *
1242 * Increase the reference count to a GID attribute to keep it from being
 1243 * freed. Callers are required to already hold a reference to the attribute.
1244 *
1245 */
1246void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
1247{
1248 struct ib_gid_table_entry *entry =
1249 container_of(attr, struct ib_gid_table_entry, attr);
1250
1251 get_gid_entry(entry);
1252}
1253EXPORT_SYMBOL(rdma_hold_gid_attr);
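rdma_hold_gid_attr() is for handing an already-referenced attribute to a second owner; each hold must later be balanced by rdma_put_gid_attr(). A sketch with a hypothetical struct example_ctx:

	static void example_stash(struct example_ctx *ctx,
				  const struct ib_gid_attr *attr)
	{
		/* caller already holds a reference on attr */
		rdma_hold_gid_attr(attr);	/* extra reference for the copy */
		ctx->sgid_attr = attr;
	}

	static void example_release(struct example_ctx *ctx)
	{
		rdma_put_gid_attr(ctx->sgid_attr);	/* balances the hold above */
	}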
1254
1092static int config_non_roce_gid_cache(struct ib_device *device, 1255static int config_non_roce_gid_cache(struct ib_device *device,
1093 u8 port, int gid_tbl_len) 1256 u8 port, int gid_tbl_len)
1094{ 1257{
1095 struct ib_gid_attr gid_attr = {}; 1258 struct ib_gid_attr gid_attr = {};
1096 struct ib_gid_table *table; 1259 struct ib_gid_table *table;
1097 union ib_gid gid;
1098 int ret = 0; 1260 int ret = 0;
1099 int i; 1261 int i;
1100 1262
@@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
1106 for (i = 0; i < gid_tbl_len; ++i) { 1268 for (i = 0; i < gid_tbl_len; ++i) {
1107 if (!device->query_gid) 1269 if (!device->query_gid)
1108 continue; 1270 continue;
1109 ret = device->query_gid(device, port, i, &gid); 1271 ret = device->query_gid(device, port, i, &gid_attr.gid);
1110 if (ret) { 1272 if (ret) {
1111 pr_warn("query_gid failed (%d) for %s (index %d)\n", 1273 pr_warn("query_gid failed (%d) for %s (index %d)\n",
1112 ret, device->name, i); 1274 ret, device->name, i);
1113 goto err; 1275 goto err;
1114 } 1276 }
1115 gid_attr.index = i; 1277 gid_attr.index = i;
1116 add_modify_gid(table, &gid, &gid_attr); 1278 add_modify_gid(table, &gid_attr);
1117 } 1279 }
1118err: 1280err:
1119 mutex_unlock(&table->lock); 1281 mutex_unlock(&table->lock);
@@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device,
1128 struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; 1290 struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
1129 int i; 1291 int i;
1130 int ret; 1292 int ret;
1131 struct ib_gid_table *table;
1132 1293
1133 if (!rdma_is_port_valid(device, port)) 1294 if (!rdma_is_port_valid(device, port))
1134 return; 1295 return;
1135 1296
1136 table = rdma_gid_table(device, port);
1137
1138 tprops = kmalloc(sizeof *tprops, GFP_KERNEL); 1297 tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
1139 if (!tprops) 1298 if (!tprops)
1140 return; 1299 return;
@@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device)
1296 ib_unregister_event_handler(&device->cache.event_handler); 1455 ib_unregister_event_handler(&device->cache.event_handler);
1297 flush_workqueue(ib_wq); 1456 flush_workqueue(ib_wq);
1298 gid_table_cleanup_one(device); 1457 gid_table_cleanup_one(device);
1458
1459 /*
 1460 * Flush the wq a second time for any pending GID delete work.
1461 */
1462 flush_workqueue(ib_wq);
1299} 1463}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 27a7b0a2e27a..6e39c27dca8e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
474 if (ret) 474 if (ret)
475 return ret; 475 return ret;
476 476
477 memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); 477 rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
478 return 0; 478 return 0;
479} 479}
480 480
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
508 return ret; 508 return ret;
509} 509}
510 510
511static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path) 511static struct cm_port *
512get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
512{ 513{
513 struct cm_device *cm_dev; 514 struct cm_device *cm_dev;
514 struct cm_port *port = NULL; 515 struct cm_port *port = NULL;
515 unsigned long flags; 516 unsigned long flags;
516 u8 p; 517
517 struct net_device *ndev = ib_get_ndev_from_path(path); 518 if (attr) {
518 519 read_lock_irqsave(&cm.device_lock, flags);
519 read_lock_irqsave(&cm.device_lock, flags); 520 list_for_each_entry(cm_dev, &cm.device_list, list) {
520 list_for_each_entry(cm_dev, &cm.device_list, list) { 521 if (cm_dev->ib_device == attr->device) {
521 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, 522 port = cm_dev->port[attr->port_num - 1];
522 sa_conv_pathrec_to_gid_type(path), 523 break;
523 ndev, &p, NULL)) { 524 }
524 port = cm_dev->port[p - 1]; 525 }
525 break; 526 read_unlock_irqrestore(&cm.device_lock, flags);
527 } else {
 528 /* SGID attribute can be NULL in the following
529 * conditions.
530 * (a) Alternative path
531 * (b) IB link layer without GRH
532 * (c) LAP send messages
533 */
534 read_lock_irqsave(&cm.device_lock, flags);
535 list_for_each_entry(cm_dev, &cm.device_list, list) {
536 attr = rdma_find_gid(cm_dev->ib_device,
537 &path->sgid,
538 sa_conv_pathrec_to_gid_type(path),
539 NULL);
540 if (!IS_ERR(attr)) {
541 port = cm_dev->port[attr->port_num - 1];
542 break;
543 }
526 } 544 }
545 read_unlock_irqrestore(&cm.device_lock, flags);
546 if (port)
547 rdma_put_gid_attr(attr);
527 } 548 }
528 read_unlock_irqrestore(&cm.device_lock, flags);
529
530 if (ndev)
531 dev_put(ndev);
532 return port; 549 return port;
533} 550}
534 551
535static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av, 552static int cm_init_av_by_path(struct sa_path_rec *path,
553 const struct ib_gid_attr *sgid_attr,
554 struct cm_av *av,
536 struct cm_id_private *cm_id_priv) 555 struct cm_id_private *cm_id_priv)
537{ 556{
538 struct rdma_ah_attr new_ah_attr; 557 struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
540 struct cm_port *port; 559 struct cm_port *port;
541 int ret; 560 int ret;
542 561
543 port = get_cm_port_from_path(path); 562 port = get_cm_port_from_path(path, sgid_attr);
544 if (!port) 563 if (!port)
545 return -EINVAL; 564 return -EINVAL;
546 cm_dev = port->cm_dev; 565 cm_dev = port->cm_dev;
@@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
554 573
555 /* 574 /*
556 * av->ah_attr might be initialized based on wc or during 575 * av->ah_attr might be initialized based on wc or during
 557 * request processing time. So initialize a new ah_attr on stack. 576 * request processing time which might have a reference to sgid_attr.
577 * So initialize a new ah_attr on stack.
558 * If initialization fails, old ah_attr is used for sending any 578 * If initialization fails, old ah_attr is used for sending any
 559 * responses. If initialization is successful, then new ah_attr 579 * responses. If initialization is successful, then the new ah_attr
 560 * is used by overwriting the old one. 580 * is used by overwriting the old one, so that the right ah_attr
 581 * can be used to return an error response.
561 */ 582 */
562 ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path, 583 ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
563 &new_ah_attr); 584 &new_ah_attr, sgid_attr);
564 if (ret) 585 if (ret)
565 return ret; 586 return ret;
566 587
567 av->timeout = path->packet_life_time + 1; 588 av->timeout = path->packet_life_time + 1;
568 589
569 ret = add_cm_id_to_port_list(cm_id_priv, av, port); 590 ret = add_cm_id_to_port_list(cm_id_priv, av, port);
570 if (ret) 591 if (ret) {
592 rdma_destroy_ah_attr(&new_ah_attr);
571 return ret; 593 return ret;
572 memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr)); 594 }
595 rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
573 return 0; 596 return 0;
574} 597}
575 598
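The hunk above implies the following lifecycle for an ah_attr carrying an sgid_attr reference: a successful ib_init_ah_attr_from_path() leaves a reference inside the on-stack attribute, which is either transferred into the destination with rdma_move_ah_attr() on success or dropped with rdma_destroy_ah_attr() on a later failure. A condensed sketch (ib_dev, dest and later_step() are illustrative):

	struct rdma_ah_attr tmp;
	int ret;

	ret = ib_init_ah_attr_from_path(ib_dev, port_num, path, &tmp, sgid_attr);
	if (ret)
		return ret;			/* dest is left untouched */

	if (later_step() < 0) {
		rdma_destroy_ah_attr(&tmp);	/* drop the reference held by tmp */
		return -EINVAL;
	}
	rdma_move_ah_attr(&dest, &tmp);		/* dest now owns the reference */
	return 0;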
@@ -1091,6 +1114,9 @@ retest:
1091 wait_for_completion(&cm_id_priv->comp); 1114 wait_for_completion(&cm_id_priv->comp);
1092 while ((work = cm_dequeue_work(cm_id_priv)) != NULL) 1115 while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
1093 cm_free_work(work); 1116 cm_free_work(work);
1117
1118 rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
1119 rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
1094 kfree(cm_id_priv->private_data); 1120 kfree(cm_id_priv->private_data);
1095 kfree(cm_id_priv); 1121 kfree(cm_id_priv);
1096} 1122}
@@ -1230,14 +1256,12 @@ new_id:
1230} 1256}
1231EXPORT_SYMBOL(ib_cm_insert_listen); 1257EXPORT_SYMBOL(ib_cm_insert_listen);
1232 1258
1233static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, 1259static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
1234 enum cm_msg_sequence msg_seq)
1235{ 1260{
1236 u64 hi_tid, low_tid; 1261 u64 hi_tid, low_tid;
1237 1262
1238 hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; 1263 hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
1239 low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | 1264 low_tid = (u64)cm_id_priv->id.local_id;
1240 (msg_seq << 30));
1241 return cpu_to_be64(hi_tid | low_tid); 1265 return cpu_to_be64(hi_tid | low_tid);
1242} 1266}
1243 1267
@@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
1265 pri_path->opa.slid); 1289 pri_path->opa.slid);
1266 1290
1267 cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, 1291 cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
1268 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); 1292 cm_form_tid(cm_id_priv));
1269 1293
1270 req_msg->local_comm_id = cm_id_priv->id.local_id; 1294 req_msg->local_comm_id = cm_id_priv->id.local_id;
1271 req_msg->service_id = param->service_id; 1295 req_msg->service_id = param->service_id;
@@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
1413 goto out; 1437 goto out;
1414 } 1438 }
1415 1439
1416 ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, 1440 ret = cm_init_av_by_path(param->primary_path,
1441 param->ppath_sgid_attr, &cm_id_priv->av,
1417 cm_id_priv); 1442 cm_id_priv);
1418 if (ret) 1443 if (ret)
1419 goto error1; 1444 goto error1;
1420 if (param->alternate_path) { 1445 if (param->alternate_path) {
1421 ret = cm_init_av_by_path(param->alternate_path, 1446 ret = cm_init_av_by_path(param->alternate_path, NULL,
1422 &cm_id_priv->alt_av, cm_id_priv); 1447 &cm_id_priv->alt_av, cm_id_priv);
1423 if (ret) 1448 if (ret)
1424 goto error1; 1449 goto error1;
@@ -1646,7 +1671,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
1646 (ib_is_opa_gid(&path->sgid))) { 1671 (ib_is_opa_gid(&path->sgid))) {
1647 union ib_gid sgid; 1672 union ib_gid sgid;
1648 1673
1649 if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) { 1674 if (rdma_query_gid(dev, port_num, 0, &sgid)) {
1650 dev_warn(&dev->dev, 1675 dev_warn(&dev->dev,
1651 "Error updating sgid in CM request\n"); 1676 "Error updating sgid in CM request\n");
1652 return; 1677 return;
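rdma_query_gid() is the plain replacement for ib_get_cached_gid() at call sites like the one above: it only copies the GID value out of the cached table, so no ib_gid_attr reference is taken and nothing has to be released. A sketch with illustrative names:

	static int example_first_gid(struct ib_device *device, u8 port_num,
				     union ib_gid *sgid)
	{
		/* copy the GID at table index 0; nothing to put afterwards */
		return rdma_query_gid(device, port_num, 0, sgid);
	}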
@@ -1691,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work,
1691 param->retry_count = cm_req_get_retry_count(req_msg); 1716 param->retry_count = cm_req_get_retry_count(req_msg);
1692 param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); 1717 param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1693 param->srq = cm_req_get_srq(req_msg); 1718 param->srq = cm_req_get_srq(req_msg);
1719 param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
1694 work->cm_event.private_data = &req_msg->private_data; 1720 work->cm_event.private_data = &req_msg->private_data;
1695} 1721}
1696 1722
@@ -1914,9 +1940,8 @@ static int cm_req_handler(struct cm_work *work)
1914 struct ib_cm_id *cm_id; 1940 struct ib_cm_id *cm_id;
1915 struct cm_id_private *cm_id_priv, *listen_cm_id_priv; 1941 struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1916 struct cm_req_msg *req_msg; 1942 struct cm_req_msg *req_msg;
1917 union ib_gid gid;
1918 struct ib_gid_attr gid_attr;
1919 const struct ib_global_route *grh; 1943 const struct ib_global_route *grh;
1944 const struct ib_gid_attr *gid_attr;
1920 int ret; 1945 int ret;
1921 1946
1922 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; 1947 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1961,24 +1986,13 @@ static int cm_req_handler(struct cm_work *work)
1961 if (cm_req_has_alt_path(req_msg)) 1986 if (cm_req_has_alt_path(req_msg))
1962 memset(&work->path[1], 0, sizeof(work->path[1])); 1987 memset(&work->path[1], 0, sizeof(work->path[1]));
1963 grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr); 1988 grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
1964 ret = ib_get_cached_gid(work->port->cm_dev->ib_device, 1989 gid_attr = grh->sgid_attr;
1965 work->port->port_num,
1966 grh->sgid_index,
1967 &gid, &gid_attr);
1968 if (ret) {
1969 ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
1970 goto rejected;
1971 }
1972 1990
1973 if (gid_attr.ndev) { 1991 if (gid_attr && gid_attr->ndev) {
1974 work->path[0].rec_type = 1992 work->path[0].rec_type =
1975 sa_conv_gid_to_pathrec_type(gid_attr.gid_type); 1993 sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
1976 sa_path_set_ifindex(&work->path[0],
1977 gid_attr.ndev->ifindex);
1978 sa_path_set_ndev(&work->path[0],
1979 dev_net(gid_attr.ndev));
1980 dev_put(gid_attr.ndev);
1981 } else { 1994 } else {
1995 /* If no GID attribute or ndev is null, it is not RoCE. */
1982 cm_path_set_rec_type(work->port->cm_dev->ib_device, 1996 cm_path_set_rec_type(work->port->cm_dev->ib_device,
1983 work->port->port_num, 1997 work->port->port_num,
1984 &work->path[0], 1998 &work->path[0],
@@ -1992,15 +2006,14 @@ static int cm_req_handler(struct cm_work *work)
1992 sa_path_set_dmac(&work->path[0], 2006 sa_path_set_dmac(&work->path[0],
1993 cm_id_priv->av.ah_attr.roce.dmac); 2007 cm_id_priv->av.ah_attr.roce.dmac);
1994 work->path[0].hop_limit = grh->hop_limit; 2008 work->path[0].hop_limit = grh->hop_limit;
1995 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, 2009 ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
1996 cm_id_priv); 2010 cm_id_priv);
1997 if (ret) { 2011 if (ret) {
1998 int err; 2012 int err;
1999 2013
2000 err = ib_get_cached_gid(work->port->cm_dev->ib_device, 2014 err = rdma_query_gid(work->port->cm_dev->ib_device,
2001 work->port->port_num, 0, 2015 work->port->port_num, 0,
2002 &work->path[0].sgid, 2016 &work->path[0].sgid);
2003 NULL);
2004 if (err) 2017 if (err)
2005 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, 2018 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
2006 NULL, 0, NULL, 0); 2019 NULL, 0, NULL, 0);
@@ -2012,8 +2025,8 @@ static int cm_req_handler(struct cm_work *work)
2012 goto rejected; 2025 goto rejected;
2013 } 2026 }
2014 if (cm_req_has_alt_path(req_msg)) { 2027 if (cm_req_has_alt_path(req_msg)) {
2015 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, 2028 ret = cm_init_av_by_path(&work->path[1], NULL,
2016 cm_id_priv); 2029 &cm_id_priv->alt_av, cm_id_priv);
2017 if (ret) { 2030 if (ret) {
2018 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, 2031 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
2019 &work->path[0].sgid, 2032 &work->path[0].sgid,
@@ -2451,7 +2464,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2451 u8 private_data_len) 2464 u8 private_data_len)
2452{ 2465{
2453 cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, 2466 cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2454 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); 2467 cm_form_tid(cm_id_priv));
2455 dreq_msg->local_comm_id = cm_id_priv->id.local_id; 2468 dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2456 dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; 2469 dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2457 cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); 2470 cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
@@ -3082,7 +3095,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
3082 alt_ext = opa_is_extended_lid(alternate_path->opa.dlid, 3095 alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
3083 alternate_path->opa.slid); 3096 alternate_path->opa.slid);
3084 cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, 3097 cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
3085 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); 3098 cm_form_tid(cm_id_priv));
3086 lap_msg->local_comm_id = cm_id_priv->id.local_id; 3099 lap_msg->local_comm_id = cm_id_priv->id.local_id;
3087 lap_msg->remote_comm_id = cm_id_priv->id.remote_id; 3100 lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
3088 cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); 3101 cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
@@ -3136,7 +3149,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
3136 goto out; 3149 goto out;
3137 } 3150 }
3138 3151
3139 ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, 3152 ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
3140 cm_id_priv); 3153 cm_id_priv);
3141 if (ret) 3154 if (ret)
3142 goto out; 3155 goto out;
@@ -3279,7 +3292,7 @@ static int cm_lap_handler(struct cm_work *work)
3279 if (ret) 3292 if (ret)
3280 goto unlock; 3293 goto unlock;
3281 3294
3282 cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, 3295 cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
3283 cm_id_priv); 3296 cm_id_priv);
3284 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; 3297 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3285 cm_id_priv->tid = lap_msg->hdr.tid; 3298 cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3458,7 +3471,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
3458 struct ib_cm_sidr_req_param *param) 3471 struct ib_cm_sidr_req_param *param)
3459{ 3472{
3460 cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, 3473 cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
3461 cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); 3474 cm_form_tid(cm_id_priv));
3462 sidr_req_msg->request_id = cm_id_priv->id.local_id; 3475 sidr_req_msg->request_id = cm_id_priv->id.local_id;
3463 sidr_req_msg->pkey = param->path->pkey; 3476 sidr_req_msg->pkey = param->path->pkey;
3464 sidr_req_msg->service_id = param->service_id; 3477 sidr_req_msg->service_id = param->service_id;
@@ -3481,7 +3494,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
3481 return -EINVAL; 3494 return -EINVAL;
3482 3495
3483 cm_id_priv = container_of(cm_id, struct cm_id_private, id); 3496 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
3484 ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); 3497 ret = cm_init_av_by_path(param->path, param->sgid_attr,
3498 &cm_id_priv->av,
3499 cm_id_priv);
3485 if (ret) 3500 if (ret)
3486 goto out; 3501 goto out;
3487 3502
@@ -3518,6 +3533,7 @@ out:
3518EXPORT_SYMBOL(ib_send_cm_sidr_req); 3533EXPORT_SYMBOL(ib_send_cm_sidr_req);
3519 3534
3520static void cm_format_sidr_req_event(struct cm_work *work, 3535static void cm_format_sidr_req_event(struct cm_work *work,
3536 const struct cm_id_private *rx_cm_id,
3521 struct ib_cm_id *listen_id) 3537 struct ib_cm_id *listen_id)
3522{ 3538{
3523 struct cm_sidr_req_msg *sidr_req_msg; 3539 struct cm_sidr_req_msg *sidr_req_msg;
@@ -3531,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work,
3531 param->service_id = sidr_req_msg->service_id; 3547 param->service_id = sidr_req_msg->service_id;
3532 param->bth_pkey = cm_get_bth_pkey(work); 3548 param->bth_pkey = cm_get_bth_pkey(work);
3533 param->port = work->port->port_num; 3549 param->port = work->port->port_num;
3550 param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
3534 work->cm_event.private_data = &sidr_req_msg->private_data; 3551 work->cm_event.private_data = &sidr_req_msg->private_data;
3535} 3552}
3536 3553
@@ -3588,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
3588 cm_id_priv->id.service_id = sidr_req_msg->service_id; 3605 cm_id_priv->id.service_id = sidr_req_msg->service_id;
3589 cm_id_priv->id.service_mask = ~cpu_to_be64(0); 3606 cm_id_priv->id.service_mask = ~cpu_to_be64(0);
3590 3607
3591 cm_format_sidr_req_event(work, &cur_cm_id_priv->id); 3608 cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
3592 cm_process_work(cm_id_priv, work); 3609 cm_process_work(cm_id_priv, work);
3593 cm_deref_id(cur_cm_id_priv); 3610 cm_deref_id(cur_cm_id_priv);
3594 return 0; 3611 return 0;
@@ -3665,7 +3682,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3665} 3682}
3666EXPORT_SYMBOL(ib_send_cm_sidr_rep); 3683EXPORT_SYMBOL(ib_send_cm_sidr_rep);
3667 3684
3668static void cm_format_sidr_rep_event(struct cm_work *work) 3685static void cm_format_sidr_rep_event(struct cm_work *work,
3686 const struct cm_id_private *cm_id_priv)
3669{ 3687{
3670 struct cm_sidr_rep_msg *sidr_rep_msg; 3688 struct cm_sidr_rep_msg *sidr_rep_msg;
3671 struct ib_cm_sidr_rep_event_param *param; 3689 struct ib_cm_sidr_rep_event_param *param;
@@ -3678,6 +3696,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
3678 param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); 3696 param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
3679 param->info = &sidr_rep_msg->info; 3697 param->info = &sidr_rep_msg->info;
3680 param->info_len = sidr_rep_msg->info_length; 3698 param->info_len = sidr_rep_msg->info_length;
3699 param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
3681 work->cm_event.private_data = &sidr_rep_msg->private_data; 3700 work->cm_event.private_data = &sidr_rep_msg->private_data;
3682} 3701}
3683 3702
@@ -3701,7 +3720,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
3701 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); 3720 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
3702 spin_unlock_irq(&cm_id_priv->lock); 3721 spin_unlock_irq(&cm_id_priv->lock);
3703 3722
3704 cm_format_sidr_rep_event(work); 3723 cm_format_sidr_rep_event(work, cm_id_priv);
3705 cm_process_work(cm_id_priv, work); 3724 cm_process_work(cm_id_priv, work);
3706 return 0; 3725 return 0;
3707out: 3726out:
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 8b76f0ef965e..476d4309576d 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -44,13 +44,6 @@
44 44
45#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ 45#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
46 46
47enum cm_msg_sequence {
48 CM_MSG_SEQUENCE_REQ,
49 CM_MSG_SEQUENCE_LAP,
50 CM_MSG_SEQUENCE_DREQ,
51 CM_MSG_SEQUENCE_SIDR
52};
53
54struct cm_req_msg { 47struct cm_req_msg {
55 struct ib_mad_hdr hdr; 48 struct ib_mad_hdr hdr;
56 49
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bff10ab141b0..f72677291b69 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -366,7 +366,6 @@ struct cma_multicast {
366 void *context; 366 void *context;
367 struct sockaddr_storage addr; 367 struct sockaddr_storage addr;
368 struct kref mcref; 368 struct kref mcref;
369 bool igmp_joined;
370 u8 join_state; 369 u8 join_state;
371}; 370};
372 371
@@ -412,11 +411,11 @@ struct cma_req_info {
412 struct sockaddr_storage listen_addr_storage; 411 struct sockaddr_storage listen_addr_storage;
413 struct sockaddr_storage src_addr_storage; 412 struct sockaddr_storage src_addr_storage;
414 struct ib_device *device; 413 struct ib_device *device;
415 int port;
416 union ib_gid local_gid; 414 union ib_gid local_gid;
417 __be64 service_id; 415 __be64 service_id;
416 int port;
417 bool has_gid;
418 u16 pkey; 418 u16 pkey;
419 bool has_gid:1;
420}; 419};
421 420
422static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 421static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
@@ -491,12 +490,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
491{ 490{
492 cma_ref_dev(cma_dev); 491 cma_ref_dev(cma_dev);
493 id_priv->cma_dev = cma_dev; 492 id_priv->cma_dev = cma_dev;
494 id_priv->gid_type = 0;
495 id_priv->id.device = cma_dev->device; 493 id_priv->id.device = cma_dev->device;
496 id_priv->id.route.addr.dev_addr.transport = 494 id_priv->id.route.addr.dev_addr.transport =
497 rdma_node_get_transport(cma_dev->device->node_type); 495 rdma_node_get_transport(cma_dev->device->node_type);
498 list_add_tail(&id_priv->list, &cma_dev->id_list); 496 list_add_tail(&id_priv->list, &cma_dev->id_list);
499 id_priv->res.type = RDMA_RESTRACK_CM_ID;
500 rdma_restrack_add(&id_priv->res); 497 rdma_restrack_add(&id_priv->res);
501} 498}
502 499
@@ -603,46 +600,53 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
603 return ret; 600 return ret;
604} 601}
605 602
606static inline int cma_validate_port(struct ib_device *device, u8 port, 603static const struct ib_gid_attr *
607 enum ib_gid_type gid_type, 604cma_validate_port(struct ib_device *device, u8 port,
608 union ib_gid *gid, 605 enum ib_gid_type gid_type,
609 struct rdma_id_private *id_priv) 606 union ib_gid *gid,
607 struct rdma_id_private *id_priv)
610{ 608{
611 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 609 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
612 int bound_if_index = dev_addr->bound_dev_if; 610 int bound_if_index = dev_addr->bound_dev_if;
611 const struct ib_gid_attr *sgid_attr;
613 int dev_type = dev_addr->dev_type; 612 int dev_type = dev_addr->dev_type;
614 struct net_device *ndev = NULL; 613 struct net_device *ndev = NULL;
615 int ret = -ENODEV;
616 614
617 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 615 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
618 return ret; 616 return ERR_PTR(-ENODEV);
619 617
620 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 618 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
621 return ret; 619 return ERR_PTR(-ENODEV);
622 620
623 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { 621 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
624 ndev = dev_get_by_index(dev_addr->net, bound_if_index); 622 ndev = dev_get_by_index(dev_addr->net, bound_if_index);
625 if (!ndev) 623 if (!ndev)
626 return ret; 624 return ERR_PTR(-ENODEV);
627 } else { 625 } else {
628 gid_type = IB_GID_TYPE_IB; 626 gid_type = IB_GID_TYPE_IB;
629 } 627 }
630 628
631 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 629 sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
632 ndev, NULL);
633
634 if (ndev) 630 if (ndev)
635 dev_put(ndev); 631 dev_put(ndev);
632 return sgid_attr;
633}
636 634
637 return ret; 635static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
636 const struct ib_gid_attr *sgid_attr)
637{
638 WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
639 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
638} 640}
639 641
640static int cma_acquire_dev(struct rdma_id_private *id_priv, 642static int cma_acquire_dev(struct rdma_id_private *id_priv,
641 struct rdma_id_private *listen_id_priv) 643 const struct rdma_id_private *listen_id_priv)
642{ 644{
643 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 645 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
646 const struct ib_gid_attr *sgid_attr;
644 struct cma_device *cma_dev; 647 struct cma_device *cma_dev;
645 union ib_gid gid, iboe_gid, *gidp; 648 union ib_gid gid, iboe_gid, *gidp;
649 enum ib_gid_type gid_type;
646 int ret = -ENODEV; 650 int ret = -ENODEV;
647 u8 port; 651 u8 port;
648 652
@@ -662,14 +666,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
662 port = listen_id_priv->id.port_num; 666 port = listen_id_priv->id.port_num;
663 gidp = rdma_protocol_roce(cma_dev->device, port) ? 667 gidp = rdma_protocol_roce(cma_dev->device, port) ?
664 &iboe_gid : &gid; 668 &iboe_gid : &gid;
665 669 gid_type = listen_id_priv->gid_type;
666 ret = cma_validate_port(cma_dev->device, port, 670 sgid_attr = cma_validate_port(cma_dev->device, port,
667 rdma_protocol_ib(cma_dev->device, port) ? 671 gid_type, gidp, id_priv);
668 IB_GID_TYPE_IB : 672 if (!IS_ERR(sgid_attr)) {
669 listen_id_priv->gid_type, gidp,
670 id_priv);
671 if (!ret) {
672 id_priv->id.port_num = port; 673 id_priv->id.port_num = port;
674 cma_bind_sgid_attr(id_priv, sgid_attr);
675 ret = 0;
673 goto out; 676 goto out;
674 } 677 }
675 } 678 }
@@ -683,14 +686,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
683 686
684 gidp = rdma_protocol_roce(cma_dev->device, port) ? 687 gidp = rdma_protocol_roce(cma_dev->device, port) ?
685 &iboe_gid : &gid; 688 &iboe_gid : &gid;
686 689 gid_type = cma_dev->default_gid_type[port - 1];
687 ret = cma_validate_port(cma_dev->device, port, 690 sgid_attr = cma_validate_port(cma_dev->device, port,
688 rdma_protocol_ib(cma_dev->device, port) ? 691 gid_type, gidp, id_priv);
689 IB_GID_TYPE_IB : 692 if (!IS_ERR(sgid_attr)) {
690 cma_dev->default_gid_type[port - 1],
691 gidp, id_priv);
692 if (!ret) {
693 id_priv->id.port_num = port; 693 id_priv->id.port_num = port;
694 cma_bind_sgid_attr(id_priv, sgid_attr);
695 ret = 0;
694 goto out; 696 goto out;
695 } 697 }
696 } 698 }
@@ -732,8 +734,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
732 734
733 if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) 735 if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
734 continue; 736 continue;
735 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, 737 for (i = 0; !rdma_query_gid(cur_dev->device,
736 &gid, NULL); 738 p, i, &gid);
737 i++) { 739 i++) {
738 if (!memcmp(&gid, dgid, sizeof(gid))) { 740 if (!memcmp(&gid, dgid, sizeof(gid))) {
739 cma_dev = cur_dev; 741 cma_dev = cur_dev;
@@ -785,12 +787,14 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
785 id_priv->res.kern_name = caller; 787 id_priv->res.kern_name = caller;
786 else 788 else
787 rdma_restrack_set_task(&id_priv->res, current); 789 rdma_restrack_set_task(&id_priv->res, current);
790 id_priv->res.type = RDMA_RESTRACK_CM_ID;
788 id_priv->state = RDMA_CM_IDLE; 791 id_priv->state = RDMA_CM_IDLE;
789 id_priv->id.context = context; 792 id_priv->id.context = context;
790 id_priv->id.event_handler = event_handler; 793 id_priv->id.event_handler = event_handler;
791 id_priv->id.ps = ps; 794 id_priv->id.ps = ps;
792 id_priv->id.qp_type = qp_type; 795 id_priv->id.qp_type = qp_type;
793 id_priv->tos_set = false; 796 id_priv->tos_set = false;
797 id_priv->gid_type = IB_GID_TYPE_IB;
794 spin_lock_init(&id_priv->lock); 798 spin_lock_init(&id_priv->lock);
795 mutex_init(&id_priv->qp_mutex); 799 mutex_init(&id_priv->qp_mutex);
796 init_completion(&id_priv->comp); 800 init_completion(&id_priv->comp);
@@ -1036,35 +1040,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
1036} 1040}
1037EXPORT_SYMBOL(rdma_init_qp_attr); 1041EXPORT_SYMBOL(rdma_init_qp_attr);
1038 1042
1039static inline int cma_zero_addr(struct sockaddr *addr) 1043static inline bool cma_zero_addr(const struct sockaddr *addr)
1040{ 1044{
1041 switch (addr->sa_family) { 1045 switch (addr->sa_family) {
1042 case AF_INET: 1046 case AF_INET:
1043 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1047 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
1044 case AF_INET6: 1048 case AF_INET6:
1045 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 1049 return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
1046 case AF_IB: 1050 case AF_IB:
1047 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 1051 return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
1048 default: 1052 default:
1049 return 0; 1053 return false;
1050 } 1054 }
1051} 1055}
1052 1056
1053static inline int cma_loopback_addr(struct sockaddr *addr) 1057static inline bool cma_loopback_addr(const struct sockaddr *addr)
1054{ 1058{
1055 switch (addr->sa_family) { 1059 switch (addr->sa_family) {
1056 case AF_INET: 1060 case AF_INET:
1057 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1061 return ipv4_is_loopback(
1062 ((struct sockaddr_in *)addr)->sin_addr.s_addr);
1058 case AF_INET6: 1063 case AF_INET6:
1059 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1064 return ipv6_addr_loopback(
1065 &((struct sockaddr_in6 *)addr)->sin6_addr);
1060 case AF_IB: 1066 case AF_IB:
1061 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1067 return ib_addr_loopback(
1068 &((struct sockaddr_ib *)addr)->sib_addr);
1062 default: 1069 default:
1063 return 0; 1070 return false;
1064 } 1071 }
1065} 1072}
1066 1073
1067static inline int cma_any_addr(struct sockaddr *addr) 1074static inline bool cma_any_addr(const struct sockaddr *addr)
1068{ 1075{
1069 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1076 return cma_zero_addr(addr) || cma_loopback_addr(addr);
1070} 1077}
@@ -1087,7 +1094,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
1087 } 1094 }
1088} 1095}
1089 1096
1090static __be16 cma_port(struct sockaddr *addr) 1097static __be16 cma_port(const struct sockaddr *addr)
1091{ 1098{
1092 struct sockaddr_ib *sib; 1099 struct sockaddr_ib *sib;
1093 1100
@@ -1105,15 +1112,15 @@ static __be16 cma_port(struct sockaddr *addr)
1105 } 1112 }
1106} 1113}
1107 1114
1108static inline int cma_any_port(struct sockaddr *addr) 1115static inline int cma_any_port(const struct sockaddr *addr)
1109{ 1116{
1110 return !cma_port(addr); 1117 return !cma_port(addr);
1111} 1118}
1112 1119
1113static void cma_save_ib_info(struct sockaddr *src_addr, 1120static void cma_save_ib_info(struct sockaddr *src_addr,
1114 struct sockaddr *dst_addr, 1121 struct sockaddr *dst_addr,
1115 struct rdma_cm_id *listen_id, 1122 const struct rdma_cm_id *listen_id,
1116 struct sa_path_rec *path) 1123 const struct sa_path_rec *path)
1117{ 1124{
1118 struct sockaddr_ib *listen_ib, *ib; 1125 struct sockaddr_ib *listen_ib, *ib;
1119 1126
@@ -1198,7 +1205,7 @@ static u16 cma_port_from_service_id(__be64 service_id)
1198 1205
1199static int cma_save_ip_info(struct sockaddr *src_addr, 1206static int cma_save_ip_info(struct sockaddr *src_addr,
1200 struct sockaddr *dst_addr, 1207 struct sockaddr *dst_addr,
1201 struct ib_cm_event *ib_event, 1208 const struct ib_cm_event *ib_event,
1202 __be64 service_id) 1209 __be64 service_id)
1203{ 1210{
1204 struct cma_hdr *hdr; 1211 struct cma_hdr *hdr;
@@ -1228,8 +1235,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
1228 1235
1229static int cma_save_net_info(struct sockaddr *src_addr, 1236static int cma_save_net_info(struct sockaddr *src_addr,
1230 struct sockaddr *dst_addr, 1237 struct sockaddr *dst_addr,
1231 struct rdma_cm_id *listen_id, 1238 const struct rdma_cm_id *listen_id,
1232 struct ib_cm_event *ib_event, 1239 const struct ib_cm_event *ib_event,
1233 sa_family_t sa_family, __be64 service_id) 1240 sa_family_t sa_family, __be64 service_id)
1234{ 1241{
1235 if (sa_family == AF_IB) { 1242 if (sa_family == AF_IB) {
@@ -1361,7 +1368,23 @@ static bool validate_net_dev(struct net_device *net_dev,
1361 } 1368 }
1362} 1369}
1363 1370
1364static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1371static struct net_device *
1372roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
1373{
1374 const struct ib_gid_attr *sgid_attr = NULL;
1375
1376 if (ib_event->event == IB_CM_REQ_RECEIVED)
1377 sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
1378 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
1379 sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
1380
1381 if (!sgid_attr)
1382 return NULL;
1383 dev_hold(sgid_attr->ndev);
1384 return sgid_attr->ndev;
1385}
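roce_get_net_dev_by_cm_event() above returns the netdevice referenced by the received SGID attribute with an extra dev_hold(), so callers pair it with dev_put() once they are done, as the surrounding cma request handling does. A sketch:

	struct net_device *ndev;

	ndev = roce_get_net_dev_by_cm_event(ib_event);
	if (ndev) {
		/* use ndev while the hold is in place */
		dev_put(ndev);
	}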
1386
1387static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
1365 struct cma_req_info *req) 1388 struct cma_req_info *req)
1366{ 1389{
1367 struct sockaddr *listen_addr = 1390 struct sockaddr *listen_addr =
@@ -1376,8 +1399,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
1376 if (err) 1399 if (err)
1377 return ERR_PTR(err); 1400 return ERR_PTR(err);
1378 1401
1379 net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, 1402 if (rdma_protocol_roce(req->device, req->port))
1380 gid, listen_addr); 1403 net_dev = roce_get_net_dev_by_cm_event(ib_event);
1404 else
1405 net_dev = ib_get_net_dev_by_params(req->device, req->port,
1406 req->pkey,
1407 gid, listen_addr);
1381 if (!net_dev) 1408 if (!net_dev)
1382 return ERR_PTR(-ENODEV); 1409 return ERR_PTR(-ENODEV);
1383 1410
@@ -1440,14 +1467,20 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
1440 const struct rdma_addr *addr = &id->route.addr; 1467 const struct rdma_addr *addr = &id->route.addr;
1441 1468
1442 if (!net_dev) 1469 if (!net_dev)
1443 /* This request is an AF_IB request or a RoCE request */ 1470 /* This request is an AF_IB request */
1444 return (!id->port_num || id->port_num == port_num) && 1471 return (!id->port_num || id->port_num == port_num) &&
1445 (addr->src_addr.ss_family == AF_IB || 1472 (addr->src_addr.ss_family == AF_IB);
1446 rdma_protocol_roce(id->device, port_num));
1447 1473
1448 return !addr->dev_addr.bound_dev_if || 1474 /*
 1449 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1475 * Net namespaces must match, and if the listener is listening
 1450 addr->dev_addr.bound_dev_if == net_dev->ifindex); 1476 * on a specific netdevice then the netdevice must match as well.
1477 */
1478 if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1479 (!!addr->dev_addr.bound_dev_if ==
1480 (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
1481 return true;
1482 else
1483 return false;
1451} 1484}
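The condensed boolean in the new cma_match_net_dev() expands as follows (bound_dev_if == 0 means the listener is not bound to a specific netdevice):

	/*
	 * !!bound_dev_if == (bound_dev_if == net_dev->ifindex)
	 *
	 *   bound_dev_if == 0             : 0 == 0 -> match (any netdevice)
	 *   bound_dev_if == ifindex       : 1 == 1 -> match (same netdevice)
	 *   bound_dev_if != 0, != ifindex : 1 == 0 -> no match
	 */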
1452 1485
1453static struct rdma_id_private *cma_find_listener( 1486static struct rdma_id_private *cma_find_listener(
@@ -1480,9 +1513,10 @@ static struct rdma_id_private *cma_find_listener(
1480 return ERR_PTR(-EINVAL); 1513 return ERR_PTR(-EINVAL);
1481} 1514}
1482 1515
1483static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1516static struct rdma_id_private *
1484 struct ib_cm_event *ib_event, 1517cma_ib_id_from_event(struct ib_cm_id *cm_id,
1485 struct net_device **net_dev) 1518 const struct ib_cm_event *ib_event,
1519 struct net_device **net_dev)
1486{ 1520{
1487 struct cma_req_info req; 1521 struct cma_req_info req;
1488 struct rdma_bind_list *bind_list; 1522 struct rdma_bind_list *bind_list;
@@ -1498,10 +1532,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
1498 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1532 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1499 /* Assuming the protocol is AF_IB */ 1533 /* Assuming the protocol is AF_IB */
1500 *net_dev = NULL; 1534 *net_dev = NULL;
1501 } else if (rdma_protocol_roce(req.device, req.port)) {
1502 /* TODO find the net dev matching the request parameters
1503 * through the RoCE GID table */
1504 *net_dev = NULL;
1505 } else { 1535 } else {
1506 return ERR_CAST(*net_dev); 1536 return ERR_CAST(*net_dev);
1507 } 1537 }
@@ -1629,6 +1659,21 @@ static void cma_release_port(struct rdma_id_private *id_priv)
1629 mutex_unlock(&lock); 1659 mutex_unlock(&lock);
1630} 1660}
1631 1661
1662static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
1663 struct cma_multicast *mc)
1664{
1665 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1666 struct net_device *ndev = NULL;
1667
1668 if (dev_addr->bound_dev_if)
1669 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
1670 if (ndev) {
1671 cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
1672 dev_put(ndev);
1673 }
1674 kref_put(&mc->mcref, release_mc);
1675}
1676
1632static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1677static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1633{ 1678{
1634 struct cma_multicast *mc; 1679 struct cma_multicast *mc;
@@ -1642,22 +1687,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
1642 ib_sa_free_multicast(mc->multicast.ib); 1687 ib_sa_free_multicast(mc->multicast.ib);
1643 kfree(mc); 1688 kfree(mc);
1644 } else { 1689 } else {
1645 if (mc->igmp_joined) { 1690 cma_leave_roce_mc_group(id_priv, mc);
1646 struct rdma_dev_addr *dev_addr =
1647 &id_priv->id.route.addr.dev_addr;
1648 struct net_device *ndev = NULL;
1649
1650 if (dev_addr->bound_dev_if)
1651 ndev = dev_get_by_index(&init_net,
1652 dev_addr->bound_dev_if);
1653 if (ndev) {
1654 cma_igmp_send(ndev,
1655 &mc->multicast.ib->rec.mgid,
1656 false);
1657 dev_put(ndev);
1658 }
1659 }
1660 kref_put(&mc->mcref, release_mc);
1661 } 1691 }
1662 } 1692 }
1663} 1693}
@@ -1699,6 +1729,10 @@ void rdma_destroy_id(struct rdma_cm_id *id)
1699 cma_deref_id(id_priv->id.context); 1729 cma_deref_id(id_priv->id.context);
1700 1730
1701 kfree(id_priv->id.route.path_rec); 1731 kfree(id_priv->id.route.path_rec);
1732
1733 if (id_priv->id.route.addr.dev_addr.sgid_attr)
1734 rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
1735
1702 put_net(id_priv->id.route.addr.dev_addr.net); 1736 put_net(id_priv->id.route.addr.dev_addr.net);
1703 kfree(id_priv); 1737 kfree(id_priv);
1704} 1738}
@@ -1730,7 +1764,7 @@ reject:
1730} 1764}
1731 1765
1732static void cma_set_rep_event_data(struct rdma_cm_event *event, 1766static void cma_set_rep_event_data(struct rdma_cm_event *event,
1733 struct ib_cm_rep_event_param *rep_data, 1767 const struct ib_cm_rep_event_param *rep_data,
1734 void *private_data) 1768 void *private_data)
1735{ 1769{
1736 event->param.conn.private_data = private_data; 1770 event->param.conn.private_data = private_data;
@@ -1743,10 +1777,11 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
1743 event->param.conn.qp_num = rep_data->remote_qpn; 1777 event->param.conn.qp_num = rep_data->remote_qpn;
1744} 1778}
1745 1779
1746static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1780static int cma_ib_handler(struct ib_cm_id *cm_id,
1781 const struct ib_cm_event *ib_event)
1747{ 1782{
1748 struct rdma_id_private *id_priv = cm_id->context; 1783 struct rdma_id_private *id_priv = cm_id->context;
1749 struct rdma_cm_event event; 1784 struct rdma_cm_event event = {};
1750 int ret = 0; 1785 int ret = 0;
1751 1786
1752 mutex_lock(&id_priv->handler_mutex); 1787 mutex_lock(&id_priv->handler_mutex);
@@ -1756,7 +1791,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1756 id_priv->state != RDMA_CM_DISCONNECT)) 1791 id_priv->state != RDMA_CM_DISCONNECT))
1757 goto out; 1792 goto out;
1758 1793
1759 memset(&event, 0, sizeof event);
1760 switch (ib_event->event) { 1794 switch (ib_event->event) {
1761 case IB_CM_REQ_ERROR: 1795 case IB_CM_REQ_ERROR:
1762 case IB_CM_REP_ERROR: 1796 case IB_CM_REP_ERROR:
@@ -1825,9 +1859,10 @@ out:
1825 return ret; 1859 return ret;
1826} 1860}
1827 1861
1828static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 1862static struct rdma_id_private *
1829 struct ib_cm_event *ib_event, 1863cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
1830 struct net_device *net_dev) 1864 const struct ib_cm_event *ib_event,
1865 struct net_device *net_dev)
1831{ 1866{
1832 struct rdma_id_private *listen_id_priv; 1867 struct rdma_id_private *listen_id_priv;
1833 struct rdma_id_private *id_priv; 1868 struct rdma_id_private *id_priv;
@@ -1888,11 +1923,12 @@ err:
1888 return NULL; 1923 return NULL;
1889} 1924}
1890 1925
1891static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 1926static struct rdma_id_private *
1892 struct ib_cm_event *ib_event, 1927cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
1893 struct net_device *net_dev) 1928 const struct ib_cm_event *ib_event,
1929 struct net_device *net_dev)
1894{ 1930{
1895 struct rdma_id_private *listen_id_priv; 1931 const struct rdma_id_private *listen_id_priv;
1896 struct rdma_id_private *id_priv; 1932 struct rdma_id_private *id_priv;
1897 struct rdma_cm_id *id; 1933 struct rdma_cm_id *id;
1898 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1934 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
@@ -1932,7 +1968,7 @@ err:
1932} 1968}
1933 1969
1934static void cma_set_req_event_data(struct rdma_cm_event *event, 1970static void cma_set_req_event_data(struct rdma_cm_event *event,
1935 struct ib_cm_req_event_param *req_data, 1971 const struct ib_cm_req_event_param *req_data,
1936 void *private_data, int offset) 1972 void *private_data, int offset)
1937{ 1973{
1938 event->param.conn.private_data = private_data + offset; 1974 event->param.conn.private_data = private_data + offset;
@@ -1946,7 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
1946 event->param.conn.qp_num = req_data->remote_qpn; 1982 event->param.conn.qp_num = req_data->remote_qpn;
1947} 1983}
1948 1984
1949static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 1985static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
1986 const struct ib_cm_event *ib_event)
1950{ 1987{
1951 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 1988 return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1952 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 1989 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
@@ -1955,19 +1992,20 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
1955 (!id->qp_type)); 1992 (!id->qp_type));
1956} 1993}
1957 1994
1958static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1995static int cma_ib_req_handler(struct ib_cm_id *cm_id,
1996 const struct ib_cm_event *ib_event)
1959{ 1997{
1960 struct rdma_id_private *listen_id, *conn_id = NULL; 1998 struct rdma_id_private *listen_id, *conn_id = NULL;
1961 struct rdma_cm_event event; 1999 struct rdma_cm_event event = {};
1962 struct net_device *net_dev; 2000 struct net_device *net_dev;
1963 u8 offset; 2001 u8 offset;
1964 int ret; 2002 int ret;
1965 2003
1966 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 2004 listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
1967 if (IS_ERR(listen_id)) 2005 if (IS_ERR(listen_id))
1968 return PTR_ERR(listen_id); 2006 return PTR_ERR(listen_id);
1969 2007
1970 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 2008 if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
1971 ret = -EINVAL; 2009 ret = -EINVAL;
1972 goto net_dev_put; 2010 goto net_dev_put;
1973 } 2011 }
@@ -1978,16 +2016,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1978 goto err1; 2016 goto err1;
1979 } 2017 }
1980 2018
1981 memset(&event, 0, sizeof event);
1982 offset = cma_user_data_offset(listen_id); 2019 offset = cma_user_data_offset(listen_id);
1983 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2020 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1984 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 2021 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1985 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 2022 conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
1986 event.param.ud.private_data = ib_event->private_data + offset; 2023 event.param.ud.private_data = ib_event->private_data + offset;
1987 event.param.ud.private_data_len = 2024 event.param.ud.private_data_len =
1988 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 2025 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1989 } else { 2026 } else {
1990 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 2027 conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
1991 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 2028 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1992 ib_event->private_data, offset); 2029 ib_event->private_data, offset);
1993 } 2030 }
@@ -2087,7 +2124,7 @@ EXPORT_SYMBOL(rdma_read_gids);
2087static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2124static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2088{ 2125{
2089 struct rdma_id_private *id_priv = iw_id->context; 2126 struct rdma_id_private *id_priv = iw_id->context;
2090 struct rdma_cm_event event; 2127 struct rdma_cm_event event = {};
2091 int ret = 0; 2128 int ret = 0;
2092 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2129 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2093 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2130 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -2096,7 +2133,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
2096 if (id_priv->state != RDMA_CM_CONNECT) 2133 if (id_priv->state != RDMA_CM_CONNECT)
2097 goto out; 2134 goto out;
2098 2135
2099 memset(&event, 0, sizeof event);
2100 switch (iw_event->event) { 2136 switch (iw_event->event) {
2101 case IW_CM_EVENT_CLOSE: 2137 case IW_CM_EVENT_CLOSE:
2102 event.event = RDMA_CM_EVENT_DISCONNECTED; 2138 event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -2156,11 +2192,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2156{ 2192{
2157 struct rdma_cm_id *new_cm_id; 2193 struct rdma_cm_id *new_cm_id;
2158 struct rdma_id_private *listen_id, *conn_id; 2194 struct rdma_id_private *listen_id, *conn_id;
2159 struct rdma_cm_event event; 2195 struct rdma_cm_event event = {};
2160 int ret = -ECONNABORTED; 2196 int ret = -ECONNABORTED;
2161 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2197 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
2162 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2198 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
2163 2199
2200 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2201 event.param.conn.private_data = iw_event->private_data;
2202 event.param.conn.private_data_len = iw_event->private_data_len;
2203 event.param.conn.initiator_depth = iw_event->ird;
2204 event.param.conn.responder_resources = iw_event->ord;
2205
2164 listen_id = cm_id->context; 2206 listen_id = cm_id->context;
2165 2207
2166 mutex_lock(&listen_id->handler_mutex); 2208 mutex_lock(&listen_id->handler_mutex);
@@ -2202,13 +2244,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2202 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2244 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
2203 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2245 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
2204 2246
2205 memset(&event, 0, sizeof event);
2206 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
2207 event.param.conn.private_data = iw_event->private_data;
2208 event.param.conn.private_data_len = iw_event->private_data_len;
2209 event.param.conn.initiator_depth = iw_event->ird;
2210 event.param.conn.responder_resources = iw_event->ord;
2211
2212 /* 2247 /*
2213 * Protect against the user destroying conn_id from another thread 2248 * Protect against the user destroying conn_id from another thread
2214 * until we're done accessing it. 2249 * until we're done accessing it.
@@ -2241,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
2241 2276
2242 addr = cma_src_addr(id_priv); 2277 addr = cma_src_addr(id_priv);
2243 svc_id = rdma_get_service_id(&id_priv->id, addr); 2278 svc_id = rdma_get_service_id(&id_priv->id, addr);
2244 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2279 id = ib_cm_insert_listen(id_priv->id.device,
2280 cma_ib_req_handler, svc_id);
2245 if (IS_ERR(id)) 2281 if (IS_ERR(id))
2246 return PTR_ERR(id); 2282 return PTR_ERR(id);
2247 id_priv->cm_id.ib = id; 2283 id_priv->cm_id.ib = id;
@@ -2561,8 +2597,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
2561 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type); 2597 route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
2562 2598
2563 route->path_rec->roce.route_resolved = true; 2599 route->path_rec->roce.route_resolved = true;
2564 sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
2565 sa_path_set_ifindex(route->path_rec, ndev->ifindex);
2566 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr); 2600 sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
2567 return ndev; 2601 return ndev;
2568} 2602}
@@ -2791,7 +2825,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
2791 p = 1; 2825 p = 1;
2792 2826
2793port_found: 2827port_found:
2794 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2828 ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
2795 if (ret) 2829 if (ret)
2796 goto out; 2830 goto out;
2797 2831
@@ -2817,9 +2851,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
2817 struct rdma_dev_addr *dev_addr, void *context) 2851 struct rdma_dev_addr *dev_addr, void *context)
2818{ 2852{
2819 struct rdma_id_private *id_priv = context; 2853 struct rdma_id_private *id_priv = context;
2820 struct rdma_cm_event event; 2854 struct rdma_cm_event event = {};
2821 2855
2822 memset(&event, 0, sizeof event);
2823 mutex_lock(&id_priv->handler_mutex); 2856 mutex_lock(&id_priv->handler_mutex);
2824 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2857 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
2825 RDMA_CM_ADDR_RESOLVED)) 2858 RDMA_CM_ADDR_RESOLVED))
@@ -2910,7 +2943,7 @@ err:
2910} 2943}
2911 2944
2912static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2945static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2913 struct sockaddr *dst_addr) 2946 const struct sockaddr *dst_addr)
2914{ 2947{
2915 if (!src_addr || !src_addr->sa_family) { 2948 if (!src_addr || !src_addr->sa_family) {
2916 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2949 src_addr = (struct sockaddr *) &id->route.addr.src_addr;
@@ -2931,31 +2964,25 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2931} 2964}
2932 2965
2933int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2966int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2934 struct sockaddr *dst_addr, int timeout_ms) 2967 const struct sockaddr *dst_addr, int timeout_ms)
2935{ 2968{
2936 struct rdma_id_private *id_priv; 2969 struct rdma_id_private *id_priv;
2937 int ret; 2970 int ret;
2938 2971
2939 id_priv = container_of(id, struct rdma_id_private, id); 2972 id_priv = container_of(id, struct rdma_id_private, id);
2940 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2941 if (id_priv->state == RDMA_CM_IDLE) { 2973 if (id_priv->state == RDMA_CM_IDLE) {
2942 ret = cma_bind_addr(id, src_addr, dst_addr); 2974 ret = cma_bind_addr(id, src_addr, dst_addr);
2943 if (ret) { 2975 if (ret)
2944 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2945 return ret; 2976 return ret;
2946 }
2947 } 2977 }
2948 2978
2949 if (cma_family(id_priv) != dst_addr->sa_family) { 2979 if (cma_family(id_priv) != dst_addr->sa_family)
2950 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2951 return -EINVAL; 2980 return -EINVAL;
2952 }
2953 2981
2954 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { 2982 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
2955 memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
2956 return -EINVAL; 2983 return -EINVAL;
2957 }
2958 2984
2985 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2959 atomic_inc(&id_priv->refcount); 2986 atomic_inc(&id_priv->refcount);
2960 if (cma_any_addr(dst_addr)) { 2987 if (cma_any_addr(dst_addr)) {
2961 ret = cma_resolve_loopback(id_priv); 2988 ret = cma_resolve_loopback(id_priv);
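
Note: the hunk above reorders rdma_resolve_addr() so the destination address is copied into the id only after every check has passed, which is why the three memset-on-error cleanups disappear. A condensed sketch of the resulting flow (not the verbatim kernel function; the resolve tail is elided):

int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                      const struct sockaddr *dst_addr, int timeout_ms)
{
        struct rdma_id_private *id_priv =
                container_of(id, struct rdma_id_private, id);
        int ret;

        if (id_priv->state == RDMA_CM_IDLE) {
                ret = cma_bind_addr(id, src_addr, dst_addr);
                if (ret)
                        return ret;     /* nothing copied yet, nothing to undo */
        }

        if (cma_family(id_priv) != dst_addr->sa_family)
                return -EINVAL;

        if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
                return -EINVAL;

        /* Commit the destination only once all checks have passed. */
        memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
        atomic_inc(&id_priv->refcount);
        /* ... cma_resolve_loopback() or rdma_resolve_ip() as before ... */
        return 0;
}
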
@@ -3451,18 +3478,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
3451} 3478}
3452 3479
3453static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3480static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3454 struct ib_cm_event *ib_event) 3481 const struct ib_cm_event *ib_event)
3455{ 3482{
3456 struct rdma_id_private *id_priv = cm_id->context; 3483 struct rdma_id_private *id_priv = cm_id->context;
3457 struct rdma_cm_event event; 3484 struct rdma_cm_event event = {};
3458 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3485 const struct ib_cm_sidr_rep_event_param *rep =
3486 &ib_event->param.sidr_rep_rcvd;
3459 int ret = 0; 3487 int ret = 0;
3460 3488
3461 mutex_lock(&id_priv->handler_mutex); 3489 mutex_lock(&id_priv->handler_mutex);
3462 if (id_priv->state != RDMA_CM_CONNECT) 3490 if (id_priv->state != RDMA_CM_CONNECT)
3463 goto out; 3491 goto out;
3464 3492
3465 memset(&event, 0, sizeof event);
3466 switch (ib_event->event) { 3493 switch (ib_event->event) {
3467 case IB_CM_SIDR_REQ_ERROR: 3494 case IB_CM_SIDR_REQ_ERROR:
3468 event.event = RDMA_CM_EVENT_UNREACHABLE; 3495 event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -3488,7 +3515,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3488 ib_init_ah_attr_from_path(id_priv->id.device, 3515 ib_init_ah_attr_from_path(id_priv->id.device,
3489 id_priv->id.port_num, 3516 id_priv->id.port_num,
3490 id_priv->id.route.path_rec, 3517 id_priv->id.route.path_rec,
3491 &event.param.ud.ah_attr); 3518 &event.param.ud.ah_attr,
3519 rep->sgid_attr);
3492 event.param.ud.qp_num = rep->qpn; 3520 event.param.ud.qp_num = rep->qpn;
3493 event.param.ud.qkey = rep->qkey; 3521 event.param.ud.qkey = rep->qkey;
3494 event.event = RDMA_CM_EVENT_ESTABLISHED; 3522 event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3501,6 +3529,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
3501 } 3529 }
3502 3530
3503 ret = id_priv->id.event_handler(&id_priv->id, &event); 3531 ret = id_priv->id.event_handler(&id_priv->id, &event);
3532
3533 rdma_destroy_ah_attr(&event.param.ud.ah_attr);
3504 if (ret) { 3534 if (ret) {
3505 /* Destroy the CM ID by returning a non-zero value. */ 3535 /* Destroy the CM ID by returning a non-zero value. */
3506 id_priv->cm_id.ib = NULL; 3536 id_priv->cm_id.ib = NULL;
@@ -3557,6 +3587,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
3557 id_priv->cm_id.ib = id; 3587 id_priv->cm_id.ib = id;
3558 3588
3559 req.path = id_priv->id.route.path_rec; 3589 req.path = id_priv->id.route.path_rec;
3590 req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
3560 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3591 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3561 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3592 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
3562 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3593 req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -3618,6 +3649,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
3618 if (route->num_paths == 2) 3649 if (route->num_paths == 2)
3619 req.alternate_path = &route->path_rec[1]; 3650 req.alternate_path = &route->path_rec[1];
3620 3651
3652 req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
3653 /* Alternate path SGID attribute currently unsupported */
3621 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3654 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
3622 req.qp_num = id_priv->qp_num; 3655 req.qp_num = id_priv->qp_num;
3623 req.qp_type = id_priv->id.qp_type; 3656 req.qp_type = id_priv->id.qp_type;
@@ -3928,7 +3961,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3928{ 3961{
3929 struct rdma_id_private *id_priv; 3962 struct rdma_id_private *id_priv;
3930 struct cma_multicast *mc = multicast->context; 3963 struct cma_multicast *mc = multicast->context;
3931 struct rdma_cm_event event; 3964 struct rdma_cm_event event = {};
3932 int ret = 0; 3965 int ret = 0;
3933 3966
3934 id_priv = mc->id_priv; 3967 id_priv = mc->id_priv;
@@ -3952,7 +3985,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3952 } 3985 }
3953 mutex_unlock(&id_priv->qp_mutex); 3986 mutex_unlock(&id_priv->qp_mutex);
3954 3987
3955 memset(&event, 0, sizeof event);
3956 event.status = status; 3988 event.status = status;
3957 event.param.ud.private_data = mc->context; 3989 event.param.ud.private_data = mc->context;
3958 if (!status) { 3990 if (!status) {
@@ -3981,6 +4013,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
3981 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 4013 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
3982 4014
3983 ret = id_priv->id.event_handler(&id_priv->id, &event); 4015 ret = id_priv->id.event_handler(&id_priv->id, &event);
4016
4017 rdma_destroy_ah_attr(&event.param.ud.ah_attr);
3984 if (ret) { 4018 if (ret) {
3985 cma_exch(id_priv, RDMA_CM_DESTROYING); 4019 cma_exch(id_priv, RDMA_CM_DESTROYING);
3986 mutex_unlock(&id_priv->handler_mutex); 4020 mutex_unlock(&id_priv->handler_mutex);
@@ -4010,7 +4044,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
4010 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4044 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
4011 } else if (addr->sa_family == AF_IB) { 4045 } else if (addr->sa_family == AF_IB) {
4012 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 4046 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
4013 } else if ((addr->sa_family == AF_INET6)) { 4047 } else if (addr->sa_family == AF_INET6) {
4014 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 4048 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
4015 if (id_priv->id.ps == RDMA_PS_UDP) 4049 if (id_priv->id.ps == RDMA_PS_UDP)
4016 mc_map[7] = 0x01; /* Use RDMA CM signature */ 4050 mc_map[7] = 0x01; /* Use RDMA CM signature */
@@ -4168,8 +4202,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
4168 if (!send_only) { 4202 if (!send_only) {
4169 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, 4203 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
4170 true); 4204 true);
4171 if (!err)
4172 mc->igmp_joined = true;
4173 } 4205 }
4174 } 4206 }
4175 } else { 4207 } else {
@@ -4221,26 +4253,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
4221 memcpy(&mc->addr, addr, rdma_addr_size(addr)); 4253 memcpy(&mc->addr, addr, rdma_addr_size(addr));
4222 mc->context = context; 4254 mc->context = context;
4223 mc->id_priv = id_priv; 4255 mc->id_priv = id_priv;
4224 mc->igmp_joined = false;
4225 mc->join_state = join_state; 4256 mc->join_state = join_state;
4226 spin_lock(&id_priv->lock);
4227 list_add(&mc->list, &id_priv->mc_list);
4228 spin_unlock(&id_priv->lock);
4229 4257
4230 if (rdma_protocol_roce(id->device, id->port_num)) { 4258 if (rdma_protocol_roce(id->device, id->port_num)) {
4231 kref_init(&mc->mcref); 4259 kref_init(&mc->mcref);
4232 ret = cma_iboe_join_multicast(id_priv, mc); 4260 ret = cma_iboe_join_multicast(id_priv, mc);
4233 } else if (rdma_cap_ib_mcast(id->device, id->port_num)) 4261 if (ret)
4262 goto out_err;
4263 } else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
4234 ret = cma_join_ib_multicast(id_priv, mc); 4264 ret = cma_join_ib_multicast(id_priv, mc);
4235 else 4265 if (ret)
4266 goto out_err;
4267 } else {
4236 ret = -ENOSYS; 4268 ret = -ENOSYS;
4237 4269 goto out_err;
4238 if (ret) {
4239 spin_lock_irq(&id_priv->lock);
4240 list_del(&mc->list);
4241 spin_unlock_irq(&id_priv->lock);
4242 kfree(mc);
4243 } 4270 }
4271
4272 spin_lock(&id_priv->lock);
4273 list_add(&mc->list, &id_priv->mc_list);
4274 spin_unlock(&id_priv->lock);
4275
4276 return 0;
4277out_err:
4278 kfree(mc);
4244 return ret; 4279 return ret;
4245} 4280}
4246EXPORT_SYMBOL(rdma_join_multicast); 4281EXPORT_SYMBOL(rdma_join_multicast);
@@ -4268,23 +4303,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
4268 ib_sa_free_multicast(mc->multicast.ib); 4303 ib_sa_free_multicast(mc->multicast.ib);
4269 kfree(mc); 4304 kfree(mc);
4270 } else if (rdma_protocol_roce(id->device, id->port_num)) { 4305 } else if (rdma_protocol_roce(id->device, id->port_num)) {
4271 if (mc->igmp_joined) { 4306 cma_leave_roce_mc_group(id_priv, mc);
4272 struct rdma_dev_addr *dev_addr =
4273 &id->route.addr.dev_addr;
4274 struct net_device *ndev = NULL;
4275
4276 if (dev_addr->bound_dev_if)
4277 ndev = dev_get_by_index(dev_addr->net,
4278 dev_addr->bound_dev_if);
4279 if (ndev) {
4280 cma_igmp_send(ndev,
4281 &mc->multicast.ib->rec.mgid,
4282 false);
4283 dev_put(ndev);
4284 }
4285 mc->igmp_joined = false;
4286 }
4287 kref_put(&mc->mcref, release_mc);
4288 } 4307 }
4289 return; 4308 return;
4290 } 4309 }
@@ -4410,7 +4429,7 @@ free_cma_dev:
4410 4429
4411static int cma_remove_id_dev(struct rdma_id_private *id_priv) 4430static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4412{ 4431{
4413 struct rdma_cm_event event; 4432 struct rdma_cm_event event = {};
4414 enum rdma_cm_state state; 4433 enum rdma_cm_state state;
4415 int ret = 0; 4434 int ret = 0;
4416 4435
@@ -4426,7 +4445,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
4426 if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) 4445 if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
4427 goto out; 4446 goto out;
4428 4447
4429 memset(&event, 0, sizeof event);
4430 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; 4448 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
4431 ret = id_priv->id.event_handler(&id_priv->id, &event); 4449 ret = id_priv->id.event_handler(&id_priv->id, &event);
4432out: 4450out:
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index fae417a391fb..77c7005c396c 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
91typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, 91typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
92 struct net_device *idev, void *cookie); 92 struct net_device *idev, void *cookie);
93 93
94typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, 94typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
95 struct net_device *idev, void *cookie); 95 struct net_device *idev, void *cookie);
96 96
97void ib_enum_roce_netdev(struct ib_device *ib_dev, 97void ib_enum_roce_netdev(struct ib_device *ib_dev,
98 roce_netdev_filter filter, 98 roce_netdev_filter filter,
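
Note: with roce_netdev_filter switched from int to bool above, a filter simply answers yes/no. A minimal, purely illustrative filter matching the new typedef (the function name and matching rule are hypothetical, not part of this series):

static bool ndev_matches_cookie_filter(struct ib_device *device, u8 port,
                                       struct net_device *idev, void *cookie)
{
        /* Invoke the per-port callback only for the netdev passed as cookie. */
        return idev && idev == cookie;
}
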
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6fa4c59dc7a7..db3b6271f09d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
105 IB_MANDATORY_FUNC(query_pkey), 105 IB_MANDATORY_FUNC(query_pkey),
106 IB_MANDATORY_FUNC(alloc_pd), 106 IB_MANDATORY_FUNC(alloc_pd),
107 IB_MANDATORY_FUNC(dealloc_pd), 107 IB_MANDATORY_FUNC(dealloc_pd),
108 IB_MANDATORY_FUNC(create_ah),
109 IB_MANDATORY_FUNC(destroy_ah),
110 IB_MANDATORY_FUNC(create_qp), 108 IB_MANDATORY_FUNC(create_qp),
111 IB_MANDATORY_FUNC(modify_qp), 109 IB_MANDATORY_FUNC(modify_qp),
112 IB_MANDATORY_FUNC(destroy_qp), 110 IB_MANDATORY_FUNC(destroy_qp),
@@ -862,25 +860,6 @@ int ib_query_port(struct ib_device *device,
862EXPORT_SYMBOL(ib_query_port); 860EXPORT_SYMBOL(ib_query_port);
863 861
864/** 862/**
865 * ib_query_gid - Get GID table entry
866 * @device:Device to query
867 * @port_num:Port number to query
868 * @index:GID table index to query
869 * @gid:Returned GID
870 * @attr: Returned GID attributes related to this GID index (only in RoCE).
871 * NULL means ignore.
872 *
873 * ib_query_gid() fetches the specified GID table entry from the cache.
874 */
875int ib_query_gid(struct ib_device *device,
876 u8 port_num, int index, union ib_gid *gid,
877 struct ib_gid_attr *attr)
878{
879 return ib_get_cached_gid(device, port_num, index, gid, attr);
880}
881EXPORT_SYMBOL(ib_query_gid);
882
883/**
884 * ib_enum_roce_netdev - enumerate all RoCE ports 863 * ib_enum_roce_netdev - enumerate all RoCE ports
885 * @ib_dev : IB device we want to query 864 * @ib_dev : IB device we want to query
886 * @filter: Should we call the callback? 865 * @filter: Should we call the callback?
@@ -1057,7 +1036,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
1057 continue; 1036 continue;
1058 1037
1059 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { 1038 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
1060 ret = ib_query_gid(device, port, i, &tmp_gid, NULL); 1039 ret = rdma_query_gid(device, port, i, &tmp_gid);
1061 if (ret) 1040 if (ret)
1062 return ret; 1041 return ret;
1063 if (!memcmp(&tmp_gid, gid, sizeof *gid)) { 1042 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
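
Note: the ib_query_gid() wrapper is deleted above and in-kernel callers read the cached table through rdma_query_gid() instead, as the two converted call sites show. A minimal sketch of the replacement pattern (example_read_gid is an illustrative name):

static int example_read_gid(struct ib_device *device, u8 port, int index,
                            union ib_gid *gid)
{
        /* rdma_query_gid() returns the cached GID table entry; unlike the
         * old ib_query_gid() there is no ib_gid_attr out-parameter. */
        return rdma_query_gid(device, port, index, gid);
}
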
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f742ae7a768b..ef459f2f2eeb 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -38,6 +38,7 @@
38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39 39
40#include <linux/dma-mapping.h> 40#include <linux/dma-mapping.h>
41#include <linux/idr.h>
41#include <linux/slab.h> 42#include <linux/slab.h>
42#include <linux/module.h> 43#include <linux/module.h>
43#include <linux/security.h> 44#include <linux/security.h>
@@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
58module_param_named(recv_queue_size, mad_recvq_size, int, 0444); 59module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
59MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); 60MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
60 61
62/*
63 * The mlx4 driver uses the top byte to distinguish which virtual function
64 * generated the MAD, so we must avoid using it.
65 */
66#define AGENT_ID_LIMIT (1 << 24)
67static DEFINE_IDR(ib_mad_clients);
61static struct list_head ib_mad_port_list; 68static struct list_head ib_mad_port_list;
62static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
63 69
64/* Port list lock */ 70/* Port list lock */
65static DEFINE_SPINLOCK(ib_mad_port_list_lock); 71static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad);
190 196
191/* 197/*
192 * ib_register_mad_agent - Register to send/receive MADs 198 * ib_register_mad_agent - Register to send/receive MADs
199 *
200 * Context: Process context.
193 */ 201 */
194struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, 202struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
195 u8 port_num, 203 u8 port_num,
@@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
210 struct ib_mad_mgmt_vendor_class *vendor_class; 218 struct ib_mad_mgmt_vendor_class *vendor_class;
211 struct ib_mad_mgmt_method_table *method; 219 struct ib_mad_mgmt_method_table *method;
212 int ret2, qpn; 220 int ret2, qpn;
213 unsigned long flags;
214 u8 mgmt_class, vclass; 221 u8 mgmt_class, vclass;
215 222
216 /* Validate parameters */ 223 /* Validate parameters */
@@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
376 goto error4; 383 goto error4;
377 } 384 }
378 385
379 spin_lock_irqsave(&port_priv->reg_lock, flags); 386 idr_preload(GFP_KERNEL);
380 mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id); 387 idr_lock(&ib_mad_clients);
388 ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
389 AGENT_ID_LIMIT, GFP_ATOMIC);
390 idr_unlock(&ib_mad_clients);
391 idr_preload_end();
392
393 if (ret2 < 0) {
394 ret = ERR_PTR(ret2);
395 goto error5;
396 }
397 mad_agent_priv->agent.hi_tid = ret2;
381 398
382 /* 399 /*
383 * Make sure MAD registration (if supplied) 400 * Make sure MAD registration (if supplied)
384 * is non overlapping with any existing ones 401 * is non overlapping with any existing ones
385 */ 402 */
403 spin_lock_irq(&port_priv->reg_lock);
386 if (mad_reg_req) { 404 if (mad_reg_req) {
387 mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); 405 mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
388 if (!is_vendor_class(mgmt_class)) { 406 if (!is_vendor_class(mgmt_class)) {
@@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
393 if (method) { 411 if (method) {
394 if (method_in_use(&method, 412 if (method_in_use(&method,
395 mad_reg_req)) 413 mad_reg_req))
396 goto error5; 414 goto error6;
397 } 415 }
398 } 416 }
399 ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, 417 ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
409 if (is_vendor_method_in_use( 427 if (is_vendor_method_in_use(
410 vendor_class, 428 vendor_class,
411 mad_reg_req)) 429 mad_reg_req))
412 goto error5; 430 goto error6;
413 } 431 }
414 } 432 }
415 ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); 433 ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
416 } 434 }
417 if (ret2) { 435 if (ret2) {
418 ret = ERR_PTR(ret2); 436 ret = ERR_PTR(ret2);
419 goto error5; 437 goto error6;
420 } 438 }
421 } 439 }
422 440 spin_unlock_irq(&port_priv->reg_lock);
423 /* Add mad agent into port's agent list */
424 list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
425 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
426 441
427 return &mad_agent_priv->agent; 442 return &mad_agent_priv->agent;
443error6:
444 spin_unlock_irq(&port_priv->reg_lock);
445 idr_lock(&ib_mad_clients);
446 idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
447 idr_unlock(&ib_mad_clients);
428error5: 448error5:
429 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
430 ib_mad_agent_security_cleanup(&mad_agent_priv->agent); 449 ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
431error4: 450error4:
432 kfree(reg_req); 451 kfree(reg_req);
@@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv
575static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) 594static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
576{ 595{
577 struct ib_mad_port_private *port_priv; 596 struct ib_mad_port_private *port_priv;
578 unsigned long flags;
579 597
580 /* Note that we could still be handling received MADs */ 598 /* Note that we could still be handling received MADs */
581 599
@@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
587 port_priv = mad_agent_priv->qp_info->port_priv; 605 port_priv = mad_agent_priv->qp_info->port_priv;
588 cancel_delayed_work(&mad_agent_priv->timed_work); 606 cancel_delayed_work(&mad_agent_priv->timed_work);
589 607
590 spin_lock_irqsave(&port_priv->reg_lock, flags); 608 spin_lock_irq(&port_priv->reg_lock);
591 remove_mad_reg_req(mad_agent_priv); 609 remove_mad_reg_req(mad_agent_priv);
592 list_del(&mad_agent_priv->agent_list); 610 spin_unlock_irq(&port_priv->reg_lock);
593 spin_unlock_irqrestore(&port_priv->reg_lock, flags); 611 idr_lock(&ib_mad_clients);
612 idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
613 idr_unlock(&ib_mad_clients);
594 614
595 flush_workqueue(port_priv->wq); 615 flush_workqueue(port_priv->wq);
596 ib_cancel_rmpp_recvs(mad_agent_priv); 616 ib_cancel_rmpp_recvs(mad_agent_priv);
@@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
601 ib_mad_agent_security_cleanup(&mad_agent_priv->agent); 621 ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
602 622
603 kfree(mad_agent_priv->reg_req); 623 kfree(mad_agent_priv->reg_req);
604 kfree(mad_agent_priv); 624 kfree_rcu(mad_agent_priv, rcu);
605} 625}
606 626
607static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) 627static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
625 645
626/* 646/*
627 * ib_unregister_mad_agent - Unregisters a client from using MAD services 647 * ib_unregister_mad_agent - Unregisters a client from using MAD services
648 *
649 * Context: Process context.
628 */ 650 */
629void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) 651void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
630{ 652{
@@ -1159,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1159{ 1181{
1160 struct ib_mad_qp_info *qp_info; 1182 struct ib_mad_qp_info *qp_info;
1161 struct list_head *list; 1183 struct list_head *list;
1162 struct ib_send_wr *bad_send_wr;
1163 struct ib_mad_agent *mad_agent; 1184 struct ib_mad_agent *mad_agent;
1164 struct ib_sge *sge; 1185 struct ib_sge *sge;
1165 unsigned long flags; 1186 unsigned long flags;
@@ -1197,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1197 spin_lock_irqsave(&qp_info->send_queue.lock, flags); 1218 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1198 if (qp_info->send_queue.count < qp_info->send_queue.max_active) { 1219 if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1199 ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, 1220 ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1200 &bad_send_wr); 1221 NULL);
1201 list = &qp_info->send_queue.list; 1222 list = &qp_info->send_queue.list;
1202 } else { 1223 } else {
1203 ret = 0; 1224 ret = 0;
@@ -1720,22 +1741,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1720 struct ib_mad_agent_private *mad_agent = NULL; 1741 struct ib_mad_agent_private *mad_agent = NULL;
1721 unsigned long flags; 1742 unsigned long flags;
1722 1743
1723 spin_lock_irqsave(&port_priv->reg_lock, flags);
1724 if (ib_response_mad(mad_hdr)) { 1744 if (ib_response_mad(mad_hdr)) {
1725 u32 hi_tid; 1745 u32 hi_tid;
1726 struct ib_mad_agent_private *entry;
1727 1746
1728 /* 1747 /*
1729 * Routing is based on high 32 bits of transaction ID 1748 * Routing is based on high 32 bits of transaction ID
1730 * of MAD. 1749 * of MAD.
1731 */ 1750 */
1732 hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; 1751 hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
1733 list_for_each_entry(entry, &port_priv->agent_list, agent_list) { 1752 rcu_read_lock();
1734 if (entry->agent.hi_tid == hi_tid) { 1753 mad_agent = idr_find(&ib_mad_clients, hi_tid);
1735 mad_agent = entry; 1754 if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
1736 break; 1755 mad_agent = NULL;
1737 } 1756 rcu_read_unlock();
1738 }
1739 } else { 1757 } else {
1740 struct ib_mad_mgmt_class_table *class; 1758 struct ib_mad_mgmt_class_table *class;
1741 struct ib_mad_mgmt_method_table *method; 1759 struct ib_mad_mgmt_method_table *method;
@@ -1744,6 +1762,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1744 const struct ib_vendor_mad *vendor_mad; 1762 const struct ib_vendor_mad *vendor_mad;
1745 int index; 1763 int index;
1746 1764
1765 spin_lock_irqsave(&port_priv->reg_lock, flags);
1747 /* 1766 /*
1748 * Routing is based on version, class, and method 1767 * Routing is based on version, class, and method
1749 * For "newer" vendor MADs, also based on OUI 1768 * For "newer" vendor MADs, also based on OUI
@@ -1783,20 +1802,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1783 ~IB_MGMT_METHOD_RESP]; 1802 ~IB_MGMT_METHOD_RESP];
1784 } 1803 }
1785 } 1804 }
1805 if (mad_agent)
1806 atomic_inc(&mad_agent->refcount);
1807out:
1808 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1786 } 1809 }
1787 1810
1788 if (mad_agent) { 1811 if (mad_agent && !mad_agent->agent.recv_handler) {
1789 if (mad_agent->agent.recv_handler) 1812 dev_notice(&port_priv->device->dev,
1790 atomic_inc(&mad_agent->refcount); 1813 "No receive handler for client %p on port %d\n",
1791 else { 1814 &mad_agent->agent, port_priv->port_num);
1792 dev_notice(&port_priv->device->dev, 1815 deref_mad_agent(mad_agent);
1793 "No receive handler for client %p on port %d\n", 1816 mad_agent = NULL;
1794 &mad_agent->agent, port_priv->port_num);
1795 mad_agent = NULL;
1796 }
1797 } 1817 }
1798out:
1799 spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1800 1818
1801 return mad_agent; 1819 return mad_agent;
1802} 1820}
@@ -1896,8 +1914,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
1896 const struct ib_global_route *grh = 1914 const struct ib_global_route *grh =
1897 rdma_ah_read_grh(&attr); 1915 rdma_ah_read_grh(&attr);
1898 1916
1899 if (ib_get_cached_gid(device, port_num, 1917 if (rdma_query_gid(device, port_num,
1900 grh->sgid_index, &sgid, NULL)) 1918 grh->sgid_index, &sgid))
1901 return 0; 1919 return 0;
1902 return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 1920 return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1903 16); 1921 16);
@@ -2457,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2457 struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; 2475 struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
2458 struct ib_mad_qp_info *qp_info; 2476 struct ib_mad_qp_info *qp_info;
2459 struct ib_mad_queue *send_queue; 2477 struct ib_mad_queue *send_queue;
2460 struct ib_send_wr *bad_send_wr;
2461 struct ib_mad_send_wc mad_send_wc; 2478 struct ib_mad_send_wc mad_send_wc;
2462 unsigned long flags; 2479 unsigned long flags;
2463 int ret; 2480 int ret;
@@ -2507,7 +2524,7 @@ retry:
2507 2524
2508 if (queued_send_wr) { 2525 if (queued_send_wr) {
2509 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, 2526 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2510 &bad_send_wr); 2527 NULL);
2511 if (ret) { 2528 if (ret) {
2512 dev_err(&port_priv->device->dev, 2529 dev_err(&port_priv->device->dev,
2513 "ib_post_send failed: %d\n", ret); 2530 "ib_post_send failed: %d\n", ret);
@@ -2552,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2552 if (wc->status == IB_WC_WR_FLUSH_ERR) { 2569 if (wc->status == IB_WC_WR_FLUSH_ERR) {
2553 if (mad_send_wr->retry) { 2570 if (mad_send_wr->retry) {
2554 /* Repost send */ 2571 /* Repost send */
2555 struct ib_send_wr *bad_send_wr;
2556
2557 mad_send_wr->retry = 0; 2572 mad_send_wr->retry = 0;
2558 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, 2573 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2559 &bad_send_wr); 2574 NULL);
2560 if (!ret) 2575 if (!ret)
2561 return false; 2576 return false;
2562 } 2577 }
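
Note: the hunks above drop the scratch bad_send_wr/bad_recv_wr locals because ib_post_send()/ib_post_recv() now accept NULL when the caller does not care which WR in the chain failed. A minimal sketch (example_post_one is an illustrative name):

static int example_post_one(struct ib_qp *qp, struct ib_send_wr *wr)
{
        /* Passing NULL for bad_wr is the common case after this series. */
        return ib_post_send(qp, wr, NULL);
}
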
@@ -2872,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2872 int post, ret; 2887 int post, ret;
2873 struct ib_mad_private *mad_priv; 2888 struct ib_mad_private *mad_priv;
2874 struct ib_sge sg_list; 2889 struct ib_sge sg_list;
2875 struct ib_recv_wr recv_wr, *bad_recv_wr; 2890 struct ib_recv_wr recv_wr;
2876 struct ib_mad_queue *recv_queue = &qp_info->recv_queue; 2891 struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2877 2892
2878 /* Initialize common scatter list fields */ 2893 /* Initialize common scatter list fields */
@@ -2916,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2916 post = (++recv_queue->count < recv_queue->max_active); 2931 post = (++recv_queue->count < recv_queue->max_active);
2917 list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); 2932 list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2918 spin_unlock_irqrestore(&recv_queue->lock, flags); 2933 spin_unlock_irqrestore(&recv_queue->lock, flags);
2919 ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); 2934 ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
2920 if (ret) { 2935 if (ret) {
2921 spin_lock_irqsave(&recv_queue->lock, flags); 2936 spin_lock_irqsave(&recv_queue->lock, flags);
2922 list_del(&mad_priv->header.mad_list.list); 2937 list_del(&mad_priv->header.mad_list.list);
@@ -3159,7 +3174,6 @@ static int ib_mad_port_open(struct ib_device *device,
3159 port_priv->device = device; 3174 port_priv->device = device;
3160 port_priv->port_num = port_num; 3175 port_priv->port_num = port_num;
3161 spin_lock_init(&port_priv->reg_lock); 3176 spin_lock_init(&port_priv->reg_lock);
3162 INIT_LIST_HEAD(&port_priv->agent_list);
3163 init_mad_qp(port_priv, &port_priv->qp_info[0]); 3177 init_mad_qp(port_priv, &port_priv->qp_info[0]);
3164 init_mad_qp(port_priv, &port_priv->qp_info[1]); 3178 init_mad_qp(port_priv, &port_priv->qp_info[1]);
3165 3179
@@ -3338,6 +3352,9 @@ int ib_mad_init(void)
3338 3352
3339 INIT_LIST_HEAD(&ib_mad_port_list); 3353 INIT_LIST_HEAD(&ib_mad_port_list);
3340 3354
3355 /* Client ID 0 is used for snoop-only clients */
3356 idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL);
3357
3341 if (ib_register_client(&mad_client)) { 3358 if (ib_register_client(&mad_client)) {
3342 pr_err("Couldn't register ib_mad client\n"); 3359 pr_err("Couldn't register ib_mad client\n");
3343 return -EINVAL; 3360 return -EINVAL;
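
Note: the mad.c hunks above replace the per-port agent_list with a global IDR keyed by hi_tid, so the response path no longer takes reg_lock. Pulled together from the registration and find_mad_agent() hunks into two illustrative helpers (example_* names are not from this series):

static int example_register_hi_tid(struct ib_mad_agent_private *mad_agent_priv)
{
        int id;

        /* Cyclic allocation below AGENT_ID_LIMIT keeps the top byte free
         * for mlx4's virtual-function encoding. */
        idr_preload(GFP_KERNEL);
        idr_lock(&ib_mad_clients);
        id = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
                              AGENT_ID_LIMIT, GFP_ATOMIC);
        idr_unlock(&ib_mad_clients);
        idr_preload_end();
        if (id < 0)
                return id;

        mad_agent_priv->agent.hi_tid = id;
        return 0;
}

static struct ib_mad_agent_private *example_find_by_hi_tid(u32 hi_tid)
{
        struct ib_mad_agent_private *mad_agent;

        /* Lockless lookup under RCU; only keep agents still holding a ref. */
        rcu_read_lock();
        mad_agent = idr_find(&ib_mad_clients, hi_tid);
        if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
                mad_agent = NULL;       /* being unregistered, skip it */
        rcu_read_unlock();
        return mad_agent;
}
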
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 28669f6419e1..d84ae1671898 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -89,7 +89,6 @@ struct ib_rmpp_segment {
89}; 89};
90 90
91struct ib_mad_agent_private { 91struct ib_mad_agent_private {
92 struct list_head agent_list;
93 struct ib_mad_agent agent; 92 struct ib_mad_agent agent;
94 struct ib_mad_reg_req *reg_req; 93 struct ib_mad_reg_req *reg_req;
95 struct ib_mad_qp_info *qp_info; 94 struct ib_mad_qp_info *qp_info;
@@ -105,7 +104,10 @@ struct ib_mad_agent_private {
105 struct list_head rmpp_list; 104 struct list_head rmpp_list;
106 105
107 atomic_t refcount; 106 atomic_t refcount;
108 struct completion comp; 107 union {
108 struct completion comp;
109 struct rcu_head rcu;
110 };
109}; 111};
110 112
111struct ib_mad_snoop_private { 113struct ib_mad_snoop_private {
@@ -203,7 +205,6 @@ struct ib_mad_port_private {
203 205
204 spinlock_t reg_lock; 206 spinlock_t reg_lock;
205 struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; 207 struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
206 struct list_head agent_list;
207 struct workqueue_struct *wq; 208 struct workqueue_struct *wq;
208 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; 209 struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
209}; 210};
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 6c48f4193dda..d50ff70bb24b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
716} 716}
717EXPORT_SYMBOL(ib_sa_get_mcmember_rec); 717EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
718 718
719/**
720 * ib_init_ah_from_mcmember - Initialize AH attribute from multicast
721 * member record and gid of the device.
722 * @device: RDMA device
723 * @port_num: Port of the rdma device to consider
724 * @ndev: Optional netdevice, applicable only for RoCE
725 * @gid_type: GID type to consider
726 * @ah_attr: AH attribute to fill in on successful completion
727 *
728 * ib_init_ah_from_mcmember() initializes the AH attribute based on the
729 * multicast member record and other device properties. On success the
730 * caller is responsible for calling rdma_destroy_ah_attr() on the ah_attr.
731 * Returns 0 on success or an appropriate error code.
732 *
733 */
719int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, 734int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
720 struct ib_sa_mcmember_rec *rec, 735 struct ib_sa_mcmember_rec *rec,
721 struct net_device *ndev, 736 struct net_device *ndev,
722 enum ib_gid_type gid_type, 737 enum ib_gid_type gid_type,
723 struct rdma_ah_attr *ah_attr) 738 struct rdma_ah_attr *ah_attr)
724{ 739{
725 int ret; 740 const struct ib_gid_attr *sgid_attr;
726 u16 gid_index;
727 741
728 /* GID table is not based on the netdevice for IB link layer, 742 /* GID table is not based on the netdevice for IB link layer,
729 * so ignore ndev during search. 743 * so ignore ndev during search.
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
733 else if (!rdma_protocol_roce(device, port_num)) 747 else if (!rdma_protocol_roce(device, port_num))
734 return -EINVAL; 748 return -EINVAL;
735 749
736 ret = ib_find_cached_gid_by_port(device, &rec->port_gid, 750 sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
737 gid_type, port_num, 751 gid_type, port_num, ndev);
738 ndev, 752 if (IS_ERR(sgid_attr))
739 &gid_index); 753 return PTR_ERR(sgid_attr);
740 if (ret)
741 return ret;
742 754
743 memset(ah_attr, 0, sizeof *ah_attr); 755 memset(ah_attr, 0, sizeof(*ah_attr));
744 ah_attr->type = rdma_ah_find_type(device, port_num); 756 ah_attr->type = rdma_ah_find_type(device, port_num);
745 757
746 rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid)); 758 rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
747 rdma_ah_set_sl(ah_attr, rec->sl); 759 rdma_ah_set_sl(ah_attr, rec->sl);
748 rdma_ah_set_port_num(ah_attr, port_num); 760 rdma_ah_set_port_num(ah_attr, port_num);
749 rdma_ah_set_static_rate(ah_attr, rec->rate); 761 rdma_ah_set_static_rate(ah_attr, rec->rate);
750 762 rdma_move_grh_sgid_attr(ah_attr, &rec->mgid,
751 rdma_ah_set_grh(ah_attr, &rec->mgid, 763 be32_to_cpu(rec->flow_label),
752 be32_to_cpu(rec->flow_label), 764 rec->hop_limit, rec->traffic_class,
753 (u8)gid_index, 765 sgid_attr);
754 rec->hop_limit,
755 rec->traffic_class);
756 return 0; 766 return 0;
757} 767}
758EXPORT_SYMBOL(ib_init_ah_from_mcmember); 768EXPORT_SYMBOL(ib_init_ah_from_mcmember);
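
Note: since ib_init_ah_from_mcmember() now pins the resolved sgid_attr into the GRH of ah_attr, the caller owns a reference it must drop. A hedged caller-side sketch of the contract spelled out in the new kernel-doc (example_mcast_ah and its parameters are placeholders):

static int example_mcast_ah(struct ib_device *device, u8 port_num,
                            struct ib_sa_mcmember_rec *rec,
                            struct net_device *ndev,
                            enum ib_gid_type gid_type)
{
        struct rdma_ah_attr ah_attr;
        int ret;

        ret = ib_init_ah_from_mcmember(device, port_num, rec, ndev,
                                       gid_type, &ah_attr);
        if (ret)
                return ret;

        /* ... create an AH / fill the UD event from ah_attr ... */

        /* Drops the sgid_attr reference taken on success. */
        rdma_destroy_ah_attr(&ah_attr);
        return 0;
}
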
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 340c7bea45ab..0385ab438320 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg,
237 if (ret) 237 if (ret)
238 return ret; 238 return ret;
239 239
240 BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
241 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
242 (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
243 return -EMSGSIZE;
244 if (rdma_protocol_ib(device, port) &&
245 nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
246 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
247 return -EMSGSIZE;
248 if (rdma_protocol_ib(device, port)) { 240 if (rdma_protocol_ib(device, port)) {
241 BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
242 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
243 (u64)attr.port_cap_flags,
244 RDMA_NLDEV_ATTR_PAD))
245 return -EMSGSIZE;
246 if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
247 attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
248 return -EMSGSIZE;
249 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) 249 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
250 return -EMSGSIZE; 250 return -EMSGSIZE;
251 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) 251 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 475910ffbcb6..6eb64c6f0802 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -32,6 +32,7 @@
32 32
33#include <linux/file.h> 33#include <linux/file.h>
34#include <linux/anon_inodes.h> 34#include <linux/anon_inodes.h>
35#include <linux/sched/mm.h>
35#include <rdma/ib_verbs.h> 36#include <rdma/ib_verbs.h>
36#include <rdma/uverbs_types.h> 37#include <rdma/uverbs_types.h>
37#include <linux/rcupdate.h> 38#include <linux/rcupdate.h>
@@ -41,51 +42,6 @@
41#include "core_priv.h" 42#include "core_priv.h"
42#include "rdma_core.h" 43#include "rdma_core.h"
43 44
44int uverbs_ns_idx(u16 *id, unsigned int ns_count)
45{
46 int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
47
48 if (ret >= ns_count)
49 return -EINVAL;
50
51 *id &= ~UVERBS_ID_NS_MASK;
52 return ret;
53}
54
55const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
56 uint16_t object)
57{
58 const struct uverbs_root_spec *object_hash = ibdev->specs_root;
59 const struct uverbs_object_spec_hash *objects;
60 int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
61
62 if (ret < 0)
63 return NULL;
64
65 objects = object_hash->object_buckets[ret];
66
67 if (object >= objects->num_objects)
68 return NULL;
69
70 return objects->objects[object];
71}
72
73const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
74 uint16_t method)
75{
76 const struct uverbs_method_spec_hash *methods;
77 int ret = uverbs_ns_idx(&method, object->num_buckets);
78
79 if (ret < 0)
80 return NULL;
81
82 methods = object->method_buckets[ret];
83 if (method >= methods->num_methods)
84 return NULL;
85
86 return methods->methods[method];
87}
88
89void uverbs_uobject_get(struct ib_uobject *uobject) 45void uverbs_uobject_get(struct ib_uobject *uobject)
90{ 46{
91 kref_get(&uobject->ref); 47 kref_get(&uobject->ref);
@@ -96,7 +52,7 @@ static void uverbs_uobject_free(struct kref *ref)
96 struct ib_uobject *uobj = 52 struct ib_uobject *uobj =
97 container_of(ref, struct ib_uobject, ref); 53 container_of(ref, struct ib_uobject, ref);
98 54
99 if (uobj->type->type_class->needs_kfree_rcu) 55 if (uobj->uapi_object->type_class->needs_kfree_rcu)
100 kfree_rcu(uobj, rcu); 56 kfree_rcu(uobj, rcu);
101 else 57 else
102 kfree(uobj); 58 kfree(uobj);
@@ -107,7 +63,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject)
107 kref_put(&uobject->ref, uverbs_uobject_free); 63 kref_put(&uobject->ref, uverbs_uobject_free);
108} 64}
109 65
110static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive) 66static int uverbs_try_lock_object(struct ib_uobject *uobj,
67 enum rdma_lookup_mode mode)
111{ 68{
112 /* 69 /*
113 * When a shared access is required, we use a positive counter. Each 70 * When a shared access is required, we use a positive counter. Each
@@ -120,27 +77,211 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
120 * concurrently, setting the counter to zero is enough for releasing 77 * concurrently, setting the counter to zero is enough for releasing
121 * this lock. 78 * this lock.
122 */ 79 */
123 if (!exclusive) 80 switch (mode) {
81 case UVERBS_LOOKUP_READ:
124 return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ? 82 return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
125 -EBUSY : 0; 83 -EBUSY : 0;
84 case UVERBS_LOOKUP_WRITE:
85 /* lock is exclusive */
86 return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
87 case UVERBS_LOOKUP_DESTROY:
88 return 0;
89 }
90 return 0;
91}
92
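
Note: uverbs_try_lock_object() above turns usecnt into a small reader/writer lock: READ bumps the counter unless it is write-locked (-1), WRITE swaps 0 to -1, and DESTROY takes no lock at all. From a command handler's point of view the lookups pair as in this simplified sketch (example_write_locked_access is an illustrative name):

static int example_write_locked_access(const struct uverbs_api_object *obj,
                                       struct ib_uverbs_file *ufile, s64 id)
{
        struct ib_uobject *uobj;

        /* WRITE: exclusive usecnt (0 -> -1); READ would take a shared count. */
        uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_WRITE);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);

        /* ... touch uobj->object exclusively ... */

        rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
        return 0;
}
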
93static void assert_uverbs_usecnt(struct ib_uobject *uobj,
94 enum rdma_lookup_mode mode)
95{
96#ifdef CONFIG_LOCKDEP
97 switch (mode) {
98 case UVERBS_LOOKUP_READ:
99 WARN_ON(atomic_read(&uobj->usecnt) <= 0);
100 break;
101 case UVERBS_LOOKUP_WRITE:
102 WARN_ON(atomic_read(&uobj->usecnt) != -1);
103 break;
104 case UVERBS_LOOKUP_DESTROY:
105 break;
106 }
107#endif
108}
109
110/*
111 * This must be called with the hw_destroy_rwsem locked for read or write;
112 * the uobject itself must also be locked for write.
113 *
114 * Upon return the HW object is guaranteed to be destroyed.
115 *
116 * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
117 * however the type's alloc_commit function cannot have been called and the
118 * uobject cannot be on the uobjects list.
119 *
120 * For RDMA_REMOVE_DESTROY the caller should be holding a kref (e.g. via
121 * rdma_lookup_get_uobject) and the object is left in a state where the caller
122 * needs to call rdma_lookup_put_uobject.
123 *
124 * For all other destroy modes this function internally unlocks the uobject
125 * and consumes the kref on the uobj.
126 */
127static int uverbs_destroy_uobject(struct ib_uobject *uobj,
128 enum rdma_remove_reason reason)
129{
130 struct ib_uverbs_file *ufile = uobj->ufile;
131 unsigned long flags;
132 int ret;
133
134 lockdep_assert_held(&ufile->hw_destroy_rwsem);
135 assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
136
137 if (uobj->object) {
138 ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
139 if (ret) {
140 if (ib_is_destroy_retryable(ret, reason, uobj))
141 return ret;
142
143 /* Nothing to be done, dangle the memory and move on */
144 WARN(true,
145 "ib_uverbs: failed to remove uobject id %d, driver err=%d",
146 uobj->id, ret);
147 }
148
149 uobj->object = NULL;
150 }
126 151
127 /* lock is either WRITE or DESTROY - should be exclusive */ 152 if (reason == RDMA_REMOVE_ABORT) {
128 return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY; 153 WARN_ON(!list_empty(&uobj->list));
154 WARN_ON(!uobj->context);
155 uobj->uapi_object->type_class->alloc_abort(uobj);
156 }
157
158 uobj->context = NULL;
159
160 /*
161 * For DESTROY the usecnt is held write locked, the caller is expected
162 * to unlock it and put the object when done with it. Only DESTROY
163 * can remove the IDR handle.
164 */
165 if (reason != RDMA_REMOVE_DESTROY)
166 atomic_set(&uobj->usecnt, 0);
167 else
168 uobj->uapi_object->type_class->remove_handle(uobj);
169
170 if (!list_empty(&uobj->list)) {
171 spin_lock_irqsave(&ufile->uobjects_lock, flags);
172 list_del_init(&uobj->list);
173 spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
174
175 /*
176 * Pairs with the get in rdma_alloc_commit_uobject(), could
177 * destroy uobj.
178 */
179 uverbs_uobject_put(uobj);
180 }
181
182 /*
183 * When aborting the stack kref remains owned by the core code, and is
184 * not transferred into the type. Pairs with the get in alloc_uobj
185 */
186 if (reason == RDMA_REMOVE_ABORT)
187 uverbs_uobject_put(uobj);
188
189 return 0;
129} 190}
130 191
131static struct ib_uobject *alloc_uobj(struct ib_ucontext *context, 192/*
132 const struct uverbs_obj_type *type) 193 * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
194 * sequence. It should only be used from command callbacks. On success the
195 * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
196 * version requires the caller to have already obtained an
197 * LOOKUP_DESTROY uobject kref.
198 */
199int uobj_destroy(struct ib_uobject *uobj)
133{ 200{
134 struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL); 201 struct ib_uverbs_file *ufile = uobj->ufile;
202 int ret;
203
204 down_read(&ufile->hw_destroy_rwsem);
205
206 ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
207 if (ret)
208 goto out_unlock;
209
210 ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
211 if (ret) {
212 atomic_set(&uobj->usecnt, 0);
213 goto out_unlock;
214 }
135 215
216out_unlock:
217 up_read(&ufile->hw_destroy_rwsem);
218 return ret;
219}
220
221/*
222 * uobj_get_destroy destroys the HW object and returns a handle to the uobj
223 * with a NULL object pointer. The caller must pair this with
224 * uverbs_put_destroy.
225 */
226struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
227 u32 id, struct ib_uverbs_file *ufile)
228{
229 struct ib_uobject *uobj;
230 int ret;
231
232 uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
233 if (IS_ERR(uobj))
234 return uobj;
235
236 ret = uobj_destroy(uobj);
237 if (ret) {
238 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
239 return ERR_PTR(ret);
240 }
241
242 return uobj;
243}
244
245/*
246 * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
247 * on success (negative errno on failure). For use by callers that do not need
248 * the uobj.
249 */
250int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
251 struct ib_uverbs_file *ufile, int success_res)
252{
253 struct ib_uobject *uobj;
254
255 uobj = __uobj_get_destroy(obj, id, ufile);
256 if (IS_ERR(uobj))
257 return PTR_ERR(uobj);
258
259 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
260 return success_res;
261}
262
263/* alloc_uobj must be undone by uverbs_destroy_uobject() */
264static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
265 const struct uverbs_api_object *obj)
266{
267 struct ib_uobject *uobj;
268 struct ib_ucontext *ucontext;
269
270 ucontext = ib_uverbs_get_ucontext(ufile);
271 if (IS_ERR(ucontext))
272 return ERR_CAST(ucontext);
273
274 uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
136 if (!uobj) 275 if (!uobj)
137 return ERR_PTR(-ENOMEM); 276 return ERR_PTR(-ENOMEM);
138 /* 277 /*
139 * user_handle should be filled by the handler, 278 * user_handle should be filled by the handler,
140 * The object is added to the list in the commit stage. 279 * The object is added to the list in the commit stage.
141 */ 280 */
142 uobj->context = context; 281 uobj->ufile = ufile;
143 uobj->type = type; 282 uobj->context = ucontext;
283 INIT_LIST_HEAD(&uobj->list);
284 uobj->uapi_object = obj;
144 /* 285 /*
145 * Allocated objects start out as write locked to deny any other 286 * Allocated objects start out as write locked to deny any other
146 * syscalls from accessing them until they are committed. See 287 * syscalls from accessing them until they are committed. See
@@ -157,45 +298,39 @@ static int idr_add_uobj(struct ib_uobject *uobj)
157 int ret; 298 int ret;
158 299
159 idr_preload(GFP_KERNEL); 300 idr_preload(GFP_KERNEL);
160 spin_lock(&uobj->context->ufile->idr_lock); 301 spin_lock(&uobj->ufile->idr_lock);
161 302
162 /* 303 /*
163 * We start with allocating an idr pointing to NULL. This represents an 304 * We start with allocating an idr pointing to NULL. This represents an
164 * object which isn't initialized yet. We'll replace it later on with 305 * object which isn't initialized yet. We'll replace it later on with
165 * the real object once we commit. 306 * the real object once we commit.
166 */ 307 */
167 ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0, 308 ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
168 min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT); 309 min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
169 if (ret >= 0) 310 if (ret >= 0)
170 uobj->id = ret; 311 uobj->id = ret;
171 312
172 spin_unlock(&uobj->context->ufile->idr_lock); 313 spin_unlock(&uobj->ufile->idr_lock);
173 idr_preload_end(); 314 idr_preload_end();
174 315
175 return ret < 0 ? ret : 0; 316 return ret < 0 ? ret : 0;
176} 317}
177 318
178/*
179 * It only removes it from the uobjects list, uverbs_uobject_put() is still
180 * required.
181 */
182static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
183{
184 spin_lock(&uobj->context->ufile->idr_lock);
185 idr_remove(&uobj->context->ufile->idr, uobj->id);
186 spin_unlock(&uobj->context->ufile->idr_lock);
187}
188
189/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */ 319/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
190static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type, 320static struct ib_uobject *
191 struct ib_ucontext *ucontext, 321lookup_get_idr_uobject(const struct uverbs_api_object *obj,
192 int id, bool exclusive) 322 struct ib_uverbs_file *ufile, s64 id,
323 enum rdma_lookup_mode mode)
193{ 324{
194 struct ib_uobject *uobj; 325 struct ib_uobject *uobj;
326 unsigned long idrno = id;
327
328 if (id < 0 || id > ULONG_MAX)
329 return ERR_PTR(-EINVAL);
195 330
196 rcu_read_lock(); 331 rcu_read_lock();
197 /* object won't be released as we're protected in rcu */ 332 /* object won't be released as we're protected in rcu */
198 uobj = idr_find(&ucontext->ufile->idr, id); 333 uobj = idr_find(&ufile->idr, idrno);
199 if (!uobj) { 334 if (!uobj) {
200 uobj = ERR_PTR(-ENOENT); 335 uobj = ERR_PTR(-ENOENT);
201 goto free; 336 goto free;
@@ -215,19 +350,28 @@ free:
215 return uobj; 350 return uobj;
216} 351}
217 352
218static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type, 353static struct ib_uobject *
219 struct ib_ucontext *ucontext, 354lookup_get_fd_uobject(const struct uverbs_api_object *obj,
220 int id, bool exclusive) 355 struct ib_uverbs_file *ufile, s64 id,
356 enum rdma_lookup_mode mode)
221{ 357{
358 const struct uverbs_obj_fd_type *fd_type;
222 struct file *f; 359 struct file *f;
223 struct ib_uobject *uobject; 360 struct ib_uobject *uobject;
224 const struct uverbs_obj_fd_type *fd_type = 361 int fdno = id;
225 container_of(type, struct uverbs_obj_fd_type, type);
226 362
227 if (exclusive) 363 if (fdno != id)
364 return ERR_PTR(-EINVAL);
365
366 if (mode != UVERBS_LOOKUP_READ)
228 return ERR_PTR(-EOPNOTSUPP); 367 return ERR_PTR(-EOPNOTSUPP);
229 368
230 f = fget(id); 369 if (!obj->type_attrs)
370 return ERR_PTR(-EIO);
371 fd_type =
372 container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
373
374 f = fget(fdno);
231 if (!f) 375 if (!f)
232 return ERR_PTR(-EBADF); 376 return ERR_PTR(-EBADF);
233 377
@@ -246,43 +390,55 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
246 return uobject; 390 return uobject;
247} 391}
248 392
249struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, 393struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
250 struct ib_ucontext *ucontext, 394 struct ib_uverbs_file *ufile, s64 id,
251 int id, bool exclusive) 395 enum rdma_lookup_mode mode)
252{ 396{
253 struct ib_uobject *uobj; 397 struct ib_uobject *uobj;
254 int ret; 398 int ret;
255 399
256 uobj = type->type_class->lookup_get(type, ucontext, id, exclusive); 400 if (!obj)
401 return ERR_PTR(-EINVAL);
402
403 uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
257 if (IS_ERR(uobj)) 404 if (IS_ERR(uobj))
258 return uobj; 405 return uobj;
259 406
260 if (uobj->type != type) { 407 if (uobj->uapi_object != obj) {
261 ret = -EINVAL; 408 ret = -EINVAL;
262 goto free; 409 goto free;
263 } 410 }
264 411
265 ret = uverbs_try_lock_object(uobj, exclusive); 412 /*
266 if (ret) { 413 * If we have been disassociated block every command except for
267 WARN(ucontext->cleanup_reason, 414 * DESTROY based commands.
268 "ib_uverbs: Trying to lookup_get while cleanup context\n"); 415 */
416 if (mode != UVERBS_LOOKUP_DESTROY &&
417 !srcu_dereference(ufile->device->ib_dev,
418 &ufile->device->disassociate_srcu)) {
419 ret = -EIO;
269 goto free; 420 goto free;
270 } 421 }
271 422
423 ret = uverbs_try_lock_object(uobj, mode);
424 if (ret)
425 goto free;
426
272 return uobj; 427 return uobj;
273free: 428free:
274 uobj->type->type_class->lookup_put(uobj, exclusive); 429 obj->type_class->lookup_put(uobj, mode);
275 uverbs_uobject_put(uobj); 430 uverbs_uobject_put(uobj);
276 return ERR_PTR(ret); 431 return ERR_PTR(ret);
277} 432}
278 433
279static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type, 434static struct ib_uobject *
280 struct ib_ucontext *ucontext) 435alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
436 struct ib_uverbs_file *ufile)
281{ 437{
282 int ret; 438 int ret;
283 struct ib_uobject *uobj; 439 struct ib_uobject *uobj;
284 440
285 uobj = alloc_uobj(ucontext, type); 441 uobj = alloc_uobj(ufile, obj);
286 if (IS_ERR(uobj)) 442 if (IS_ERR(uobj))
287 return uobj; 443 return uobj;
288 444
@@ -290,7 +446,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
290 if (ret) 446 if (ret)
291 goto uobj_put; 447 goto uobj_put;
292 448
293 ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device, 449 ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
294 RDMACG_RESOURCE_HCA_OBJECT); 450 RDMACG_RESOURCE_HCA_OBJECT);
295 if (ret) 451 if (ret)
296 goto idr_remove; 452 goto idr_remove;
@@ -298,304 +454,305 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
298 return uobj; 454 return uobj;
299 455
300idr_remove: 456idr_remove:
301 uverbs_idr_remove_uobj(uobj); 457 spin_lock(&ufile->idr_lock);
458 idr_remove(&ufile->idr, uobj->id);
459 spin_unlock(&ufile->idr_lock);
302uobj_put: 460uobj_put:
303 uverbs_uobject_put(uobj); 461 uverbs_uobject_put(uobj);
304 return ERR_PTR(ret); 462 return ERR_PTR(ret);
305} 463}
306 464
307static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type, 465static struct ib_uobject *
308 struct ib_ucontext *ucontext) 466alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
467 struct ib_uverbs_file *ufile)
309{ 468{
310 const struct uverbs_obj_fd_type *fd_type =
311 container_of(type, struct uverbs_obj_fd_type, type);
312 int new_fd; 469 int new_fd;
313 struct ib_uobject *uobj; 470 struct ib_uobject *uobj;
314 struct ib_uobject_file *uobj_file;
315 struct file *filp;
316 471
317 new_fd = get_unused_fd_flags(O_CLOEXEC); 472 new_fd = get_unused_fd_flags(O_CLOEXEC);
318 if (new_fd < 0) 473 if (new_fd < 0)
319 return ERR_PTR(new_fd); 474 return ERR_PTR(new_fd);
320 475
321 uobj = alloc_uobj(ucontext, type); 476 uobj = alloc_uobj(ufile, obj);
322 if (IS_ERR(uobj)) { 477 if (IS_ERR(uobj)) {
323 put_unused_fd(new_fd); 478 put_unused_fd(new_fd);
324 return uobj; 479 return uobj;
325 } 480 }
326 481
327 uobj_file = container_of(uobj, struct ib_uobject_file, uobj); 482 uobj->id = new_fd;
328 filp = anon_inode_getfile(fd_type->name, 483 uobj->ufile = ufile;
329 fd_type->fops,
330 uobj_file,
331 fd_type->flags);
332 if (IS_ERR(filp)) {
333 put_unused_fd(new_fd);
334 uverbs_uobject_put(uobj);
335 return (void *)filp;
336 }
337
338 uobj_file->uobj.id = new_fd;
339 uobj_file->uobj.object = filp;
340 uobj_file->ufile = ucontext->ufile;
341 INIT_LIST_HEAD(&uobj->list);
342 kref_get(&uobj_file->ufile->ref);
343 484
344 return uobj; 485 return uobj;
345} 486}
346 487
347struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, 488struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
348 struct ib_ucontext *ucontext) 489 struct ib_uverbs_file *ufile)
349{ 490{
350 return type->type_class->alloc_begin(type, ucontext); 491 struct ib_uobject *ret;
351}
352 492
353static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj, 493 if (!obj)
354 enum rdma_remove_reason why) 494 return ERR_PTR(-EINVAL);
355{
356 const struct uverbs_obj_idr_type *idr_type =
357 container_of(uobj->type, struct uverbs_obj_idr_type,
358 type);
359 int ret = idr_type->destroy_object(uobj, why);
360 495
361 /* 496 /*
362 * We can only fail gracefully if the user requested to destroy the 497 * The hw_destroy_rwsem is held across the entire object creation and
363 * object. In the rest of the cases, just remove whatever you can. 498 * released during rdma_alloc_commit_uobject or
499 * rdma_alloc_abort_uobject
364 */ 500 */
365 if (why == RDMA_REMOVE_DESTROY && ret) 501 if (!down_read_trylock(&ufile->hw_destroy_rwsem))
366 return ret; 502 return ERR_PTR(-EIO);
367
368 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
369 RDMACG_RESOURCE_HCA_OBJECT);
370 uverbs_idr_remove_uobj(uobj);
371 503
504 ret = obj->type_class->alloc_begin(obj, ufile);
505 if (IS_ERR(ret)) {
506 up_read(&ufile->hw_destroy_rwsem);
507 return ret;
508 }
372 return ret; 509 return ret;
373} 510}
374 511
375static void alloc_abort_fd_uobject(struct ib_uobject *uobj) 512static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
376{ 513{
377 struct ib_uobject_file *uobj_file = 514 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
378 container_of(uobj, struct ib_uobject_file, uobj); 515 RDMACG_RESOURCE_HCA_OBJECT);
379 struct file *filp = uobj->object;
380 int id = uobj_file->uobj.id;
381 516
382 /* Unsuccessful NEW */ 517 spin_lock(&uobj->ufile->idr_lock);
383 fput(filp); 518 idr_remove(&uobj->ufile->idr, uobj->id);
384 put_unused_fd(id); 519 spin_unlock(&uobj->ufile->idr_lock);
385} 520}
386 521
387static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj, 522static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
388 enum rdma_remove_reason why) 523 enum rdma_remove_reason why)
389{ 524{
390 const struct uverbs_obj_fd_type *fd_type = 525 const struct uverbs_obj_idr_type *idr_type =
391 container_of(uobj->type, struct uverbs_obj_fd_type, type); 526 container_of(uobj->uapi_object->type_attrs,
392 struct ib_uobject_file *uobj_file = 527 struct uverbs_obj_idr_type, type);
393 container_of(uobj, struct ib_uobject_file, uobj); 528 int ret = idr_type->destroy_object(uobj, why);
394 int ret = fd_type->context_closed(uobj_file, why);
395 529
396 if (why == RDMA_REMOVE_DESTROY && ret) 530 /*
531 * We can only fail gracefully if the user requested to destroy the
532 * object or when a retry may be called upon an error.
533 * In the rest of the cases, just remove whatever you can.
534 */
535 if (ib_is_destroy_retryable(ret, why, uobj))
397 return ret; 536 return ret;
398 537
399 if (why == RDMA_REMOVE_DURING_CLEANUP) { 538 if (why == RDMA_REMOVE_ABORT)
400 alloc_abort_fd_uobject(uobj); 539 return 0;
401 return ret;
402 }
403 540
404 uobj_file->uobj.context = NULL; 541 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
405 return ret; 542 RDMACG_RESOURCE_HCA_OBJECT);
543
544 return 0;
406} 545}
407 546
408static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive) 547static void remove_handle_idr_uobject(struct ib_uobject *uobj)
409{ 548{
410#ifdef CONFIG_LOCKDEP 549 spin_lock(&uobj->ufile->idr_lock);
411 if (exclusive) 550 idr_remove(&uobj->ufile->idr, uobj->id);
412 WARN_ON(atomic_read(&uobj->usecnt) != -1); 551 spin_unlock(&uobj->ufile->idr_lock);
413 else 552 /* Matches the kref in alloc_commit_idr_uobject */
414 WARN_ON(atomic_read(&uobj->usecnt) <= 0); 553 uverbs_uobject_put(uobj);
415#endif
416} 554}
417 555
418static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj, 556static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
419 enum rdma_remove_reason why)
420{ 557{
421 int ret; 558 put_unused_fd(uobj->id);
422 struct ib_ucontext *ucontext = uobj->context;
423
424 ret = uobj->type->type_class->remove_commit(uobj, why);
425 if (ret && why == RDMA_REMOVE_DESTROY) {
426 /* We couldn't remove the object, so just unlock the uobject */
427 atomic_set(&uobj->usecnt, 0);
428 uobj->type->type_class->lookup_put(uobj, true);
429 } else {
430 mutex_lock(&ucontext->uobjects_lock);
431 list_del(&uobj->list);
432 mutex_unlock(&ucontext->uobjects_lock);
433 /* put the ref we took when we created the object */
434 uverbs_uobject_put(uobj);
435 }
436
437 return ret;
438} 559}
439 560
440/* This is called only for user requested DESTROY reasons */ 561static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
441int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj) 562 enum rdma_remove_reason why)
442{ 563{
443 int ret; 564 const struct uverbs_obj_fd_type *fd_type = container_of(
444 struct ib_ucontext *ucontext = uobj->context; 565 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
445 566 int ret = fd_type->context_closed(uobj, why);
446 /* put the ref count we took at lookup_get */
447 uverbs_uobject_put(uobj);
448 /* Cleanup is running. Calling this should have been impossible */
449 if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
450 WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
451 return 0;
452 }
453 assert_uverbs_usecnt(uobj, true);
454 ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
455 567
456 up_read(&ucontext->cleanup_rwsem); 568 if (ib_is_destroy_retryable(ret, why, uobj))
457 return ret; 569 return ret;
458}
459 570
460static int null_obj_type_class_remove_commit(struct ib_uobject *uobj,
461 enum rdma_remove_reason why)
462{
463 return 0; 571 return 0;
464} 572}
465 573
466static const struct uverbs_obj_type null_obj_type = { 574static void remove_handle_fd_uobject(struct ib_uobject *uobj)
467 .type_class = &((const struct uverbs_obj_type_class){
468 .remove_commit = null_obj_type_class_remove_commit,
469 /* be cautious */
470 .needs_kfree_rcu = true}),
471};
472
473int rdma_explicit_destroy(struct ib_uobject *uobject)
474{ 575{
475 int ret;
476 struct ib_ucontext *ucontext = uobject->context;
477
478 /* Cleanup is running. Calling this should have been impossible */
479 if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
480 WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
481 return 0;
482 }
483 assert_uverbs_usecnt(uobject, true);
484 ret = uobject->type->type_class->remove_commit(uobject,
485 RDMA_REMOVE_DESTROY);
486 if (ret)
487 goto out;
488
489 uobject->type = &null_obj_type;
490
491out:
492 up_read(&ucontext->cleanup_rwsem);
493 return ret;
494} 576}
495 577
496static void alloc_commit_idr_uobject(struct ib_uobject *uobj) 578static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
497{ 579{
498 spin_lock(&uobj->context->ufile->idr_lock); 580 struct ib_uverbs_file *ufile = uobj->ufile;
581
582 spin_lock(&ufile->idr_lock);
499 /* 583 /*
500 * We already allocated this IDR with a NULL object, so 584 * We already allocated this IDR with a NULL object, so
501 * this shouldn't fail. 585 * this shouldn't fail.
586 *
 587 * NOTE: Once we set the IDR we lose ownership of our kref on uobj.
 588 * It will be put by remove_commit_idr_uobject()
502 */ 589 */
503 WARN_ON(idr_replace(&uobj->context->ufile->idr, 590 WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
504 uobj, uobj->id)); 591 spin_unlock(&ufile->idr_lock);
505 spin_unlock(&uobj->context->ufile->idr_lock); 592
593 return 0;
506} 594}
507 595
508static void alloc_commit_fd_uobject(struct ib_uobject *uobj) 596static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
509{ 597{
510 struct ib_uobject_file *uobj_file = 598 const struct uverbs_obj_fd_type *fd_type = container_of(
511 container_of(uobj, struct ib_uobject_file, uobj); 599 uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
600 int fd = uobj->id;
601 struct file *filp;
602
603 /*
 604 * The kref for uobj is moved into filp->private_data and put in
605 * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
606 * must be guaranteed to be called from the provided fops release
607 * callback.
608 */
609 filp = anon_inode_getfile(fd_type->name,
610 fd_type->fops,
611 uobj,
612 fd_type->flags);
613 if (IS_ERR(filp))
614 return PTR_ERR(filp);
615
616 uobj->object = filp;
617
618 /* Matching put will be done in uverbs_close_fd() */
619 kref_get(&uobj->ufile->ref);
512 620
513 fd_install(uobj_file->uobj.id, uobj->object);
514 /* This shouldn't be used anymore. Use the file object instead */ 621 /* This shouldn't be used anymore. Use the file object instead */
515 uobj_file->uobj.id = 0; 622 uobj->id = 0;
516 /* Get another reference as we export this to the fops */ 623
517 uverbs_uobject_get(&uobj_file->uobj); 624 /*
 625 * NOTE: Once we install the file we lose ownership of our kref on
626 * uobj. It will be put by uverbs_close_fd()
627 */
628 fd_install(fd, filp);
629
630 return 0;
518} 631}
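
The comment above requires that uverbs_close_fd() be reached from the release callback of the fops passed as fd_type->fops. A minimal sketch of what such a release hook might look like; the wrapper and fops below are illustrative placeholders and not code from this patch, only uverbs_close_fd() comes from this file:

	static int example_uobj_release(struct inode *inode, struct file *filp)
	{
		/*
		 * filp->private_data is the ib_uobject handed to
		 * anon_inode_getfile() above; uverbs_close_fd() drops the
		 * kref that alloc_commit_fd_uobject() transferred to the file.
		 */
		uverbs_close_fd(filp);
		return 0;
	}

	static const struct file_operations example_uobj_fops = {
		.owner   = THIS_MODULE,
		.release = example_uobj_release,
	};
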
519 632
520int rdma_alloc_commit_uobject(struct ib_uobject *uobj) 633/*
634 * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
635 * caller can no longer assume uobj is valid. If this function fails it
 636 * destroys the uobject, including the attached HW object.
637 */
638int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
521{ 639{
522 /* Cleanup is running. Calling this should have been impossible */ 640 struct ib_uverbs_file *ufile = uobj->ufile;
523 if (!down_read_trylock(&uobj->context->cleanup_rwsem)) { 641 int ret;
524 int ret;
525 642
526 WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n"); 643 /* alloc_commit consumes the uobj kref */
527 ret = uobj->type->type_class->remove_commit(uobj, 644 ret = uobj->uapi_object->type_class->alloc_commit(uobj);
528 RDMA_REMOVE_DURING_CLEANUP); 645 if (ret) {
529 if (ret) 646 uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
530 pr_warn("ib_uverbs: cleanup of idr object %d failed\n", 647 up_read(&ufile->hw_destroy_rwsem);
531 uobj->id);
532 return ret; 648 return ret;
533 } 649 }
534 650
651 /* kref is held so long as the uobj is on the uobj list. */
652 uverbs_uobject_get(uobj);
653 spin_lock_irq(&ufile->uobjects_lock);
654 list_add(&uobj->list, &ufile->uobjects);
655 spin_unlock_irq(&ufile->uobjects_lock);
656
535 /* matches atomic_set(-1) in alloc_uobj */ 657 /* matches atomic_set(-1) in alloc_uobj */
536 assert_uverbs_usecnt(uobj, true);
537 atomic_set(&uobj->usecnt, 0); 658 atomic_set(&uobj->usecnt, 0);
538 659
539 mutex_lock(&uobj->context->uobjects_lock); 660 /* Matches the down_read in rdma_alloc_begin_uobject */
540 list_add(&uobj->list, &uobj->context->uobjects); 661 up_read(&ufile->hw_destroy_rwsem);
541 mutex_unlock(&uobj->context->uobjects_lock);
542
543 uobj->type->type_class->alloc_commit(uobj);
544 up_read(&uobj->context->cleanup_rwsem);
545 662
546 return 0; 663 return 0;
547} 664}
548 665
549static void alloc_abort_idr_uobject(struct ib_uobject *uobj) 666/*
550{ 667 * This consumes the kref for uobj. It is up to the caller to unwind the HW
551 uverbs_idr_remove_uobj(uobj); 668 * object and anything else connected to uobj before calling this.
552 ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device, 669 */
553 RDMACG_RESOURCE_HCA_OBJECT);
554 uverbs_uobject_put(uobj);
555}
556
557void rdma_alloc_abort_uobject(struct ib_uobject *uobj) 670void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
558{ 671{
559 uobj->type->type_class->alloc_abort(uobj); 672 struct ib_uverbs_file *ufile = uobj->ufile;
673
674 uobj->object = NULL;
675 uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
676
677 /* Matches the down_read in rdma_alloc_begin_uobject */
678 up_read(&ufile->hw_destroy_rwsem);
560} 679}
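
Taken together, rdma_alloc_begin_uobject(), rdma_alloc_commit_uobject() and rdma_alloc_abort_uobject() form the handler-side creation pattern. A rough sketch of a caller, where create_hw_object() is a placeholder for the driver-specific step and not an API from this series; note that both commit and abort consume the caller's reference, so uobj must not be touched afterwards:

	static int example_create(const struct uverbs_api_object *obj,
				  struct ib_uverbs_file *ufile)
	{
		struct ib_uobject *uobj;
		int ret;

		uobj = rdma_alloc_begin_uobject(obj, ufile);
		if (IS_ERR(uobj))
			return PTR_ERR(uobj);

		ret = create_hw_object(uobj);	/* placeholder driver call */
		if (ret) {
			/* consumes the kref and releases hw_destroy_rwsem */
			rdma_alloc_abort_uobject(uobj);
			return ret;
		}

		/*
		 * Also consumes the kref and releases hw_destroy_rwsem; on
		 * failure the uobject and its HW object are already destroyed.
		 */
		return rdma_alloc_commit_uobject(uobj);
	}
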
561 680
562static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive) 681static void lookup_put_idr_uobject(struct ib_uobject *uobj,
682 enum rdma_lookup_mode mode)
563{ 683{
564} 684}
565 685
566static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive) 686static void lookup_put_fd_uobject(struct ib_uobject *uobj,
687 enum rdma_lookup_mode mode)
567{ 688{
568 struct file *filp = uobj->object; 689 struct file *filp = uobj->object;
569 690
570 WARN_ON(exclusive); 691 WARN_ON(mode != UVERBS_LOOKUP_READ);
571 /* This indirectly calls uverbs_close_fd and free the object */ 692 /* This indirectly calls uverbs_close_fd and free the object */
572 fput(filp); 693 fput(filp);
573} 694}
574 695
575void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive) 696void rdma_lookup_put_uobject(struct ib_uobject *uobj,
697 enum rdma_lookup_mode mode)
576{ 698{
577 assert_uverbs_usecnt(uobj, exclusive); 699 assert_uverbs_usecnt(uobj, mode);
578 uobj->type->type_class->lookup_put(uobj, exclusive); 700 uobj->uapi_object->type_class->lookup_put(uobj, mode);
579 /* 701 /*
580 * In order to unlock an object, either decrease its usecnt for 702 * In order to unlock an object, either decrease its usecnt for
581 * read access or zero it in case of exclusive access. See 703 * read access or zero it in case of exclusive access. See
582 * uverbs_try_lock_object for locking schema information. 704 * uverbs_try_lock_object for locking schema information.
583 */ 705 */
584 if (!exclusive) 706 switch (mode) {
707 case UVERBS_LOOKUP_READ:
585 atomic_dec(&uobj->usecnt); 708 atomic_dec(&uobj->usecnt);
586 else 709 break;
710 case UVERBS_LOOKUP_WRITE:
587 atomic_set(&uobj->usecnt, 0); 711 atomic_set(&uobj->usecnt, 0);
712 break;
713 case UVERBS_LOOKUP_DESTROY:
714 break;
715 }
588 716
717 /* Pairs with the kref obtained by type->lookup_get */
589 uverbs_uobject_put(uobj); 718 uverbs_uobject_put(uobj);
590} 719}
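
The put above mirrors rdma_lookup_get_uobject(), which pins the object and takes usecnt in the requested mode (its use is visible in uverbs_get_uobject_from_file() further down). A small sketch of the read-side pairing; the wrapper and its error handling are illustrative only:

	static int example_read(const struct uverbs_api_object *obj,
				struct ib_uverbs_file *ufile, s64 id)
	{
		struct ib_uobject *uobj;

		uobj = rdma_lookup_get_uobject(obj, ufile, id,
					       UVERBS_LOOKUP_READ);
		if (IS_ERR(uobj))
			return PTR_ERR(uobj);

		/* ... inspect uobj->object under shared (READ) access ... */

		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
		return 0;
	}
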
591 720
721void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
722{
723 spin_lock_init(&ufile->idr_lock);
724 idr_init(&ufile->idr);
725}
726
727void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
728{
729 struct ib_uobject *entry;
730 int id;
731
732 /*
733 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
 734 * there are no HW objects left; however, the IDR is still populated
735 * with anything that has not been cleaned up by userspace. Since the
736 * kref on ufile is 0, nothing is allowed to call lookup_get.
737 *
738 * This is an optimized equivalent to remove_handle_idr_uobject
739 */
740 idr_for_each_entry(&ufile->idr, entry, id) {
741 WARN_ON(entry->object);
742 uverbs_uobject_put(entry);
743 }
744
745 idr_destroy(&ufile->idr);
746}
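
These two helpers are intended to bracket the lifetime of the per-ufile IDR: setup when the uverbs file is created, release only after the cleanup described in the comment above has run and no lookup can still race. A sketch of that pairing, with the surrounding create/release functions as placeholders:

	static void example_ufile_create(struct ib_uverbs_file *ufile)
	{
		setup_ufile_idr_uobject(ufile);	/* idr_init + lock init */
	}

	static void example_ufile_final_release(struct ib_uverbs_file *ufile)
	{
		/* only once the ufile kref is gone and no lookup_get can run */
		release_ufile_idr_uobject(ufile);
	}
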
747
592const struct uverbs_obj_type_class uverbs_idr_class = { 748const struct uverbs_obj_type_class uverbs_idr_class = {
593 .alloc_begin = alloc_begin_idr_uobject, 749 .alloc_begin = alloc_begin_idr_uobject,
594 .lookup_get = lookup_get_idr_uobject, 750 .lookup_get = lookup_get_idr_uobject,
595 .alloc_commit = alloc_commit_idr_uobject, 751 .alloc_commit = alloc_commit_idr_uobject,
596 .alloc_abort = alloc_abort_idr_uobject, 752 .alloc_abort = alloc_abort_idr_uobject,
597 .lookup_put = lookup_put_idr_uobject, 753 .lookup_put = lookup_put_idr_uobject,
598 .remove_commit = remove_commit_idr_uobject, 754 .destroy_hw = destroy_hw_idr_uobject,
755 .remove_handle = remove_handle_idr_uobject,
599 /* 756 /*
600 * When we destroy an object, we first just lock it for WRITE and 757 * When we destroy an object, we first just lock it for WRITE and
601 * actually DESTROY it in the finalize stage. So, the problematic 758 * actually DESTROY it in the finalize stage. So, the problematic
@@ -611,103 +768,180 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
611 */ 768 */
612 .needs_kfree_rcu = true, 769 .needs_kfree_rcu = true,
613}; 770};
771EXPORT_SYMBOL(uverbs_idr_class);
614 772
615static void _uverbs_close_fd(struct ib_uobject_file *uobj_file) 773void uverbs_close_fd(struct file *f)
616{ 774{
617 struct ib_ucontext *ucontext; 775 struct ib_uobject *uobj = f->private_data;
618 struct ib_uverbs_file *ufile = uobj_file->ufile; 776 struct ib_uverbs_file *ufile = uobj->ufile;
619 int ret;
620 777
621 mutex_lock(&uobj_file->ufile->cleanup_mutex); 778 if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
779 /*
780 * lookup_get_fd_uobject holds the kref on the struct file any
781 * time a FD uobj is locked, which prevents this release
782 * method from being invoked. Meaning we can always get the
783 * write lock here, or we have a kernel bug.
784 */
785 WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
786 uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
787 up_read(&ufile->hw_destroy_rwsem);
788 }
622 789
623 /* uobject was either already cleaned up or is cleaned up right now anyway */ 790 /* Matches the get in alloc_begin_fd_uobject */
624 if (!uobj_file->uobj.context || 791 kref_put(&ufile->ref, ib_uverbs_release_file);
625 !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
626 goto unlock;
627 792
628 ucontext = uobj_file->uobj.context; 793 /* Pairs with filp->private_data in alloc_begin_fd_uobject */
629 ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE); 794 uverbs_uobject_put(uobj);
630 up_read(&ucontext->cleanup_rwsem);
631 if (ret)
632 pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
633unlock:
634 mutex_unlock(&ufile->cleanup_mutex);
635} 795}
636 796
637void uverbs_close_fd(struct file *f) 797static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
638{ 798{
639 struct ib_uobject_file *uobj_file = f->private_data; 799 struct ib_device *ib_dev = ibcontext->device;
640 struct kref *uverbs_file_ref = &uobj_file->ufile->ref; 800 struct task_struct *owning_process = NULL;
801 struct mm_struct *owning_mm = NULL;
802
803 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
804 if (!owning_process)
805 return;
806
807 owning_mm = get_task_mm(owning_process);
808 if (!owning_mm) {
809 pr_info("no mm, disassociate ucontext is pending task termination\n");
810 while (1) {
811 put_task_struct(owning_process);
812 usleep_range(1000, 2000);
813 owning_process = get_pid_task(ibcontext->tgid,
814 PIDTYPE_PID);
815 if (!owning_process ||
816 owning_process->state == TASK_DEAD) {
817 pr_info("disassociate ucontext done, task was terminated\n");
818 /* in case task was dead need to release the
819 * task struct.
820 */
821 if (owning_process)
822 put_task_struct(owning_process);
823 return;
824 }
825 }
826 }
641 827
642 _uverbs_close_fd(uobj_file); 828 down_write(&owning_mm->mmap_sem);
643 uverbs_uobject_put(&uobj_file->uobj); 829 ib_dev->disassociate_ucontext(ibcontext);
644 kref_put(uverbs_file_ref, ib_uverbs_release_file); 830 up_write(&owning_mm->mmap_sem);
831 mmput(owning_mm);
832 put_task_struct(owning_process);
645} 833}
646 834
647void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed) 835/*
836 * Drop the ucontext off the ufile and completely disconnect it from the
837 * ib_device
838 */
839static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
840 enum rdma_remove_reason reason)
648{ 841{
649 enum rdma_remove_reason reason = device_removed ? 842 struct ib_ucontext *ucontext = ufile->ucontext;
650 RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE; 843 int ret;
651 unsigned int cur_order = 0; 844
845 if (reason == RDMA_REMOVE_DRIVER_REMOVE)
846 ufile_disassociate_ucontext(ucontext);
847
848 put_pid(ucontext->tgid);
849 ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
850 RDMACG_RESOURCE_HCA_HANDLE);
652 851
653 ucontext->cleanup_reason = reason;
654 /* 852 /*
655 * Waits for all remove_commit and alloc_commit to finish. Logically, We 853 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
656 * want to hold this forever as the context is going to be destroyed, 854 * the error return.
657 * but we'll release it since it causes a "held lock freed" BUG message.
658 */ 855 */
659 down_write(&ucontext->cleanup_rwsem); 856 ret = ucontext->device->dealloc_ucontext(ucontext);
857 WARN_ON(ret);
660 858
661 while (!list_empty(&ucontext->uobjects)) { 859 ufile->ucontext = NULL;
662 struct ib_uobject *obj, *next_obj; 860}
663 unsigned int next_order = UINT_MAX; 861
862static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
863 enum rdma_remove_reason reason)
864{
865 struct ib_uobject *obj, *next_obj;
866 int ret = -EINVAL;
664 867
868 /*
869 * This shouldn't run while executing other commands on this
870 * context. Thus, the only thing we should take care of is
871 * releasing a FD while traversing this list. The FD could be
872 * closed and released from the _release fop of this FD.
873 * In order to mitigate this, we add a lock.
874 * We take and release the lock per traversal in order to let
 875 * other threads (which might still use the FDs) a chance to run.
876 */
877 list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
665 /* 878 /*
666 * This shouldn't run while executing other commands on this 879 * if we hit this WARN_ON, that means we are
667 * context. Thus, the only thing we should take care of is 880 * racing with a lookup_get.
668 * releasing a FD while traversing this list. The FD could be
669 * closed and released from the _release fop of this FD.
670 * In order to mitigate this, we add a lock.
671 * We take and release the lock per order traversal in order
672 * to let other threads (which might still use the FDs) chance
673 * to run.
674 */ 881 */
675 mutex_lock(&ucontext->uobjects_lock); 882 WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
676 list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, 883 if (!uverbs_destroy_uobject(obj, reason))
677 list) { 884 ret = 0;
678 if (obj->type->destroy_order == cur_order) {
679 int ret;
680
681 /*
682 * if we hit this WARN_ON, that means we are
683 * racing with a lookup_get.
684 */
685 WARN_ON(uverbs_try_lock_object(obj, true));
686 ret = obj->type->type_class->remove_commit(obj,
687 reason);
688 list_del(&obj->list);
689 if (ret)
690 pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
691 obj->id, cur_order);
692 /* put the ref we took when we created the object */
693 uverbs_uobject_put(obj);
694 } else {
695 next_order = min(next_order,
696 obj->type->destroy_order);
697 }
698 }
699 mutex_unlock(&ucontext->uobjects_lock);
700 cur_order = next_order;
701 } 885 }
702 up_write(&ucontext->cleanup_rwsem); 886 return ret;
703} 887}
704 888
705void uverbs_initialize_ucontext(struct ib_ucontext *ucontext) 889/*
 890 * Destroy the ucontext and every uobject associated with it. If called with
891 * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
892 * been completed and ufile->ucontext is NULL.
893 *
894 * This is internally locked and can be called in parallel from multiple
895 * contexts.
896 */
897void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
898 enum rdma_remove_reason reason)
706{ 899{
707 ucontext->cleanup_reason = 0; 900 if (reason == RDMA_REMOVE_CLOSE) {
708 mutex_init(&ucontext->uobjects_lock); 901 /*
709 INIT_LIST_HEAD(&ucontext->uobjects); 902 * During destruction we might trigger something that
710 init_rwsem(&ucontext->cleanup_rwsem); 903 * synchronously calls release on any file descriptor. For
904 * this reason all paths that come from file_operations
905 * release must use try_lock. They can progress knowing that
906 * there is an ongoing uverbs_destroy_ufile_hw that will clean
907 * up the driver resources.
908 */
909 if (!mutex_trylock(&ufile->ucontext_lock))
910 return;
911
912 } else {
913 mutex_lock(&ufile->ucontext_lock);
914 }
915
916 down_write(&ufile->hw_destroy_rwsem);
917
918 /*
919 * If a ucontext was never created then we can't have any uobjects to
920 * cleanup, nothing to do.
921 */
922 if (!ufile->ucontext)
923 goto done;
924
925 ufile->ucontext->closing = true;
926 ufile->ucontext->cleanup_retryable = true;
927 while (!list_empty(&ufile->uobjects))
928 if (__uverbs_cleanup_ufile(ufile, reason)) {
929 /*
930 * No entry was cleaned-up successfully during this
931 * iteration
932 */
933 break;
934 }
935
936 ufile->ucontext->cleanup_retryable = false;
937 if (!list_empty(&ufile->uobjects))
938 __uverbs_cleanup_ufile(ufile, reason);
939
940 ufile_destroy_ucontext(ufile, reason);
941
942done:
943 up_write(&ufile->hw_destroy_rwsem);
944 mutex_unlock(&ufile->ucontext_lock);
711} 945}
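
The two interesting reasons behave differently here: RDMA_REMOVE_CLOSE may bail out if it loses the ucontext_lock trylock race, while driver removal blocks until ufile->ucontext is NULL. A sketch of how the two call sites might look from the uverbs file code; the wrapper names are placeholders, only uverbs_destroy_ufile_hw() and the reason codes come from this patch:

	/* last close of the uverbs char device file */
	static void example_on_close(struct ib_uverbs_file *ufile)
	{
		uverbs_destroy_ufile_hw(ufile, RDMA_REMOVE_CLOSE);
	}

	/* device hot-unplug; does not return before the ucontext is gone */
	static void example_on_unplug(struct ib_uverbs_file *ufile)
	{
		uverbs_destroy_ufile_hw(ufile, RDMA_REMOVE_DRIVER_REMOVE);
	}
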
712 946
713const struct uverbs_obj_type_class uverbs_fd_class = { 947const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -716,23 +950,33 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
716 .alloc_commit = alloc_commit_fd_uobject, 950 .alloc_commit = alloc_commit_fd_uobject,
717 .alloc_abort = alloc_abort_fd_uobject, 951 .alloc_abort = alloc_abort_fd_uobject,
718 .lookup_put = lookup_put_fd_uobject, 952 .lookup_put = lookup_put_fd_uobject,
719 .remove_commit = remove_commit_fd_uobject, 953 .destroy_hw = destroy_hw_fd_uobject,
954 .remove_handle = remove_handle_fd_uobject,
720 .needs_kfree_rcu = false, 955 .needs_kfree_rcu = false,
721}; 956};
957EXPORT_SYMBOL(uverbs_fd_class);
722 958
723struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, 959struct ib_uobject *
724 struct ib_ucontext *ucontext, 960uverbs_get_uobject_from_file(u16 object_id,
725 enum uverbs_obj_access access, 961 struct ib_uverbs_file *ufile,
726 int id) 962 enum uverbs_obj_access access, s64 id)
727{ 963{
964 const struct uverbs_api_object *obj =
965 uapi_get_object(ufile->device->uapi, object_id);
966
728 switch (access) { 967 switch (access) {
729 case UVERBS_ACCESS_READ: 968 case UVERBS_ACCESS_READ:
730 return rdma_lookup_get_uobject(type_attrs, ucontext, id, false); 969 return rdma_lookup_get_uobject(obj, ufile, id,
970 UVERBS_LOOKUP_READ);
731 case UVERBS_ACCESS_DESTROY: 971 case UVERBS_ACCESS_DESTROY:
972 /* Actual destruction is done inside uverbs_handle_method */
973 return rdma_lookup_get_uobject(obj, ufile, id,
974 UVERBS_LOOKUP_DESTROY);
732 case UVERBS_ACCESS_WRITE: 975 case UVERBS_ACCESS_WRITE:
733 return rdma_lookup_get_uobject(type_attrs, ucontext, id, true); 976 return rdma_lookup_get_uobject(obj, ufile, id,
977 UVERBS_LOOKUP_WRITE);
734 case UVERBS_ACCESS_NEW: 978 case UVERBS_ACCESS_NEW:
735 return rdma_alloc_begin_uobject(type_attrs, ucontext); 979 return rdma_alloc_begin_uobject(obj, ufile);
736 default: 980 default:
737 WARN_ON(true); 981 WARN_ON(true);
738 return ERR_PTR(-EOPNOTSUPP); 982 return ERR_PTR(-EOPNOTSUPP);
@@ -753,16 +997,14 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
753 997
754 switch (access) { 998 switch (access) {
755 case UVERBS_ACCESS_READ: 999 case UVERBS_ACCESS_READ:
756 rdma_lookup_put_uobject(uobj, false); 1000 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
757 break; 1001 break;
758 case UVERBS_ACCESS_WRITE: 1002 case UVERBS_ACCESS_WRITE:
759 rdma_lookup_put_uobject(uobj, true); 1003 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
760 break; 1004 break;
761 case UVERBS_ACCESS_DESTROY: 1005 case UVERBS_ACCESS_DESTROY:
762 if (commit) 1006 if (uobj)
763 ret = rdma_remove_commit_uobject(uobj); 1007 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
764 else
765 rdma_lookup_put_uobject(uobj, true);
766 break; 1008 break;
767 case UVERBS_ACCESS_NEW: 1009 case UVERBS_ACCESS_NEW:
768 if (commit) 1010 if (commit)
@@ -777,43 +1019,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
777 1019
778 return ret; 1020 return ret;
779} 1021}
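
The access value passed to uverbs_finalize_object() must match the one used when the object was looked up or allocated; for UVERBS_ACCESS_NEW the commit flag selects between completing and aborting the allocation. A brief, illustrative sketch (the wrapper is a placeholder):

	static int example_finalize_new(struct ib_uobject *new_uobj, bool success)
	{
		/* true: rdma_alloc_commit_uobject(); false: rdma_alloc_abort_uobject() */
		return uverbs_finalize_object(new_uobj, UVERBS_ACCESS_NEW, success);
	}
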
780
781int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
782 struct uverbs_attr_spec_hash * const *spec_hash,
783 size_t num,
784 bool commit)
785{
786 unsigned int i;
787 int ret = 0;
788
789 for (i = 0; i < num; i++) {
790 struct uverbs_attr_bundle_hash *curr_bundle =
791 &attrs_bundle->hash[i];
792 const struct uverbs_attr_spec_hash *curr_spec_bucket =
793 spec_hash[i];
794 unsigned int j;
795
796 for (j = 0; j < curr_bundle->num_attrs; j++) {
797 struct uverbs_attr *attr;
798 const struct uverbs_attr_spec *spec;
799
800 if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
801 continue;
802
803 attr = &curr_bundle->attrs[j];
804 spec = &curr_spec_bucket->attrs[j];
805
806 if (spec->type == UVERBS_ATTR_TYPE_IDR ||
807 spec->type == UVERBS_ATTR_TYPE_FD) {
808 int current_ret;
809
810 current_ret = uverbs_finalize_object(attr->obj_attr.uobject,
811 spec->obj.access,
812 commit);
813 if (!ret)
814 ret = current_ret;
815 }
816 }
817 }
818 return ret;
819}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1efcf93238dd..f962f2a593ba 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -43,20 +43,12 @@
43#include <rdma/ib_verbs.h> 43#include <rdma/ib_verbs.h>
44#include <linux/mutex.h> 44#include <linux/mutex.h>
45 45
46int uverbs_ns_idx(u16 *id, unsigned int ns_count); 46struct ib_uverbs_device;
47const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev, 47
48 uint16_t object); 48void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
49const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object, 49 enum rdma_remove_reason reason);
50 uint16_t method); 50
51/* 51int uobj_destroy(struct ib_uobject *uobj);
52 * These functions initialize the context and cleanups its uobjects.
53 * The context has a list of objects which is protected by a mutex
54 * on the context. initialize_ucontext should be called when we create
55 * a context.
56 * cleanup_ucontext removes all uobjects from the context and puts them.
57 */
58void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
59void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
60 52
61/* 53/*
62 * uverbs_uobject_get is called in order to increase the reference count on 54 * uverbs_uobject_get is called in order to increase the reference count on
@@ -82,7 +74,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
82void uverbs_close_fd(struct file *f); 74void uverbs_close_fd(struct file *f);
83 75
84/* 76/*
85 * Get an ib_uobject that corresponds to the given id from ucontext, assuming 77 * Get an ib_uobject that corresponds to the given id from ufile, assuming
86 * the object is from the given type. Lock it to the required access when 78 * the object is from the given type. Lock it to the required access when
87 * applicable. 79 * applicable.
88 * This function could create (access == NEW), destroy (access == DESTROY) 80 * This function could create (access == NEW), destroy (access == DESTROY)
@@ -90,13 +82,11 @@ void uverbs_close_fd(struct file *f);
90 * The action will be finalized only when uverbs_finalize_object or 82 * The action will be finalized only when uverbs_finalize_object or
91 * uverbs_finalize_objects are called. 83 * uverbs_finalize_objects are called.
92 */ 84 */
93struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs, 85struct ib_uobject *
94 struct ib_ucontext *ucontext, 86uverbs_get_uobject_from_file(u16 object_id,
95 enum uverbs_obj_access access, 87 struct ib_uverbs_file *ufile,
96 int id); 88 enum uverbs_obj_access access, s64 id);
97int uverbs_finalize_object(struct ib_uobject *uobj, 89
98 enum uverbs_obj_access access,
99 bool commit);
100/* 90/*
101 * Note that certain finalize stages could return a status: 91 * Note that certain finalize stages could return a status:
102 * (a) alloc_commit could return a failure if the object is committed at the 92 * (a) alloc_commit could return a failure if the object is committed at the
@@ -112,9 +102,63 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
112 * function. For example, this could happen when we couldn't destroy an 102 * function. For example, this could happen when we couldn't destroy an
113 * object. 103 * object.
114 */ 104 */
115int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle, 105int uverbs_finalize_object(struct ib_uobject *uobj,
116 struct uverbs_attr_spec_hash * const *spec_hash, 106 enum uverbs_obj_access access,
117 size_t num, 107 bool commit);
118 bool commit); 108
109void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
110void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
111
112/*
113 * This is the runtime description of the uverbs API, used by the syscall
114 * machinery to validate and dispatch calls.
115 */
116
117/*
118 * Depending on ID the slot pointer in the radix tree points at one of these
119 * structs.
120 */
121struct uverbs_api_object {
122 const struct uverbs_obj_type *type_attrs;
123 const struct uverbs_obj_type_class *type_class;
124};
125
126struct uverbs_api_ioctl_method {
127 int (__rcu *handler)(struct ib_uverbs_file *ufile,
128 struct uverbs_attr_bundle *ctx);
129 DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
130 u16 bundle_size;
131 u8 use_stack:1;
132 u8 driver_method:1;
133 u8 key_bitmap_len;
134 u8 destroy_bkey;
135};
136
137struct uverbs_api_attr {
138 struct uverbs_attr_spec spec;
139};
140
141struct uverbs_api_object;
142struct uverbs_api {
143 /* radix tree contains struct uverbs_api_* pointers */
144 struct radix_tree_root radix;
145 enum rdma_driver_id driver_id;
146};
147
148static inline const struct uverbs_api_object *
149uapi_get_object(struct uverbs_api *uapi, u16 object_id)
150{
151 return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
152}
153
154char *uapi_key_format(char *S, unsigned int key);
155struct uverbs_api *uverbs_alloc_api(
156 const struct uverbs_object_tree_def *const *driver_specs,
157 enum rdma_driver_id driver_id);
158void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
159void uverbs_disassociate_api(struct uverbs_api *uapi);
160void uverbs_destroy_api(struct uverbs_api *uapi);
161void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
162 unsigned int num_attrs);
119 163
120#endif /* RDMA_CORE_H */ 164#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index a4fbdc5d28fa..ee366199b169 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
143 143
144#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ 144#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
145 BONDING_SLAVE_STATE_NA) 145 BONDING_SLAVE_STATE_NA)
146static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port, 146static bool
147 struct net_device *rdma_ndev, void *cookie) 147is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
148 struct net_device *rdma_ndev, void *cookie)
148{ 149{
149 struct net_device *real_dev; 150 struct net_device *real_dev;
150 int res; 151 bool res;
151 152
152 if (!rdma_ndev) 153 if (!rdma_ndev)
153 return 0; 154 return false;
154 155
155 rcu_read_lock(); 156 rcu_read_lock();
156 real_dev = rdma_vlan_dev_real_dev(cookie); 157 real_dev = rdma_vlan_dev_real_dev(cookie);
@@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
166 return res; 167 return res;
167} 168}
168 169
169static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port, 170static bool
170 struct net_device *rdma_ndev, void *cookie) 171is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
172 struct net_device *rdma_ndev, void *cookie)
171{ 173{
172 struct net_device *master_dev; 174 struct net_device *master_dev;
173 int res; 175 bool res;
174 176
175 if (!rdma_ndev) 177 if (!rdma_ndev)
176 return 0; 178 return false;
177 179
178 rcu_read_lock(); 180 rcu_read_lock();
179 master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); 181 master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
@@ -184,22 +186,59 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
184 return res; 186 return res;
185} 187}
186 188
187static int pass_all_filter(struct ib_device *ib_dev, u8 port, 189/** is_ndev_for_default_gid_filter - Check if a given netdevice
188 struct net_device *rdma_ndev, void *cookie) 190 * can be considered for default GIDs or not.
191 * @ib_dev: IB device to check
192 * @port: Port to consider for adding default GID
193 * @rdma_ndev: rdma netdevice pointer
194 * @cookie_ndev: Netdevice to consider to form a default GID
195 *
196 * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
197 * considered for deriving default RoCE GID, returns false otherwise.
198 */
199static bool
200is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
201 struct net_device *rdma_ndev, void *cookie)
202{
203 struct net_device *cookie_ndev = cookie;
204 bool res;
205
206 if (!rdma_ndev)
207 return false;
208
209 rcu_read_lock();
210
211 /*
212 * When rdma netdevice is used in bonding, bonding master netdevice
213 * should be considered for default GIDs. Therefore, ignore slave rdma
214 * netdevices when bonding is considered.
215 * Additionally when event(cookie) netdevice is bond master device,
 216 * make sure that it is the upper netdevice of the rdma netdevice.
217 */
218 res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
219 (netif_is_bond_master(cookie_ndev) &&
220 rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
221
222 rcu_read_unlock();
223 return res;
224}
225
226static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
227 struct net_device *rdma_ndev, void *cookie)
189{ 228{
190 return 1; 229 return true;
191} 230}
192 231
193static int upper_device_filter(struct ib_device *ib_dev, u8 port, 232static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
194 struct net_device *rdma_ndev, void *cookie) 233 struct net_device *rdma_ndev, void *cookie)
195{ 234{
196 int res; 235 bool res;
197 236
198 if (!rdma_ndev) 237 if (!rdma_ndev)
199 return 0; 238 return false;
200 239
201 if (rdma_ndev == cookie) 240 if (rdma_ndev == cookie)
202 return 1; 241 return true;
203 242
204 rcu_read_lock(); 243 rcu_read_lock();
205 res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); 244 res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
@@ -208,6 +247,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
208 return res; 247 return res;
209} 248}
210 249
250/**
251 * is_upper_ndev_bond_master_filter - Check if a given netdevice
 252 * is the bond master device of the netdevice of the RDMA device port.
253 * @ib_dev: IB device to check
254 * @port: Port to consider for adding default GID
255 * @rdma_ndev: Pointer to rdma netdevice
256 * @cookie: Netdevice to consider to form a default GID
257 *
 258 * is_upper_ndev_bond_master_filter() returns true if the cookie netdev
 259 * is a bond master device and rdma_ndev is its lower netdevice; rdma_ndev
 260 * might not have been established as a slave device yet.
261 */
262static bool
263is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
264 struct net_device *rdma_ndev,
265 void *cookie)
266{
267 struct net_device *cookie_ndev = cookie;
268 bool match = false;
269
270 rcu_read_lock();
271 if (netif_is_bond_master(cookie_ndev) &&
272 rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
273 match = true;
274 rcu_read_unlock();
275 return match;
276}
277
211static void update_gid_ip(enum gid_op_type gid_op, 278static void update_gid_ip(enum gid_op_type gid_op,
212 struct ib_device *ib_dev, 279 struct ib_device *ib_dev,
213 u8 port, struct net_device *ndev, 280 u8 port, struct net_device *ndev,
@@ -223,34 +290,10 @@ static void update_gid_ip(enum gid_op_type gid_op,
223 update_gid(gid_op, ib_dev, port, &gid, &gid_attr); 290 update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
224} 291}
225 292
226static void enum_netdev_default_gids(struct ib_device *ib_dev,
227 u8 port, struct net_device *event_ndev,
228 struct net_device *rdma_ndev)
229{
230 unsigned long gid_type_mask;
231
232 rcu_read_lock();
233 if (!rdma_ndev ||
234 ((rdma_ndev != event_ndev &&
235 !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
236 is_eth_active_slave_of_bonding_rcu(rdma_ndev,
237 netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
238 BONDING_SLAVE_STATE_INACTIVE)) {
239 rcu_read_unlock();
240 return;
241 }
242 rcu_read_unlock();
243
244 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
245
246 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
247 IB_CACHE_GID_DEFAULT_MODE_SET);
248}
249
250static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, 293static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
251 u8 port, 294 u8 port,
252 struct net_device *event_ndev, 295 struct net_device *rdma_ndev,
253 struct net_device *rdma_ndev) 296 struct net_device *event_ndev)
254{ 297{
255 struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); 298 struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
256 unsigned long gid_type_mask; 299 unsigned long gid_type_mask;
@@ -381,7 +424,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
381static void add_netdev_ips(struct ib_device *ib_dev, u8 port, 424static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
382 struct net_device *rdma_ndev, void *cookie) 425 struct net_device *rdma_ndev, void *cookie)
383{ 426{
384 enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
385 _add_netdev_ips(ib_dev, port, cookie); 427 _add_netdev_ips(ib_dev, port, cookie);
386} 428}
387 429
@@ -391,6 +433,38 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
391 ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie); 433 ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
392} 434}
393 435
436/**
437 * del_default_gids - Delete default GIDs of the event/cookie netdevice
438 * @ib_dev: RDMA device pointer
439 * @port: Port of the RDMA device whose GID table to consider
440 * @rdma_ndev: Unused rdma netdevice
441 * @cookie: Pointer to event netdevice
442 *
443 * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
444 */
445static void del_default_gids(struct ib_device *ib_dev, u8 port,
446 struct net_device *rdma_ndev, void *cookie)
447{
448 struct net_device *cookie_ndev = cookie;
449 unsigned long gid_type_mask;
450
451 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
452
453 ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
454 IB_CACHE_GID_DEFAULT_MODE_DELETE);
455}
456
457static void add_default_gids(struct ib_device *ib_dev, u8 port,
458 struct net_device *rdma_ndev, void *cookie)
459{
460 struct net_device *event_ndev = cookie;
461 unsigned long gid_type_mask;
462
463 gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
464 ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
465 IB_CACHE_GID_DEFAULT_MODE_SET);
466}
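
These callbacks only run when the .filter of a netdev_event_work_cmd accepts the (ib_dev, port, rdma_ndev, cookie) tuple, so the add/delete policy is expressed entirely by pairing a filter with a callback. A sketch of such a pairing, mirroring the add_default_gid_cmd table entry used later in this file:

	static const struct netdev_event_work_cmd example_default_gid_cmd = {
		/* add default GIDs only for netdevs accepted by the filter */
		.cb	= add_default_gids,
		.filter	= is_ndev_for_default_gid_filter,
	};
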
467
394static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, 468static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
395 u8 port, 469 u8 port,
396 struct net_device *rdma_ndev, 470 struct net_device *rdma_ndev,
@@ -405,9 +479,20 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
405 rtnl_lock(); 479 rtnl_lock();
406 down_read(&net_rwsem); 480 down_read(&net_rwsem);
407 for_each_net(net) 481 for_each_net(net)
408 for_each_netdev(net, ndev) 482 for_each_netdev(net, ndev) {
409 if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) 483 /*
410 add_netdev_ips(ib_dev, port, rdma_ndev, ndev); 484 * Filter and add default GIDs of the primary netdevice
485 * when not in bonding mode, or add default GIDs
486 * of bond master device, when in bonding mode.
487 */
488 if (is_ndev_for_default_gid_filter(ib_dev, port,
489 rdma_ndev, ndev))
490 add_default_gids(ib_dev, port, rdma_ndev, ndev);
491
492 if (is_eth_port_of_netdev_filter(ib_dev, port,
493 rdma_ndev, ndev))
494 _add_netdev_ips(ib_dev, port, ndev);
495 }
411 up_read(&net_rwsem); 496 up_read(&net_rwsem);
412 rtnl_unlock(); 497 rtnl_unlock();
413} 498}
@@ -513,18 +598,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
513 rcu_read_unlock(); 598 rcu_read_unlock();
514 599
515 if (master_ndev) { 600 if (master_ndev) {
516 bond_delete_netdev_default_gids(ib_dev, port, master_ndev, 601 bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
517 rdma_ndev); 602 master_ndev);
518 dev_put(master_ndev); 603 dev_put(master_ndev);
519 } 604 }
520} 605}
521 606
522static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
523 struct net_device *rdma_ndev, void *cookie)
524{
525 bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
526}
527
528/* The following functions operate on all IB devices. netdevice_event and 607/* The following functions operate on all IB devices. netdevice_event and
529 * addr_event execute ib_enum_all_roce_netdevs through a work. 608 * addr_event execute ib_enum_all_roce_netdevs through a work.
530 * ib_enum_all_roce_netdevs iterates through all IB devices. 609 * ib_enum_all_roce_netdevs iterates through all IB devices.
@@ -575,40 +654,94 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
575} 654}
576 655
577static const struct netdev_event_work_cmd add_cmd = { 656static const struct netdev_event_work_cmd add_cmd = {
578 .cb = add_netdev_ips, .filter = is_eth_port_of_netdev}; 657 .cb = add_netdev_ips,
658 .filter = is_eth_port_of_netdev_filter
659};
660
579static const struct netdev_event_work_cmd add_cmd_upper_ips = { 661static const struct netdev_event_work_cmd add_cmd_upper_ips = {
580 .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev}; 662 .cb = add_netdev_upper_ips,
663 .filter = is_eth_port_of_netdev_filter
664};
581 665
582static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info, 666static void
583 struct netdev_event_work_cmd *cmds) 667ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
668 struct netdev_event_work_cmd *cmds)
584{ 669{
585 static const struct netdev_event_work_cmd upper_ips_del_cmd = { 670 static const struct netdev_event_work_cmd
586 .cb = del_netdev_upper_ips, .filter = upper_device_filter}; 671 upper_ips_del_cmd = {
587 static const struct netdev_event_work_cmd bonding_default_del_cmd = { 672 .cb = del_netdev_upper_ips,
588 .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave}; 673 .filter = upper_device_filter
589 674 };
590 if (changeupper_info->linking == false) { 675
591 cmds[0] = upper_ips_del_cmd; 676 cmds[0] = upper_ips_del_cmd;
592 cmds[0].ndev = changeupper_info->upper_dev; 677 cmds[0].ndev = changeupper_info->upper_dev;
593 cmds[1] = add_cmd; 678 cmds[1] = add_cmd;
594 } else {
595 cmds[0] = bonding_default_del_cmd;
596 cmds[0].ndev = changeupper_info->upper_dev;
597 cmds[1] = add_cmd_upper_ips;
598 cmds[1].ndev = changeupper_info->upper_dev;
599 cmds[1].filter_ndev = changeupper_info->upper_dev;
600 }
601} 679}
602 680
681static const struct netdev_event_work_cmd bonding_default_add_cmd = {
682 .cb = add_default_gids,
683 .filter = is_upper_ndev_bond_master_filter
684};
685
686static void
687ndev_event_link(struct net_device *event_ndev,
688 struct netdev_notifier_changeupper_info *changeupper_info,
689 struct netdev_event_work_cmd *cmds)
690{
691 static const struct netdev_event_work_cmd
692 bonding_default_del_cmd = {
693 .cb = del_default_gids,
694 .filter = is_upper_ndev_bond_master_filter
695 };
696 /*
697 * When a lower netdev is linked to its upper bonding
698 * netdev, delete lower slave netdev's default GIDs.
699 */
700 cmds[0] = bonding_default_del_cmd;
701 cmds[0].ndev = event_ndev;
702 cmds[0].filter_ndev = changeupper_info->upper_dev;
703
704 /* Now add bonding upper device default GIDs */
705 cmds[1] = bonding_default_add_cmd;
706 cmds[1].ndev = changeupper_info->upper_dev;
707 cmds[1].filter_ndev = changeupper_info->upper_dev;
708
709 /* Now add bonding upper device IP based GIDs */
710 cmds[2] = add_cmd_upper_ips;
711 cmds[2].ndev = changeupper_info->upper_dev;
712 cmds[2].filter_ndev = changeupper_info->upper_dev;
713}
714
715static void netdevice_event_changeupper(struct net_device *event_ndev,
716 struct netdev_notifier_changeupper_info *changeupper_info,
717 struct netdev_event_work_cmd *cmds)
718{
719 if (changeupper_info->linking)
720 ndev_event_link(event_ndev, changeupper_info, cmds);
721 else
722 ndev_event_unlink(changeupper_info, cmds);
723}
724
725static const struct netdev_event_work_cmd add_default_gid_cmd = {
726 .cb = add_default_gids,
727 .filter = is_ndev_for_default_gid_filter,
728};
729
603static int netdevice_event(struct notifier_block *this, unsigned long event, 730static int netdevice_event(struct notifier_block *this, unsigned long event,
604 void *ptr) 731 void *ptr)
605{ 732{
606 static const struct netdev_event_work_cmd del_cmd = { 733 static const struct netdev_event_work_cmd del_cmd = {
607 .cb = del_netdev_ips, .filter = pass_all_filter}; 734 .cb = del_netdev_ips, .filter = pass_all_filter};
608 static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { 735 static const struct netdev_event_work_cmd
609 .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave}; 736 bonding_default_del_cmd_join = {
610 static const struct netdev_event_work_cmd default_del_cmd = { 737 .cb = del_netdev_default_ips_join,
611 .cb = del_netdev_default_ips, .filter = pass_all_filter}; 738 .filter = is_eth_port_inactive_slave_filter
739 };
740 static const struct netdev_event_work_cmd
741 netdev_del_cmd = {
742 .cb = del_netdev_ips,
743 .filter = is_eth_port_of_netdev_filter
744 };
612 static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { 745 static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
613 .cb = del_netdev_upper_ips, .filter = upper_device_filter}; 746 .cb = del_netdev_upper_ips, .filter = upper_device_filter};
614 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 747 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
@@ -621,7 +754,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
621 case NETDEV_REGISTER: 754 case NETDEV_REGISTER:
622 case NETDEV_UP: 755 case NETDEV_UP:
623 cmds[0] = bonding_default_del_cmd_join; 756 cmds[0] = bonding_default_del_cmd_join;
624 cmds[1] = add_cmd; 757 cmds[1] = add_default_gid_cmd;
758 cmds[2] = add_cmd;
625 break; 759 break;
626 760
627 case NETDEV_UNREGISTER: 761 case NETDEV_UNREGISTER:
@@ -632,19 +766,22 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
632 break; 766 break;
633 767
634 case NETDEV_CHANGEADDR: 768 case NETDEV_CHANGEADDR:
635 cmds[0] = default_del_cmd; 769 cmds[0] = netdev_del_cmd;
636 cmds[1] = add_cmd; 770 cmds[1] = add_default_gid_cmd;
771 cmds[2] = add_cmd;
637 break; 772 break;
638 773
639 case NETDEV_CHANGEUPPER: 774 case NETDEV_CHANGEUPPER:
640 netdevice_event_changeupper( 775 netdevice_event_changeupper(ndev,
641 container_of(ptr, struct netdev_notifier_changeupper_info, info), 776 container_of(ptr, struct netdev_notifier_changeupper_info, info),
642 cmds); 777 cmds);
643 break; 778 break;
644 779
645 case NETDEV_BONDING_FAILOVER: 780 case NETDEV_BONDING_FAILOVER:
646 cmds[0] = bonding_event_ips_del_cmd; 781 cmds[0] = bonding_event_ips_del_cmd;
647 cmds[1] = bonding_default_del_cmd_join; 782 /* Add default GIDs of the bond device */
783 cmds[1] = bonding_default_add_cmd;
784 /* Add IP based GIDs of the bond device */
648 cmds[2] = add_cmd_upper_ips; 785 cmds[2] = add_cmd_upper_ips;
649 break; 786 break;
650 787
@@ -660,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work)
660 struct update_gid_event_work *work = 797 struct update_gid_event_work *work =
661 container_of(_work, struct update_gid_event_work, work); 798 container_of(_work, struct update_gid_event_work, work);
662 799
663 ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev, 800 ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
801 work->gid_attr.ndev,
664 callback_for_addr_gid_device_scan, work); 802 callback_for_addr_gid_device_scan, work);
665 803
666 dev_put(work->gid_attr.ndev); 804 dev_put(work->gid_attr.ndev);
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index c8963e91f92a..683e6d11a564 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
87 } 87 }
88 88
89 ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); 89 ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
90 if (ret < nents) { 90 if (ret < 0 || ret < nents) {
91 ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr); 91 ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
92 return -EINVAL; 92 return -EINVAL;
93 } 93 }
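
The added ret < 0 term matters because ib_map_mr_sg() can return a negative errno, and nents here appears to be an unsigned count, so the old signed/unsigned comparison quietly missed errors. A small, stand-alone illustration of that pitfall (plain userspace C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		int ret = -22;		/* e.g. -EINVAL from a mapping helper */
		unsigned int nents = 4;

		/* ret is promoted to unsigned here, so the error is missed */
		if (!(ret < nents))
			printf("missed: ret compares as %u\n", (unsigned int)ret);

		/* checking for a negative return first is robust */
		if (ret < 0 || (unsigned int)ret < nents)
			printf("caught\n");
		return 0;
	}
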
@@ -325,7 +325,7 @@ out_unmap_sg:
325EXPORT_SYMBOL(rdma_rw_ctx_init); 325EXPORT_SYMBOL(rdma_rw_ctx_init);
326 326
327/** 327/**
328 * rdma_rw_ctx_signature init - initialize a RW context with signature offload 328 * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
329 * @ctx: context to initialize 329 * @ctx: context to initialize
330 * @qp: queue pair to operate on 330 * @qp: queue pair to operate on
331 * @port_num: port num to which the connection is bound 331 * @port_num: port num to which the connection is bound
@@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
564int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, 564int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
565 struct ib_cqe *cqe, struct ib_send_wr *chain_wr) 565 struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
566{ 566{
567 struct ib_send_wr *first_wr, *bad_wr; 567 struct ib_send_wr *first_wr;
568 568
569 first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr); 569 first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
570 return ib_post_send(qp, first_wr, &bad_wr); 570 return ib_post_send(qp, first_wr, NULL);
571} 571}
572EXPORT_SYMBOL(rdma_rw_ctx_post); 572EXPORT_SYMBOL(rdma_rw_ctx_post);
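
This relies on ib_post_send() now accepting a NULL bad_wr when the caller does not need to know which WR failed. A minimal illustrative wrapper (the function name is a placeholder):

	/* post a single send WR without tracking which WR failed */
	static int example_post_one(struct ib_qp *qp, struct ib_send_wr *wr)
	{
		return ib_post_send(qp, wr, NULL);	/* NULL bad_wr is permitted */
	}
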
573 573
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a61ec7e33613..7b794a14d6e8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
1227 return src_path_mask; 1227 return src_path_mask;
1228} 1228}
1229 1229
1230static int 1230static int roce_resolve_route_from_path(struct sa_path_rec *rec,
1231roce_resolve_route_from_path(struct ib_device *device, u8 port_num, 1231 const struct ib_gid_attr *attr)
1232 struct sa_path_rec *rec)
1233{ 1232{
1234 struct net_device *resolved_dev; 1233 struct rdma_dev_addr dev_addr = {};
1235 struct net_device *ndev;
1236 struct net_device *idev;
1237 struct rdma_dev_addr dev_addr = {
1238 .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
1239 sa_path_get_ifindex(rec) : 0),
1240 .net = sa_path_get_ndev(rec) ?
1241 sa_path_get_ndev(rec) :
1242 &init_net
1243 };
1244 union { 1234 union {
1245 struct sockaddr _sockaddr; 1235 struct sockaddr _sockaddr;
1246 struct sockaddr_in _sockaddr_in; 1236 struct sockaddr_in _sockaddr_in;
@@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
1250 1240
1251 if (rec->roce.route_resolved) 1241 if (rec->roce.route_resolved)
1252 return 0; 1242 return 0;
1243 if (!attr || !attr->ndev)
1244 return -EINVAL;
1253 1245
1254 if (!device->get_netdev) 1246 dev_addr.bound_dev_if = attr->ndev->ifindex;
1255 return -EOPNOTSUPP; 1247 /* TODO: Use net from the ib_gid_attr once it is added to it,
 1248 * until then, limit it to init_net.
1249 */
1250 dev_addr.net = &init_net;
1256 1251
1257 rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); 1252 rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
1258 rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); 1253 rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
@@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
1268 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) 1263 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
1269 return -EINVAL; 1264 return -EINVAL;
1270 1265
1271 idev = device->get_netdev(device, port_num); 1266 rec->roce.route_resolved = true;
1272 if (!idev) 1267 return 0;
1273 return -ENODEV;
1274
1275 resolved_dev = dev_get_by_index(dev_addr.net,
1276 dev_addr.bound_dev_if);
1277 if (!resolved_dev) {
1278 ret = -ENODEV;
1279 goto done;
1280 }
1281 ndev = ib_get_ndev_from_path(rec);
1282 rcu_read_lock();
1283 if ((ndev && ndev != resolved_dev) ||
1284 (resolved_dev != idev &&
1285 !rdma_is_upper_dev_rcu(idev, resolved_dev)))
1286 ret = -EHOSTUNREACH;
1287 rcu_read_unlock();
1288 dev_put(resolved_dev);
1289 if (ndev)
1290 dev_put(ndev);
1291done:
1292 dev_put(idev);
1293 if (!ret)
1294 rec->roce.route_resolved = true;
1295 return ret;
1296} 1268}
1297 1269
1298static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, 1270static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
1299 struct sa_path_rec *rec, 1271 struct sa_path_rec *rec,
1300 struct rdma_ah_attr *ah_attr) 1272 struct rdma_ah_attr *ah_attr,
1273 const struct ib_gid_attr *gid_attr)
1301{ 1274{
1302 enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); 1275 enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1303 struct net_device *ndev;
1304 u16 gid_index;
1305 int ret;
1306 1276
1307 ndev = ib_get_ndev_from_path(rec); 1277 if (!gid_attr) {
1308 ret = ib_find_cached_gid_by_port(device, &rec->sgid, type, 1278 gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1309 port_num, ndev, &gid_index); 1279 port_num, NULL);
1310 if (ndev) 1280 if (IS_ERR(gid_attr))
1311 dev_put(ndev); 1281 return PTR_ERR(gid_attr);
1312 if (ret) 1282 } else
1313 return ret; 1283 rdma_hold_gid_attr(gid_attr);
1314 1284
1315 rdma_ah_set_grh(ah_attr, &rec->dgid, 1285 rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1316 be32_to_cpu(rec->flow_label), 1286 be32_to_cpu(rec->flow_label),
1317 gid_index, rec->hop_limit, 1287 rec->hop_limit, rec->traffic_class,
1318 rec->traffic_class); 1288 gid_attr);
1319 return 0; 1289 return 0;
1320} 1290}
1321 1291
1292/**
1293 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1294 * an SA path record.
 1295 * @device: Device associated with the ah attributes initialization.
1296 * @port_num: Port on the specified device.
1297 * @rec: path record entry to use for ah attributes initialization.
 1298 * @ah_attr: address handle attributes to initialize from the path record.
1299 * @sgid_attr: SGID attribute to consider during initialization.
1300 *
1301 * When ib_init_ah_attr_from_path() returns success,
 1302 * (a) for the IB link layer it optionally contains a reference to the SGID
 1303 * attribute when a GRH is present.
1304 * (b) for RoCE link layer it contains a reference to SGID attribute.
1305 * User must invoke rdma_destroy_ah_attr() to release reference to SGID
1306 * attributes which are initialized using ib_init_ah_attr_from_path().
1307 */
1322int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, 1308int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
1323 struct sa_path_rec *rec, 1309 struct sa_path_rec *rec,
1324 struct rdma_ah_attr *ah_attr) 1310 struct rdma_ah_attr *ah_attr,
1311 const struct ib_gid_attr *gid_attr)
1325{ 1312{
1326 int ret = 0; 1313 int ret = 0;
1327 1314
@@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
1332 rdma_ah_set_static_rate(ah_attr, rec->rate); 1319 rdma_ah_set_static_rate(ah_attr, rec->rate);
1333 1320
1334 if (sa_path_is_roce(rec)) { 1321 if (sa_path_is_roce(rec)) {
1335 ret = roce_resolve_route_from_path(device, port_num, rec); 1322 ret = roce_resolve_route_from_path(rec, gid_attr);
1336 if (ret) 1323 if (ret)
1337 return ret; 1324 return ret;
1338 1325
@@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
1349 } 1336 }
1350 1337
1351 if (rec->hop_limit > 0 || sa_path_is_roce(rec)) 1338 if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1352 ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr); 1339 ret = init_ah_attr_grh_fields(device, port_num,
1340 rec, ah_attr, gid_attr);
1353 return ret; 1341 return ret;
1354} 1342}
1355EXPORT_SYMBOL(ib_init_ah_attr_from_path); 1343EXPORT_SYMBOL(ib_init_ah_attr_from_path);
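The kernel-doc above spells out the new contract: on success, ib_init_ah_attr_from_path() may leave a reference to an SGID attribute inside the ah_attr, and the caller owns that reference until rdma_destroy_ah_attr() is called. A minimal caller sketch, assuming a NULL sgid_attr is acceptable (the core then resolves the entry itself, as init_ah_attr_grh_fields() above shows); my_create_ah_from_rec() is a hypothetical helper, not part of this patch:

/* Hedged sketch, not part of the patch. */
static struct ib_ah *my_create_ah_from_rec(struct ib_pd *pd, u8 port_num,
					   struct sa_path_rec *rec)
{
	struct rdma_ah_attr ah_attr = {};
	struct ib_ah *ah;
	int ret;

	/* NULL sgid_attr: let the core look up the SGID entry itself */
	ret = ib_init_ah_attr_from_path(pd->device, port_num, rec,
					&ah_attr, NULL);
	if (ret)
		return ERR_PTR(ret);

	ah = rdma_create_ah(pd, &ah_attr);

	/* Always drop the SGID attribute reference taken above */
	rdma_destroy_ah_attr(&ah_attr);
	return ah;
}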
@@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1557 ARRAY_SIZE(path_rec_table), 1545 ARRAY_SIZE(path_rec_table),
1558 mad->data, &rec); 1546 mad->data, &rec);
1559 rec.rec_type = SA_PATH_REC_TYPE_IB; 1547 rec.rec_type = SA_PATH_REC_TYPE_IB;
1560 sa_path_set_ndev(&rec, NULL);
1561 sa_path_set_ifindex(&rec, 0);
1562 sa_path_set_dmac_zero(&rec); 1548 sa_path_set_dmac_zero(&rec);
1563 1549
1564 if (query->conv_pr) { 1550 if (query->conv_pr) {
@@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work)
2290 struct ib_sa_sm_ah *new_ah; 2276 struct ib_sa_sm_ah *new_ah;
2291 struct ib_port_attr port_attr; 2277 struct ib_port_attr port_attr;
2292 struct rdma_ah_attr ah_attr; 2278 struct rdma_ah_attr ah_attr;
2279 bool grh_required;
2293 2280
2294 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { 2281 if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2295 pr_warn("Couldn't query port\n"); 2282 pr_warn("Couldn't query port\n");
@@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work)
2314 rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); 2301 rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2315 rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); 2302 rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2316 rdma_ah_set_port_num(&ah_attr, port->port_num); 2303 rdma_ah_set_port_num(&ah_attr, port->port_num);
2317 if (port_attr.grh_required) { 2304
2318 if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) { 2305 grh_required = rdma_is_grh_required(port->agent->device,
2319 rdma_ah_set_make_grd(&ah_attr, true); 2306 port->port_num);
2320 } else { 2307
2321 rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); 2308 /*
2322 rdma_ah_set_subnet_prefix(&ah_attr, 2309 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2323 cpu_to_be64(port_attr.subnet_prefix)); 2310 * differentiated from a permissive LID of 0xFFFF. We set the
2324 rdma_ah_set_interface_id(&ah_attr, 2311 * grh_required flag here so the SA can program the DGID in the
2325 cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); 2312 * address handle appropriately.
2326 } 2313 */
2314 if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2315 (grh_required ||
2316 port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2317 rdma_ah_set_make_grd(&ah_attr, true);
2318
2319 if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2320 rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2321 rdma_ah_set_subnet_prefix(&ah_attr,
2322 cpu_to_be64(port_attr.subnet_prefix));
2323 rdma_ah_set_interface_id(&ah_attr,
2324 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2327 } 2325 }
2328 2326
2329 new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr); 2327 new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 31c7efaf8e7a..7fd14ead7b37 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -42,6 +42,7 @@
42 42
43#include <rdma/ib_mad.h> 43#include <rdma/ib_mad.h>
44#include <rdma/ib_pma.h> 44#include <rdma/ib_pma.h>
45#include <rdma/ib_cache.h>
45 46
46struct ib_port; 47struct ib_port;
47 48
@@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = {
346 NULL 347 NULL
347}; 348};
348 349
349static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) 350static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
350{ 351{
351 if (!gid_attr->ndev) 352 if (!gid_attr->ndev)
352 return -EINVAL; 353 return -EINVAL;
@@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
354 return sprintf(buf, "%s\n", gid_attr->ndev->name); 355 return sprintf(buf, "%s\n", gid_attr->ndev->name);
355} 356}
356 357
357static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) 358static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
358{ 359{
359 return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); 360 return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
360} 361}
361 362
362static ssize_t _show_port_gid_attr(struct ib_port *p, 363static ssize_t _show_port_gid_attr(
363 struct port_attribute *attr, 364 struct ib_port *p, struct port_attribute *attr, char *buf,
364 char *buf, 365 size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
365 size_t (*print)(struct ib_gid_attr *gid_attr,
366 char *buf))
367{ 366{
368 struct port_table_attribute *tab_attr = 367 struct port_table_attribute *tab_attr =
369 container_of(attr, struct port_table_attribute, attr); 368 container_of(attr, struct port_table_attribute, attr);
370 union ib_gid gid; 369 const struct ib_gid_attr *gid_attr;
371 struct ib_gid_attr gid_attr = {};
372 ssize_t ret; 370 ssize_t ret;
373 371
374 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, 372 gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
375 &gid_attr); 373 if (IS_ERR(gid_attr))
376 if (ret) 374 return PTR_ERR(gid_attr);
377 goto err;
378 375
379 ret = print(&gid_attr, buf); 376 ret = print(gid_attr, buf);
380 377 rdma_put_gid_attr(gid_attr);
381err:
382 if (gid_attr.ndev)
383 dev_put(gid_attr.ndev);
384 return ret; 378 return ret;
385} 379}
386 380
@@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
389{ 383{
390 struct port_table_attribute *tab_attr = 384 struct port_table_attribute *tab_attr =
391 container_of(attr, struct port_table_attribute, attr); 385 container_of(attr, struct port_table_attribute, attr);
392 union ib_gid *pgid; 386 const struct ib_gid_attr *gid_attr;
393 union ib_gid gid;
394 ssize_t ret; 387 ssize_t ret;
395 388
396 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); 389 gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
390 if (IS_ERR(gid_attr)) {
391 const union ib_gid zgid = {};
392
393 /* If reading GID fails, it is likely due to GID entry being
394 * empty (invalid) or reserved GID in the table. User space
395 * expects to read GID table entries as long as the given index
396 * is within the GID table size. An administrative/debugging tool
397 * would fail to query the rest of the GID entries if it hit an
398 * error while querying a GID at a given index. To avoid user
399 * space raising such an error on a failed GID read, return a zero
400 * GID as before. This maintains backward compatibility.
401 */
402 return sprintf(buf, "%pI6\n", zgid.raw);
403 }
397 404
398 /* If reading GID fails, it is likely due to GID entry being empty 405 ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
399 * (invalid) or reserved GID in the table. 406 rdma_put_gid_attr(gid_attr);
400 * User space expects to read GID table entries as long as it given 407 return ret;
401 * index is within GID table size.
402 * Administrative/debugging tool fails to query rest of the GID entries
403 * if it hits error while querying a GID of the given index.
404 * To avoid user space throwing such error on fail to read gid, return
405 * zero GID as before. This maintains backward compatibility.
406 */
407 if (ret)
408 pgid = &zgid;
409 else
410 pgid = &gid;
411 return sprintf(buf, "%pI6\n", pgid->raw);
412} 408}
413 409
414static ssize_t show_port_gid_attr_ndev(struct ib_port *p, 410static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
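The sysfs conversion above replaces the copying ib_query_gid() interface with the kref'd rdma_get_gid_attr()/rdma_put_gid_attr() pair. The resulting get/inspect/put pattern, as a hedged sketch; dump_gid_entry() is illustrative only:

/* Hedged sketch, not part of the patch. */
static void dump_gid_entry(struct ib_device *ibdev, u8 port, unsigned int index)
{
	const struct ib_gid_attr *attr;

	attr = rdma_get_gid_attr(ibdev, port, index);
	if (IS_ERR(attr))
		return;	/* empty/reserved entry or out-of-range index */

	pr_info("port %u gid[%u] = %pI6 (type %d)\n",
		port, index, attr->gid.raw, attr->gid_type);

	/* drop the reference taken by rdma_get_gid_attr() */
	rdma_put_gid_attr(attr);
}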
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9eef96dacbd7..faa9e6116b2f 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -207,7 +207,7 @@ error:
207} 207}
208 208
209static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, 209static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
210 struct ib_cm_req_event_param *kreq) 210 const struct ib_cm_req_event_param *kreq)
211{ 211{
212 ureq->remote_ca_guid = kreq->remote_ca_guid; 212 ureq->remote_ca_guid = kreq->remote_ca_guid;
213 ureq->remote_qkey = kreq->remote_qkey; 213 ureq->remote_qkey = kreq->remote_qkey;
@@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
231} 231}
232 232
233static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, 233static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
234 struct ib_cm_rep_event_param *krep) 234 const struct ib_cm_rep_event_param *krep)
235{ 235{
236 urep->remote_ca_guid = krep->remote_ca_guid; 236 urep->remote_ca_guid = krep->remote_ca_guid;
237 urep->remote_qkey = krep->remote_qkey; 237 urep->remote_qkey = krep->remote_qkey;
@@ -247,14 +247,14 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
247} 247}
248 248
249static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, 249static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
250 struct ib_cm_sidr_rep_event_param *krep) 250 const struct ib_cm_sidr_rep_event_param *krep)
251{ 251{
252 urep->status = krep->status; 252 urep->status = krep->status;
253 urep->qkey = krep->qkey; 253 urep->qkey = krep->qkey;
254 urep->qpn = krep->qpn; 254 urep->qpn = krep->qpn;
255}; 255};
256 256
257static int ib_ucm_event_process(struct ib_cm_event *evt, 257static int ib_ucm_event_process(const struct ib_cm_event *evt,
258 struct ib_ucm_event *uvt) 258 struct ib_ucm_event *uvt)
259{ 259{
260 void *info = NULL; 260 void *info = NULL;
@@ -351,7 +351,7 @@ err1:
351} 351}
352 352
353static int ib_ucm_event_handler(struct ib_cm_id *cm_id, 353static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
354 struct ib_cm_event *event) 354 const struct ib_cm_event *event)
355{ 355{
356 struct ib_ucm_event *uevent; 356 struct ib_ucm_event *uevent;
357 struct ib_ucm_context *ctx; 357 struct ib_ucm_context *ctx;
@@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
1000 const char __user *inbuf, 1000 const char __user *inbuf,
1001 int in_len, int out_len) 1001 int in_len, int out_len)
1002{ 1002{
1003 struct ib_cm_sidr_req_param param; 1003 struct ib_cm_sidr_req_param param = {};
1004 struct ib_ucm_context *ctx; 1004 struct ib_ucm_context *ctx;
1005 struct ib_ucm_sidr_req cmd; 1005 struct ib_ucm_sidr_req cmd;
1006 int result; 1006 int result;
1007 1007
1008 param.private_data = NULL;
1009 param.path = NULL;
1010
1011 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1008 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1012 return -EFAULT; 1009 return -EFAULT;
1013 1010
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 54ab6335c48d..a41792dbae1f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
84 struct ib_umem *umem; 84 struct ib_umem *umem;
85 struct page **page_list; 85 struct page **page_list;
86 struct vm_area_struct **vma_list; 86 struct vm_area_struct **vma_list;
87 unsigned long locked;
88 unsigned long lock_limit; 87 unsigned long lock_limit;
89 unsigned long cur_base; 88 unsigned long cur_base;
90 unsigned long npages; 89 unsigned long npages;
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
92 int i; 91 int i;
93 unsigned long dma_attrs = 0; 92 unsigned long dma_attrs = 0;
94 struct scatterlist *sg, *sg_list_start; 93 struct scatterlist *sg, *sg_list_start;
95 int need_release = 0;
96 unsigned int gup_flags = FOLL_WRITE; 94 unsigned int gup_flags = FOLL_WRITE;
97 95
98 if (dmasync) 96 if (dmasync)
@@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
121 119
122 if (access & IB_ACCESS_ON_DEMAND) { 120 if (access & IB_ACCESS_ON_DEMAND) {
123 ret = ib_umem_odp_get(context, umem, access); 121 ret = ib_umem_odp_get(context, umem, access);
124 if (ret) { 122 if (ret)
125 kfree(umem); 123 goto umem_kfree;
126 return ERR_PTR(ret);
127 }
128 return umem; 124 return umem;
129 } 125 }
130 126
@@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
135 131
136 page_list = (struct page **) __get_free_page(GFP_KERNEL); 132 page_list = (struct page **) __get_free_page(GFP_KERNEL);
137 if (!page_list) { 133 if (!page_list) {
138 kfree(umem); 134 ret = -ENOMEM;
139 return ERR_PTR(-ENOMEM); 135 goto umem_kfree;
140 } 136 }
141 137
142 /* 138 /*
@@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
149 145
150 npages = ib_umem_num_pages(umem); 146 npages = ib_umem_num_pages(umem);
151 147
152 down_write(&current->mm->mmap_sem);
153
154 locked = npages + current->mm->pinned_vm;
155 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 148 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
156 149
157 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 150 down_write(&current->mm->mmap_sem);
151 current->mm->pinned_vm += npages;
152 if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
153 up_write(&current->mm->mmap_sem);
158 ret = -ENOMEM; 154 ret = -ENOMEM;
159 goto out; 155 goto vma;
160 } 156 }
157 up_write(&current->mm->mmap_sem);
161 158
162 cur_base = addr & PAGE_MASK; 159 cur_base = addr & PAGE_MASK;
163 160
164 if (npages == 0 || npages > UINT_MAX) { 161 if (npages == 0 || npages > UINT_MAX) {
165 ret = -EINVAL; 162 ret = -EINVAL;
166 goto out; 163 goto vma;
167 } 164 }
168 165
169 ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); 166 ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
170 if (ret) 167 if (ret)
171 goto out; 168 goto vma;
172 169
173 if (!umem->writable) 170 if (!umem->writable)
174 gup_flags |= FOLL_FORCE; 171 gup_flags |= FOLL_FORCE;
175 172
176 need_release = 1;
177 sg_list_start = umem->sg_head.sgl; 173 sg_list_start = umem->sg_head.sgl;
178 174
175 down_read(&current->mm->mmap_sem);
179 while (npages) { 176 while (npages) {
180 ret = get_user_pages_longterm(cur_base, 177 ret = get_user_pages_longterm(cur_base,
181 min_t(unsigned long, npages, 178 min_t(unsigned long, npages,
182 PAGE_SIZE / sizeof (struct page *)), 179 PAGE_SIZE / sizeof (struct page *)),
183 gup_flags, page_list, vma_list); 180 gup_flags, page_list, vma_list);
184 181 if (ret < 0) {
185 if (ret < 0) 182 up_read(&current->mm->mmap_sem);
186 goto out; 183 goto umem_release;
184 }
187 185
188 umem->npages += ret; 186 umem->npages += ret;
189 cur_base += ret * PAGE_SIZE; 187 cur_base += ret * PAGE_SIZE;
@@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
199 /* preparing for next loop */ 197 /* preparing for next loop */
200 sg_list_start = sg; 198 sg_list_start = sg;
201 } 199 }
200 up_read(&current->mm->mmap_sem);
202 201
203 umem->nmap = ib_dma_map_sg_attrs(context->device, 202 umem->nmap = ib_dma_map_sg_attrs(context->device,
204 umem->sg_head.sgl, 203 umem->sg_head.sgl,
@@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
206 DMA_BIDIRECTIONAL, 205 DMA_BIDIRECTIONAL,
207 dma_attrs); 206 dma_attrs);
208 207
209 if (umem->nmap <= 0) { 208 if (!umem->nmap) {
210 ret = -ENOMEM; 209 ret = -ENOMEM;
211 goto out; 210 goto umem_release;
212 } 211 }
213 212
214 ret = 0; 213 ret = 0;
214 goto out;
215 215
216out: 216umem_release:
217 if (ret < 0) { 217 __ib_umem_release(context->device, umem, 0);
218 if (need_release) 218vma:
219 __ib_umem_release(context->device, umem, 0); 219 down_write(&current->mm->mmap_sem);
220 kfree(umem); 220 current->mm->pinned_vm -= ib_umem_num_pages(umem);
221 } else
222 current->mm->pinned_vm = locked;
223
224 up_write(&current->mm->mmap_sem); 221 up_write(&current->mm->mmap_sem);
222out:
225 if (vma_list) 223 if (vma_list)
226 free_page((unsigned long) vma_list); 224 free_page((unsigned long) vma_list);
227 free_page((unsigned long) page_list); 225 free_page((unsigned long) page_list);
228 226umem_kfree:
229 return ret < 0 ? ERR_PTR(ret) : umem; 227 if (ret)
228 kfree(umem);
229 return ret ? ERR_PTR(ret) : umem;
230} 230}
231EXPORT_SYMBOL(ib_umem_get); 231EXPORT_SYMBOL(ib_umem_get);
232 232
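The ib_umem_get() rework above charges pinned_vm optimistically under mmap_sem, checks the total against RLIMIT_MEMLOCK, and unwinds the charge on every failure path through the new goto labels. The same accounting pattern in isolation, as a hedged sketch; charge_pinned_pages() and uncharge_pinned_pages() are illustrative helpers, not kernel APIs:

/* Hedged sketch, not part of the patch. */
static int charge_pinned_pages(struct mm_struct *mm, unsigned long npages)
{
	unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	down_write(&mm->mmap_sem);
	mm->pinned_vm += npages;
	if (mm->pinned_vm > lock_limit && !capable(CAP_IPC_LOCK)) {
		mm->pinned_vm -= npages;	/* undo the optimistic charge */
		up_write(&mm->mmap_sem);
		return -ENOMEM;
	}
	up_write(&mm->mmap_sem);
	return 0;
}

static void uncharge_pinned_pages(struct mm_struct *mm, unsigned long npages)
{
	down_write(&mm->mmap_sem);
	mm->pinned_vm -= npages;
	up_write(&mm->mmap_sem);
}

(The patch itself defers the unwind to its 'vma' label rather than undoing the charge inline, but the limit check is the same.)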
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index bb98c9e4a7fd..c34a6852d691 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent,
268 packet->mad.hdr.traffic_class = grh->traffic_class; 268 packet->mad.hdr.traffic_class = grh->traffic_class;
269 memcpy(packet->mad.hdr.gid, &grh->dgid, 16); 269 memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
270 packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label); 270 packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
271 rdma_destroy_ah_attr(&ah_attr);
271 } 272 }
272 273
273 if (queue_packet(file, agent, packet)) 274 if (queue_packet(file, agent, packet))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c0d40fc3a53a..5df8e548cc14 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -111,7 +111,7 @@ struct ib_uverbs_device {
111 struct mutex lists_mutex; /* protect lists */ 111 struct mutex lists_mutex; /* protect lists */
112 struct list_head uverbs_file_list; 112 struct list_head uverbs_file_list;
113 struct list_head uverbs_events_file_list; 113 struct list_head uverbs_events_file_list;
114 struct uverbs_root_spec *specs_root; 114 struct uverbs_api *uapi;
115}; 115};
116 116
117struct ib_uverbs_event_queue { 117struct ib_uverbs_event_queue {
@@ -130,21 +130,37 @@ struct ib_uverbs_async_event_file {
130}; 130};
131 131
132struct ib_uverbs_completion_event_file { 132struct ib_uverbs_completion_event_file {
133 struct ib_uobject_file uobj_file; 133 struct ib_uobject uobj;
134 struct ib_uverbs_event_queue ev_queue; 134 struct ib_uverbs_event_queue ev_queue;
135}; 135};
136 136
137struct ib_uverbs_file { 137struct ib_uverbs_file {
138 struct kref ref; 138 struct kref ref;
139 struct mutex mutex;
140 struct mutex cleanup_mutex; /* protect cleanup */
141 struct ib_uverbs_device *device; 139 struct ib_uverbs_device *device;
140 struct mutex ucontext_lock;
141 /*
142 * ucontext must be accessed via ib_uverbs_get_ucontext() or with
143 * ucontext_lock held
144 */
142 struct ib_ucontext *ucontext; 145 struct ib_ucontext *ucontext;
143 struct ib_event_handler event_handler; 146 struct ib_event_handler event_handler;
144 struct ib_uverbs_async_event_file *async_file; 147 struct ib_uverbs_async_event_file *async_file;
145 struct list_head list; 148 struct list_head list;
146 int is_closed; 149 int is_closed;
147 150
151 /*
152 * To access the uobjects list, hw_destroy_rwsem must be held for write,
153 * OR hw_destroy_rwsem held for read AND uobjects_lock held.
154 * hw_destroy_rwsem should be held across any destruction of the HW
155 * object of an associated uobject.
156 */
157 struct rw_semaphore hw_destroy_rwsem;
158 spinlock_t uobjects_lock;
159 struct list_head uobjects;
160
161 u64 uverbs_cmd_mask;
162 u64 uverbs_ex_cmd_mask;
163
148 struct idr idr; 164 struct idr idr;
149 /* spinlock protects write access to idr */ 165 /* spinlock protects write access to idr */
150 spinlock_t idr_lock; 166 spinlock_t idr_lock;
@@ -196,7 +212,6 @@ struct ib_uwq_object {
196 212
197struct ib_ucq_object { 213struct ib_ucq_object {
198 struct ib_uobject uobject; 214 struct ib_uobject uobject;
199 struct ib_uverbs_file *uverbs_file;
200 struct list_head comp_list; 215 struct list_head comp_list;
201 struct list_head async_list; 216 struct list_head async_list;
202 u32 comp_events_reported; 217 u32 comp_events_reported;
@@ -230,7 +245,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
230void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); 245void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
231void ib_uverbs_event_handler(struct ib_event_handler *handler, 246void ib_uverbs_event_handler(struct ib_event_handler *handler,
232 struct ib_event *event); 247 struct ib_event *event);
233int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd, 248int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
234 enum rdma_remove_reason why); 249 enum rdma_remove_reason why);
235 250
236int uverbs_dealloc_mw(struct ib_mw *mw); 251int uverbs_dealloc_mw(struct ib_mw *mw);
@@ -238,12 +253,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
238 struct ib_uqp_object *uobj); 253 struct ib_uqp_object *uobj);
239 254
240void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata); 255void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
241extern const struct uverbs_attr_def uverbs_uhw_compat_in;
242extern const struct uverbs_attr_def uverbs_uhw_compat_out;
243long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 256long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
244int uverbs_destroy_def_handler(struct ib_device *ib_dev,
245 struct ib_uverbs_file *file,
246 struct uverbs_attr_bundle *attrs);
247 257
248struct ib_uverbs_flow_spec { 258struct ib_uverbs_flow_spec {
249 union { 259 union {
@@ -292,7 +302,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
292 302
293#define IB_UVERBS_DECLARE_CMD(name) \ 303#define IB_UVERBS_DECLARE_CMD(name) \
294 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ 304 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
295 struct ib_device *ib_dev, \
296 const char __user *buf, int in_len, \ 305 const char __user *buf, int in_len, \
297 int out_len) 306 int out_len)
298 307
@@ -334,7 +343,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
334 343
335#define IB_UVERBS_DECLARE_EX_CMD(name) \ 344#define IB_UVERBS_DECLARE_EX_CMD(name) \
336 int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ 345 int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
337 struct ib_device *ib_dev, \
338 struct ib_udata *ucore, \ 346 struct ib_udata *ucore, \
339 struct ib_udata *uhw) 347 struct ib_udata *uhw)
340 348
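The comments added to struct ib_uverbs_file above encode two locking rules: the ucontext pointer is reached via ib_uverbs_get_ucontext() or under ucontext_lock, and the uobjects list requires hw_destroy_rwsem held for write, or held for read together with uobjects_lock. A hedged sketch of a reader following the second rule; count_live_uobjects() is illustrative only and assumes the uobject's 'list' member is its link into file->uobjects:

/* Hedged sketch, not part of the patch. */
static unsigned int count_live_uobjects(struct ib_uverbs_file *ufile)
{
	struct ib_uobject *uobj;
	unsigned long flags;
	unsigned int n = 0;

	/* read side: hw_destroy_rwsem for read plus uobjects_lock */
	down_read(&ufile->hw_destroy_rwsem);
	spin_lock_irqsave(&ufile->uobjects_lock, flags);
	list_for_each_entry(uobj, &ufile->uobjects, list)
		n++;
	spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
	up_read(&ufile->hw_destroy_rwsem);

	return n;
}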
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 583d3a10b940..a21d5214afc3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,11 +48,10 @@
48#include "core_priv.h" 48#include "core_priv.h"
49 49
50static struct ib_uverbs_completion_event_file * 50static struct ib_uverbs_completion_event_file *
51ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context) 51_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
52{ 52{
53 struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL, 53 struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
54 fd, context); 54 fd, ufile);
55 struct ib_uobject_file *uobj_file;
56 55
57 if (IS_ERR(uobj)) 56 if (IS_ERR(uobj))
58 return (void *)uobj; 57 return (void *)uobj;
@@ -60,13 +59,13 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
60 uverbs_uobject_get(uobj); 59 uverbs_uobject_get(uobj);
61 uobj_put_read(uobj); 60 uobj_put_read(uobj);
62 61
63 uobj_file = container_of(uobj, struct ib_uobject_file, uobj); 62 return container_of(uobj, struct ib_uverbs_completion_event_file,
64 return container_of(uobj_file, struct ib_uverbs_completion_event_file, 63 uobj);
65 uobj_file);
66} 64}
65#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
66 _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
67 67
68ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, 68ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
69 struct ib_device *ib_dev,
70 const char __user *buf, 69 const char __user *buf,
71 int in_len, int out_len) 70 int in_len, int out_len)
72{ 71{
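The new ib_uverbs_lookup_comp_file() wrapper multiplies the fd by typecheck(s32, _fd). typecheck() is the stock helper from include/linux/typecheck.h: it evaluates to 1, so the multiplication leaves the fd unchanged at run time, while the pointer comparison inside it emits a build-time warning if a caller passes anything other than an s32 (for example a u32 handle). For reference, its upstream definition:

/* For reference: the stock helper from include/linux/typecheck.h */
#define typecheck(type,x) \
({	type __dummy; \
	typeof(x) __dummy2; \
	(void)(&__dummy == &__dummy2); \
	1; \
})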
@@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
76 struct ib_ucontext *ucontext; 75 struct ib_ucontext *ucontext;
77 struct file *filp; 76 struct file *filp;
78 struct ib_rdmacg_object cg_obj; 77 struct ib_rdmacg_object cg_obj;
78 struct ib_device *ib_dev;
79 int ret; 79 int ret;
80 80
81 if (out_len < sizeof resp) 81 if (out_len < sizeof resp)
@@ -84,7 +84,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
84 if (copy_from_user(&cmd, buf, sizeof cmd)) 84 if (copy_from_user(&cmd, buf, sizeof cmd))
85 return -EFAULT; 85 return -EFAULT;
86 86
87 mutex_lock(&file->mutex); 87 mutex_lock(&file->ucontext_lock);
88 ib_dev = srcu_dereference(file->device->ib_dev,
89 &file->device->disassociate_srcu);
90 if (!ib_dev) {
91 ret = -EIO;
92 goto err;
93 }
88 94
89 if (file->ucontext) { 95 if (file->ucontext) {
90 ret = -EINVAL; 96 ret = -EINVAL;
@@ -110,12 +116,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
110 ucontext->cg_obj = cg_obj; 116 ucontext->cg_obj = cg_obj;
111 /* ufile is required when some objects are released */ 117 /* ufile is required when some objects are released */
112 ucontext->ufile = file; 118 ucontext->ufile = file;
113 uverbs_initialize_ucontext(ucontext);
114 119
115 rcu_read_lock(); 120 rcu_read_lock();
116 ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); 121 ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
117 rcu_read_unlock(); 122 rcu_read_unlock();
118 ucontext->closing = 0; 123 ucontext->closing = 0;
124 ucontext->cleanup_retryable = false;
119 125
120#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 126#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
121 ucontext->umem_tree = RB_ROOT_CACHED; 127 ucontext->umem_tree = RB_ROOT_CACHED;
@@ -146,11 +152,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
146 goto err_file; 152 goto err_file;
147 } 153 }
148 154
149 file->ucontext = ucontext;
150
151 fd_install(resp.async_fd, filp); 155 fd_install(resp.async_fd, filp);
152 156
153 mutex_unlock(&file->mutex); 157 /*
158 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
159 * only after all writes to set up the ucontext have completed.
160 */
161 smp_store_release(&file->ucontext, ucontext);
162
163 mutex_unlock(&file->ucontext_lock);
154 164
155 return in_len; 165 return in_len;
156 166
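The smp_store_release() above publishes the ucontext pointer only after its setup writes are complete. The matching reader is not shown in this hunk; presumably ib_uverbs_get_ucontext() pairs it with an acquire load, roughly along these lines (a sketch under that assumption, with a hypothetical name):

/* Hedged sketch, not part of the patch. */
static struct ib_ucontext *example_get_ucontext(struct ib_uverbs_file *ufile)
{
	/* pairs with smp_store_release(&file->ucontext, ucontext) above */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!ucontext)
		return ERR_PTR(-EINVAL);	/* no context created yet */
	return ucontext;
}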
@@ -169,15 +179,16 @@ err_alloc:
169 ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE); 179 ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
170 180
171err: 181err:
172 mutex_unlock(&file->mutex); 182 mutex_unlock(&file->ucontext_lock);
173 return ret; 183 return ret;
174} 184}
175 185
176static void copy_query_dev_fields(struct ib_uverbs_file *file, 186static void copy_query_dev_fields(struct ib_ucontext *ucontext,
177 struct ib_device *ib_dev,
178 struct ib_uverbs_query_device_resp *resp, 187 struct ib_uverbs_query_device_resp *resp,
179 struct ib_device_attr *attr) 188 struct ib_device_attr *attr)
180{ 189{
190 struct ib_device *ib_dev = ucontext->device;
191
181 resp->fw_ver = attr->fw_ver; 192 resp->fw_ver = attr->fw_ver;
182 resp->node_guid = ib_dev->node_guid; 193 resp->node_guid = ib_dev->node_guid;
183 resp->sys_image_guid = attr->sys_image_guid; 194 resp->sys_image_guid = attr->sys_image_guid;
@@ -189,7 +200,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
189 resp->max_qp = attr->max_qp; 200 resp->max_qp = attr->max_qp;
190 resp->max_qp_wr = attr->max_qp_wr; 201 resp->max_qp_wr = attr->max_qp_wr;
191 resp->device_cap_flags = lower_32_bits(attr->device_cap_flags); 202 resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
192 resp->max_sge = attr->max_sge; 203 resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
193 resp->max_sge_rd = attr->max_sge_rd; 204 resp->max_sge_rd = attr->max_sge_rd;
194 resp->max_cq = attr->max_cq; 205 resp->max_cq = attr->max_cq;
195 resp->max_cqe = attr->max_cqe; 206 resp->max_cqe = attr->max_cqe;
@@ -221,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
221} 232}
222 233
223ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, 234ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
224 struct ib_device *ib_dev,
225 const char __user *buf, 235 const char __user *buf,
226 int in_len, int out_len) 236 int in_len, int out_len)
227{ 237{
228 struct ib_uverbs_query_device cmd; 238 struct ib_uverbs_query_device cmd;
229 struct ib_uverbs_query_device_resp resp; 239 struct ib_uverbs_query_device_resp resp;
240 struct ib_ucontext *ucontext;
241
242 ucontext = ib_uverbs_get_ucontext(file);
243 if (IS_ERR(ucontext))
244 return PTR_ERR(ucontext);
230 245
231 if (out_len < sizeof resp) 246 if (out_len < sizeof resp)
232 return -ENOSPC; 247 return -ENOSPC;
@@ -235,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
235 return -EFAULT; 250 return -EFAULT;
236 251
237 memset(&resp, 0, sizeof resp); 252 memset(&resp, 0, sizeof resp);
238 copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); 253 copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
239 254
240 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) 255 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
241 return -EFAULT; 256 return -EFAULT;
@@ -243,8 +258,28 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
243 return in_len; 258 return in_len;
244} 259}
245 260
261/*
262 * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
263 * PortInfo CapabilityMask, but was extended with unique bits.
264 */
265static u32 make_port_cap_flags(const struct ib_port_attr *attr)
266{
267 u32 res;
268
269 /* All IBA CapabilityMask bits are passed through here, except bit 26,
270 * which is overridden with IP_BASED_GIDS. This is due to a historical
271 * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
272 * bits match the IBA definition across all kernel versions.
273 */
274 res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
275
276 if (attr->ip_gids)
277 res |= IB_UVERBS_PCF_IP_BASED_GIDS;
278
279 return res;
280}
281
246ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, 282ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
247 struct ib_device *ib_dev,
248 const char __user *buf, 283 const char __user *buf,
249 int in_len, int out_len) 284 int in_len, int out_len)
250{ 285{
@@ -252,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
252 struct ib_uverbs_query_port_resp resp; 287 struct ib_uverbs_query_port_resp resp;
253 struct ib_port_attr attr; 288 struct ib_port_attr attr;
254 int ret; 289 int ret;
290 struct ib_ucontext *ucontext;
291 struct ib_device *ib_dev;
292
293 ucontext = ib_uverbs_get_ucontext(file);
294 if (IS_ERR(ucontext))
295 return PTR_ERR(ucontext);
296 ib_dev = ucontext->device;
255 297
256 if (out_len < sizeof resp) 298 if (out_len < sizeof resp)
257 return -ENOSPC; 299 return -ENOSPC;
@@ -269,12 +311,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
269 resp.max_mtu = attr.max_mtu; 311 resp.max_mtu = attr.max_mtu;
270 resp.active_mtu = attr.active_mtu; 312 resp.active_mtu = attr.active_mtu;
271 resp.gid_tbl_len = attr.gid_tbl_len; 313 resp.gid_tbl_len = attr.gid_tbl_len;
272 resp.port_cap_flags = attr.port_cap_flags; 314 resp.port_cap_flags = make_port_cap_flags(&attr);
273 resp.max_msg_sz = attr.max_msg_sz; 315 resp.max_msg_sz = attr.max_msg_sz;
274 resp.bad_pkey_cntr = attr.bad_pkey_cntr; 316 resp.bad_pkey_cntr = attr.bad_pkey_cntr;
275 resp.qkey_viol_cntr = attr.qkey_viol_cntr; 317 resp.qkey_viol_cntr = attr.qkey_viol_cntr;
276 resp.pkey_tbl_len = attr.pkey_tbl_len; 318 resp.pkey_tbl_len = attr.pkey_tbl_len;
277 319
320 if (rdma_is_grh_required(ib_dev, cmd.port_num))
321 resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
322
278 if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) { 323 if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
279 resp.lid = OPA_TO_IB_UCAST_LID(attr.lid); 324 resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
280 resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid); 325 resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
@@ -300,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
300} 345}
301 346
302ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, 347ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
303 struct ib_device *ib_dev,
304 const char __user *buf, 348 const char __user *buf,
305 int in_len, int out_len) 349 int in_len, int out_len)
306{ 350{
@@ -310,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
310 struct ib_uobject *uobj; 354 struct ib_uobject *uobj;
311 struct ib_pd *pd; 355 struct ib_pd *pd;
312 int ret; 356 int ret;
357 struct ib_device *ib_dev;
313 358
314 if (out_len < sizeof resp) 359 if (out_len < sizeof resp)
315 return -ENOSPC; 360 return -ENOSPC;
@@ -322,11 +367,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
322 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 367 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
323 out_len - sizeof(resp)); 368 out_len - sizeof(resp));
324 369
325 uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext); 370 uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
326 if (IS_ERR(uobj)) 371 if (IS_ERR(uobj))
327 return PTR_ERR(uobj); 372 return PTR_ERR(uobj);
328 373
329 pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); 374 pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
330 if (IS_ERR(pd)) { 375 if (IS_ERR(pd)) {
331 ret = PTR_ERR(pd); 376 ret = PTR_ERR(pd);
332 goto err; 377 goto err;
@@ -348,9 +393,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
348 goto err_copy; 393 goto err_copy;
349 } 394 }
350 395
351 uobj_alloc_commit(uobj); 396 return uobj_alloc_commit(uobj, in_len);
352
353 return in_len;
354 397
355err_copy: 398err_copy:
356 ib_dealloc_pd(pd); 399 ib_dealloc_pd(pd);
@@ -361,25 +404,16 @@ err:
361} 404}
362 405
363ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, 406ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
364 struct ib_device *ib_dev,
365 const char __user *buf, 407 const char __user *buf,
366 int in_len, int out_len) 408 int in_len, int out_len)
367{ 409{
368 struct ib_uverbs_dealloc_pd cmd; 410 struct ib_uverbs_dealloc_pd cmd;
369 struct ib_uobject *uobj;
370 int ret;
371 411
372 if (copy_from_user(&cmd, buf, sizeof cmd)) 412 if (copy_from_user(&cmd, buf, sizeof cmd))
373 return -EFAULT; 413 return -EFAULT;
374 414
375 uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle, 415 return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
376 file->ucontext); 416 in_len);
377 if (IS_ERR(uobj))
378 return PTR_ERR(uobj);
379
380 ret = uobj_remove_commit(uobj);
381
382 return ret ?: in_len;
383} 417}
384 418
385struct xrcd_table_entry { 419struct xrcd_table_entry {
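Several destroy verbs above (dealloc_pd, close_xrcd, and later dereg_mr and dealloc_mw) collapse the old uobj_get_write()/uobj_remove_commit() sequence into a single uobj_perform_destroy() call. The common shape such a handler takes after this series, as a hedged sketch; ib_uverbs_destroy_foo, UVERBS_OBJECT_FOO and foo_handle are placeholders:

/* Hedged sketch, not part of the patch. */
ssize_t ib_uverbs_destroy_foo(struct ib_uverbs_file *file,
			      const char __user *buf, int in_len, int out_len)
{
	struct { __u32 foo_handle; } cmd;

	if (copy_from_user(&cmd, buf, sizeof(cmd)))
		return -EFAULT;

	/* looks up the uobject, destroys the HW object and commits removal */
	return uobj_perform_destroy(UVERBS_OBJECT_FOO, cmd.foo_handle, file,
				    in_len);
}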
@@ -468,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
468} 502}
469 503
470ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, 504ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
471 struct ib_device *ib_dev,
472 const char __user *buf, int in_len, 505 const char __user *buf, int in_len,
473 int out_len) 506 int out_len)
474{ 507{
@@ -481,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
481 struct inode *inode = NULL; 514 struct inode *inode = NULL;
482 int ret = 0; 515 int ret = 0;
483 int new_xrcd = 0; 516 int new_xrcd = 0;
517 struct ib_device *ib_dev;
484 518
485 if (out_len < sizeof resp) 519 if (out_len < sizeof resp)
486 return -ENOSPC; 520 return -ENOSPC;
@@ -517,15 +551,15 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
517 } 551 }
518 } 552 }
519 553
520 obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, 554 obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
521 file->ucontext); 555 &ib_dev);
522 if (IS_ERR(obj)) { 556 if (IS_ERR(obj)) {
523 ret = PTR_ERR(obj); 557 ret = PTR_ERR(obj);
524 goto err_tree_mutex_unlock; 558 goto err_tree_mutex_unlock;
525 } 559 }
526 560
527 if (!xrcd) { 561 if (!xrcd) {
528 xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); 562 xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
529 if (IS_ERR(xrcd)) { 563 if (IS_ERR(xrcd)) {
530 ret = PTR_ERR(xrcd); 564 ret = PTR_ERR(xrcd);
531 goto err; 565 goto err;
@@ -564,9 +598,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
564 598
565 mutex_unlock(&file->device->xrcd_tree_mutex); 599 mutex_unlock(&file->device->xrcd_tree_mutex);
566 600
567 uobj_alloc_commit(&obj->uobject); 601 return uobj_alloc_commit(&obj->uobject, in_len);
568
569 return in_len;
570 602
571err_copy: 603err_copy:
572 if (inode) { 604 if (inode) {
@@ -591,32 +623,25 @@ err_tree_mutex_unlock:
591} 623}
592 624
593ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, 625ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
594 struct ib_device *ib_dev,
595 const char __user *buf, int in_len, 626 const char __user *buf, int in_len,
596 int out_len) 627 int out_len)
597{ 628{
598 struct ib_uverbs_close_xrcd cmd; 629 struct ib_uverbs_close_xrcd cmd;
599 struct ib_uobject *uobj;
600 int ret = 0;
601 630
602 if (copy_from_user(&cmd, buf, sizeof cmd)) 631 if (copy_from_user(&cmd, buf, sizeof cmd))
603 return -EFAULT; 632 return -EFAULT;
604 633
605 uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, 634 return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
606 file->ucontext); 635 in_len);
607 if (IS_ERR(uobj))
608 return PTR_ERR(uobj);
609
610 ret = uobj_remove_commit(uobj);
611 return ret ?: in_len;
612} 636}
613 637
614int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, 638int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
615 struct ib_xrcd *xrcd, 639 struct ib_xrcd *xrcd,
616 enum rdma_remove_reason why) 640 enum rdma_remove_reason why)
617{ 641{
618 struct inode *inode; 642 struct inode *inode;
619 int ret; 643 int ret;
644 struct ib_uverbs_device *dev = uobject->context->ufile->device;
620 645
621 inode = xrcd->inode; 646 inode = xrcd->inode;
622 if (inode && !atomic_dec_and_test(&xrcd->usecnt)) 647 if (inode && !atomic_dec_and_test(&xrcd->usecnt))
@@ -624,16 +649,18 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
624 649
625 ret = ib_dealloc_xrcd(xrcd); 650 ret = ib_dealloc_xrcd(xrcd);
626 651
627 if (why == RDMA_REMOVE_DESTROY && ret) 652 if (ib_is_destroy_retryable(ret, why, uobject)) {
628 atomic_inc(&xrcd->usecnt); 653 atomic_inc(&xrcd->usecnt);
629 else if (inode) 654 return ret;
655 }
656
657 if (inode)
630 xrcd_table_delete(dev, inode); 658 xrcd_table_delete(dev, inode);
631 659
632 return ret; 660 return ret;
633} 661}
634 662
635ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, 663ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
636 struct ib_device *ib_dev,
637 const char __user *buf, int in_len, 664 const char __user *buf, int in_len,
638 int out_len) 665 int out_len)
639{ 666{
@@ -644,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
644 struct ib_pd *pd; 671 struct ib_pd *pd;
645 struct ib_mr *mr; 672 struct ib_mr *mr;
646 int ret; 673 int ret;
674 struct ib_device *ib_dev;
647 675
648 if (out_len < sizeof resp) 676 if (out_len < sizeof resp)
649 return -ENOSPC; 677 return -ENOSPC;
@@ -663,11 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
663 if (ret) 691 if (ret)
664 return ret; 692 return ret;
665 693
666 uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext); 694 uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
667 if (IS_ERR(uobj)) 695 if (IS_ERR(uobj))
668 return PTR_ERR(uobj); 696 return PTR_ERR(uobj);
669 697
670 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); 698 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
671 if (!pd) { 699 if (!pd) {
672 ret = -EINVAL; 700 ret = -EINVAL;
673 goto err_free; 701 goto err_free;
@@ -711,9 +739,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
711 739
712 uobj_put_obj_read(pd); 740 uobj_put_obj_read(pd);
713 741
714 uobj_alloc_commit(uobj); 742 return uobj_alloc_commit(uobj, in_len);
715
716 return in_len;
717 743
718err_copy: 744err_copy:
719 ib_dereg_mr(mr); 745 ib_dereg_mr(mr);
@@ -727,7 +753,6 @@ err_free:
727} 753}
728 754
729ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, 755ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
730 struct ib_device *ib_dev,
731 const char __user *buf, int in_len, 756 const char __user *buf, int in_len,
732 int out_len) 757 int out_len)
733{ 758{
@@ -759,8 +784,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
759 (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) 784 (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
760 return -EINVAL; 785 return -EINVAL;
761 786
762 uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, 787 uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
763 file->ucontext);
764 if (IS_ERR(uobj)) 788 if (IS_ERR(uobj))
765 return PTR_ERR(uobj); 789 return PTR_ERR(uobj);
766 790
@@ -778,7 +802,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
778 } 802 }
779 803
780 if (cmd.flags & IB_MR_REREG_PD) { 804 if (cmd.flags & IB_MR_REREG_PD) {
781 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); 805 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
806 file);
782 if (!pd) { 807 if (!pd) {
783 ret = -EINVAL; 808 ret = -EINVAL;
784 goto put_uobjs; 809 goto put_uobjs;
@@ -819,29 +844,19 @@ put_uobjs:
819} 844}
820 845
821ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, 846ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
822 struct ib_device *ib_dev,
823 const char __user *buf, int in_len, 847 const char __user *buf, int in_len,
824 int out_len) 848 int out_len)
825{ 849{
826 struct ib_uverbs_dereg_mr cmd; 850 struct ib_uverbs_dereg_mr cmd;
827 struct ib_uobject *uobj;
828 int ret = -EINVAL;
829 851
830 if (copy_from_user(&cmd, buf, sizeof cmd)) 852 if (copy_from_user(&cmd, buf, sizeof cmd))
831 return -EFAULT; 853 return -EFAULT;
832 854
833 uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, 855 return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
834 file->ucontext); 856 in_len);
835 if (IS_ERR(uobj))
836 return PTR_ERR(uobj);
837
838 ret = uobj_remove_commit(uobj);
839
840 return ret ?: in_len;
841} 857}
842 858
843ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, 859ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
844 struct ib_device *ib_dev,
845 const char __user *buf, int in_len, 860 const char __user *buf, int in_len,
846 int out_len) 861 int out_len)
847{ 862{
@@ -852,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
852 struct ib_mw *mw; 867 struct ib_mw *mw;
853 struct ib_udata udata; 868 struct ib_udata udata;
854 int ret; 869 int ret;
870 struct ib_device *ib_dev;
855 871
856 if (out_len < sizeof(resp)) 872 if (out_len < sizeof(resp))
857 return -ENOSPC; 873 return -ENOSPC;
@@ -859,11 +875,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
859 if (copy_from_user(&cmd, buf, sizeof(cmd))) 875 if (copy_from_user(&cmd, buf, sizeof(cmd)))
860 return -EFAULT; 876 return -EFAULT;
861 877
862 uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext); 878 uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
863 if (IS_ERR(uobj)) 879 if (IS_ERR(uobj))
864 return PTR_ERR(uobj); 880 return PTR_ERR(uobj);
865 881
866 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); 882 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
867 if (!pd) { 883 if (!pd) {
868 ret = -EINVAL; 884 ret = -EINVAL;
869 goto err_free; 885 goto err_free;
@@ -897,9 +913,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
897 } 913 }
898 914
899 uobj_put_obj_read(pd); 915 uobj_put_obj_read(pd);
900 uobj_alloc_commit(uobj); 916 return uobj_alloc_commit(uobj, in_len);
901
902 return in_len;
903 917
904err_copy: 918err_copy:
905 uverbs_dealloc_mw(mw); 919 uverbs_dealloc_mw(mw);
@@ -911,28 +925,19 @@ err_free:
911} 925}
912 926
913ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, 927ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
914 struct ib_device *ib_dev,
915 const char __user *buf, int in_len, 928 const char __user *buf, int in_len,
916 int out_len) 929 int out_len)
917{ 930{
918 struct ib_uverbs_dealloc_mw cmd; 931 struct ib_uverbs_dealloc_mw cmd;
919 struct ib_uobject *uobj;
920 int ret = -EINVAL;
921 932
922 if (copy_from_user(&cmd, buf, sizeof(cmd))) 933 if (copy_from_user(&cmd, buf, sizeof(cmd)))
923 return -EFAULT; 934 return -EFAULT;
924 935
925 uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle, 936 return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
926 file->ucontext); 937 in_len);
927 if (IS_ERR(uobj))
928 return PTR_ERR(uobj);
929
930 ret = uobj_remove_commit(uobj);
931 return ret ?: in_len;
932} 938}
933 939
934ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, 940ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
935 struct ib_device *ib_dev,
936 const char __user *buf, int in_len, 941 const char __user *buf, int in_len,
937 int out_len) 942 int out_len)
938{ 943{
@@ -940,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
940 struct ib_uverbs_create_comp_channel_resp resp; 945 struct ib_uverbs_create_comp_channel_resp resp;
941 struct ib_uobject *uobj; 946 struct ib_uobject *uobj;
942 struct ib_uverbs_completion_event_file *ev_file; 947 struct ib_uverbs_completion_event_file *ev_file;
948 struct ib_device *ib_dev;
943 949
944 if (out_len < sizeof resp) 950 if (out_len < sizeof resp)
945 return -ENOSPC; 951 return -ENOSPC;
@@ -947,14 +953,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
947 if (copy_from_user(&cmd, buf, sizeof cmd)) 953 if (copy_from_user(&cmd, buf, sizeof cmd))
948 return -EFAULT; 954 return -EFAULT;
949 955
950 uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext); 956 uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
951 if (IS_ERR(uobj)) 957 if (IS_ERR(uobj))
952 return PTR_ERR(uobj); 958 return PTR_ERR(uobj);
953 959
954 resp.fd = uobj->id; 960 resp.fd = uobj->id;
955 961
956 ev_file = container_of(uobj, struct ib_uverbs_completion_event_file, 962 ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
957 uobj_file.uobj); 963 uobj);
958 ib_uverbs_init_event_queue(&ev_file->ev_queue); 964 ib_uverbs_init_event_queue(&ev_file->ev_queue);
959 965
960 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) { 966 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
@@ -962,12 +968,10 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
962 return -EFAULT; 968 return -EFAULT;
963 } 969 }
964 970
965 uobj_alloc_commit(uobj); 971 return uobj_alloc_commit(uobj, in_len);
966 return in_len;
967} 972}
968 973
969static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, 974static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
970 struct ib_device *ib_dev,
971 struct ib_udata *ucore, 975 struct ib_udata *ucore,
972 struct ib_udata *uhw, 976 struct ib_udata *uhw,
973 struct ib_uverbs_ex_create_cq *cmd, 977 struct ib_uverbs_ex_create_cq *cmd,
@@ -985,21 +989,23 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
985 int ret; 989 int ret;
986 struct ib_uverbs_ex_create_cq_resp resp; 990 struct ib_uverbs_ex_create_cq_resp resp;
987 struct ib_cq_init_attr attr = {}; 991 struct ib_cq_init_attr attr = {};
988 992 struct ib_device *ib_dev;
989 if (!ib_dev->create_cq)
990 return ERR_PTR(-EOPNOTSUPP);
991 993
992 if (cmd->comp_vector >= file->device->num_comp_vectors) 994 if (cmd->comp_vector >= file->device->num_comp_vectors)
993 return ERR_PTR(-EINVAL); 995 return ERR_PTR(-EINVAL);
994 996
995 obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, 997 obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
996 file->ucontext); 998 &ib_dev);
997 if (IS_ERR(obj)) 999 if (IS_ERR(obj))
998 return obj; 1000 return obj;
999 1001
1002 if (!ib_dev->create_cq) {
1003 ret = -EOPNOTSUPP;
1004 goto err;
1005 }
1006
1000 if (cmd->comp_channel >= 0) { 1007 if (cmd->comp_channel >= 0) {
1001 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, 1008 ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
1002 file->ucontext);
1003 if (IS_ERR(ev_file)) { 1009 if (IS_ERR(ev_file)) {
1004 ret = PTR_ERR(ev_file); 1010 ret = PTR_ERR(ev_file);
1005 goto err; 1011 goto err;
@@ -1007,7 +1013,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1007 } 1013 }
1008 1014
1009 obj->uobject.user_handle = cmd->user_handle; 1015 obj->uobject.user_handle = cmd->user_handle;
1010 obj->uverbs_file = file;
1011 obj->comp_events_reported = 0; 1016 obj->comp_events_reported = 0;
1012 obj->async_events_reported = 0; 1017 obj->async_events_reported = 0;
1013 INIT_LIST_HEAD(&obj->comp_list); 1018 INIT_LIST_HEAD(&obj->comp_list);
@@ -1019,7 +1024,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1019 if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) 1024 if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
1020 attr.flags = cmd->flags; 1025 attr.flags = cmd->flags;
1021 1026
1022 cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); 1027 cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
1023 if (IS_ERR(cq)) { 1028 if (IS_ERR(cq)) {
1024 ret = PTR_ERR(cq); 1029 ret = PTR_ERR(cq);
1025 goto err_file; 1030 goto err_file;
@@ -1047,7 +1052,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
1047 if (ret) 1052 if (ret)
1048 goto err_cb; 1053 goto err_cb;
1049 1054
1050 uobj_alloc_commit(&obj->uobject); 1055 ret = uobj_alloc_commit(&obj->uobject, 0);
1056 if (ret)
1057 return ERR_PTR(ret);
1051 return obj; 1058 return obj;
1052 1059
1053err_cb: 1060err_cb:
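On the allocation side, handlers now receive the ib_device back from uobj_alloc() instead of taking it as a parameter, and they finish by returning uobj_alloc_commit(uobj, in_len) so a commit failure is propagated to user space. The general allocate/commit shape as a hedged sketch; ib_uverbs_create_foo, UVERBS_OBJECT_FOO and create_hw_foo() are placeholders, and the uobj_alloc_abort() error path is an assumption based on the existing err: labels:

/* Hedged sketch, not part of the patch. */
ssize_t ib_uverbs_create_foo(struct ib_uverbs_file *file,
			     const char __user *buf, int in_len, int out_len)
{
	struct ib_uobject *uobj;
	struct ib_device *ib_dev;
	int ret;

	uobj = uobj_alloc(UVERBS_OBJECT_FOO, file, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	ret = create_hw_foo(ib_dev, uobj);	/* placeholder for the HW call */
	if (ret) {
		uobj_alloc_abort(uobj);		/* assumed abort helper */
		return ret;
	}

	/* commit publishes the handle; its return value is the syscall result */
	return uobj_alloc_commit(uobj, in_len);
}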
@@ -1075,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
1075} 1082}
1076 1083
1077ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, 1084ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1078 struct ib_device *ib_dev,
1079 const char __user *buf, int in_len, 1085 const char __user *buf, int in_len,
1080 int out_len) 1086 int out_len)
1081{ 1087{
@@ -1106,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1106 cmd_ex.comp_vector = cmd.comp_vector; 1112 cmd_ex.comp_vector = cmd.comp_vector;
1107 cmd_ex.comp_channel = cmd.comp_channel; 1113 cmd_ex.comp_channel = cmd.comp_channel;
1108 1114
1109 obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, 1115 obj = create_cq(file, &ucore, &uhw, &cmd_ex,
1110 offsetof(typeof(cmd_ex), comp_channel) + 1116 offsetof(typeof(cmd_ex), comp_channel) +
1111 sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, 1117 sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
1112 NULL); 1118 NULL);
@@ -1129,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
1129} 1135}
1130 1136
1131int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, 1137int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1132 struct ib_device *ib_dev,
1133 struct ib_udata *ucore, 1138 struct ib_udata *ucore,
1134 struct ib_udata *uhw) 1139 struct ib_udata *uhw)
1135{ 1140{
@@ -1155,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1155 sizeof(resp.response_length))) 1160 sizeof(resp.response_length)))
1156 return -ENOSPC; 1161 return -ENOSPC;
1157 1162
1158 obj = create_cq(file, ib_dev, ucore, uhw, &cmd, 1163 obj = create_cq(file, ucore, uhw, &cmd,
1159 min(ucore->inlen, sizeof(cmd)), 1164 min(ucore->inlen, sizeof(cmd)),
1160 ib_uverbs_ex_create_cq_cb, NULL); 1165 ib_uverbs_ex_create_cq_cb, NULL);
1161 1166
@@ -1163,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
1163} 1168}
1164 1169
1165ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, 1170ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1166 struct ib_device *ib_dev,
1167 const char __user *buf, int in_len, 1171 const char __user *buf, int in_len,
1168 int out_len) 1172 int out_len)
1169{ 1173{
@@ -1181,7 +1185,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
1181 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 1185 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
1182 out_len - sizeof(resp)); 1186 out_len - sizeof(resp));
1183 1187
1184 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); 1188 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
1185 if (!cq) 1189 if (!cq)
1186 return -EINVAL; 1190 return -EINVAL;
1187 1191
@@ -1231,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
1231} 1235}
1232 1236
1233ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, 1237ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1234 struct ib_device *ib_dev,
1235 const char __user *buf, int in_len, 1238 const char __user *buf, int in_len,
1236 int out_len) 1239 int out_len)
1237{ 1240{
@@ -1246,7 +1249,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1246 if (copy_from_user(&cmd, buf, sizeof cmd)) 1249 if (copy_from_user(&cmd, buf, sizeof cmd))
1247 return -EFAULT; 1250 return -EFAULT;
1248 1251
1249 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); 1252 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
1250 if (!cq) 1253 if (!cq)
1251 return -EINVAL; 1254 return -EINVAL;
1252 1255
@@ -1262,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
1262 if (!ret) 1265 if (!ret)
1263 break; 1266 break;
1264 1267
1265 ret = copy_wc_to_user(ib_dev, data_ptr, &wc); 1268 ret = copy_wc_to_user(cq->device, data_ptr, &wc);
1266 if (ret) 1269 if (ret)
1267 goto out_put; 1270 goto out_put;
1268 1271
@@ -1283,7 +1286,6 @@ out_put:
1283} 1286}
1284 1287
1285ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, 1288ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1286 struct ib_device *ib_dev,
1287 const char __user *buf, int in_len, 1289 const char __user *buf, int in_len,
1288 int out_len) 1290 int out_len)
1289{ 1291{
@@ -1293,7 +1295,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1293 if (copy_from_user(&cmd, buf, sizeof cmd)) 1295 if (copy_from_user(&cmd, buf, sizeof cmd))
1294 return -EFAULT; 1296 return -EFAULT;
1295 1297
1296 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); 1298 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
1297 if (!cq) 1299 if (!cq)
1298 return -EINVAL; 1300 return -EINVAL;
1299 1301
@@ -1306,45 +1308,28 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
1306} 1308}
1307 1309
1308ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, 1310ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1309 struct ib_device *ib_dev,
1310 const char __user *buf, int in_len, 1311 const char __user *buf, int in_len,
1311 int out_len) 1312 int out_len)
1312{ 1313{
1313 struct ib_uverbs_destroy_cq cmd; 1314 struct ib_uverbs_destroy_cq cmd;
1314 struct ib_uverbs_destroy_cq_resp resp; 1315 struct ib_uverbs_destroy_cq_resp resp;
1315 struct ib_uobject *uobj; 1316 struct ib_uobject *uobj;
1316 struct ib_cq *cq;
1317 struct ib_ucq_object *obj; 1317 struct ib_ucq_object *obj;
1318 int ret = -EINVAL;
1319 1318
1320 if (copy_from_user(&cmd, buf, sizeof cmd)) 1319 if (copy_from_user(&cmd, buf, sizeof cmd))
1321 return -EFAULT; 1320 return -EFAULT;
1322 1321
1323 uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, 1322 uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
1324 file->ucontext);
1325 if (IS_ERR(uobj)) 1323 if (IS_ERR(uobj))
1326 return PTR_ERR(uobj); 1324 return PTR_ERR(uobj);
1327 1325
1328 /* 1326 obj = container_of(uobj, struct ib_ucq_object, uobject);
1329 * Make sure we don't free the memory in remove_commit as we still
1330 * needs the uobject memory to create the response.
1331 */
1332 uverbs_uobject_get(uobj);
1333 cq = uobj->object;
1334 obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
1335
1336 memset(&resp, 0, sizeof(resp)); 1327 memset(&resp, 0, sizeof(resp));
1337
1338 ret = uobj_remove_commit(uobj);
1339 if (ret) {
1340 uverbs_uobject_put(uobj);
1341 return ret;
1342 }
1343
1344 resp.comp_events_reported = obj->comp_events_reported; 1328 resp.comp_events_reported = obj->comp_events_reported;
1345 resp.async_events_reported = obj->async_events_reported; 1329 resp.async_events_reported = obj->async_events_reported;
1346 1330
1347 uverbs_uobject_put(uobj); 1331 uobj_put_destroy(uobj);
1332
1348 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) 1333 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
1349 return -EFAULT; 1334 return -EFAULT;
1350 1335
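The hunk above is the first instance of a conversion repeated for the QP, WQ and SRQ destroy handlers further down: the open-coded uobj_get_write() / uverbs_uobject_get() / uobj_remove_commit() / uverbs_uobject_put() sequence becomes a uobj_get_destroy() / uobj_put_destroy() pair, and the uobject stays valid in between so the response can still be built from it. A minimal sketch of the resulting shape, using only names visible in this hunk and assuming the usual uverbs_cmd.c context (the local "uverbs.h" plus the rdma/uverbs_std_types.h helpers); the function name is illustrative:

/* Destroy-with-response after the conversion (CQ case). */
static ssize_t destroy_cq_sketch(struct ib_uverbs_file *file,
				 const char __user *buf, int in_len)
{
	struct ib_uverbs_destroy_cq cmd;
	struct ib_uverbs_destroy_cq_resp resp;
	struct ib_ucq_object *obj;
	struct ib_uobject *uobj;

	if (copy_from_user(&cmd, buf, sizeof(cmd)))
		return -EFAULT;

	/* Looks up the handle and runs the driver destroy in one step. */
	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* The uobject memory is still alive here, so the response can be
	 * filled from it before the final put. */
	obj = container_of(uobj, struct ib_ucq_object, uobject);
	memset(&resp, 0, sizeof(resp));
	resp.comp_events_reported = obj->comp_events_reported;
	resp.async_events_reported = obj->async_events_reported;

	uobj_put_destroy(uobj);

	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
		return -EFAULT;

	return in_len;
}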
@@ -1375,12 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file,
1375 int ret; 1360 int ret;
1376 struct ib_rwq_ind_table *ind_tbl = NULL; 1361 struct ib_rwq_ind_table *ind_tbl = NULL;
1377 bool has_sq = true; 1362 bool has_sq = true;
1363 struct ib_device *ib_dev;
1378 1364
1379 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 1365 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1380 return -EPERM; 1366 return -EPERM;
1381 1367
1382 obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, 1368 obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
1383 file->ucontext); 1369 &ib_dev);
1384 if (IS_ERR(obj)) 1370 if (IS_ERR(obj))
1385 return PTR_ERR(obj); 1371 return PTR_ERR(obj);
1386 obj->uxrcd = NULL; 1372 obj->uxrcd = NULL;
@@ -1390,9 +1376,9 @@ static int create_qp(struct ib_uverbs_file *file,
1390 if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + 1376 if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
1391 sizeof(cmd->rwq_ind_tbl_handle) && 1377 sizeof(cmd->rwq_ind_tbl_handle) &&
1392 (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { 1378 (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
1393 ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL, 1379 ind_tbl = uobj_get_obj_read(rwq_ind_table,
1394 cmd->rwq_ind_tbl_handle, 1380 UVERBS_OBJECT_RWQ_IND_TBL,
1395 file->ucontext); 1381 cmd->rwq_ind_tbl_handle, file);
1396 if (!ind_tbl) { 1382 if (!ind_tbl) {
1397 ret = -EINVAL; 1383 ret = -EINVAL;
1398 goto err_put; 1384 goto err_put;
@@ -1418,7 +1404,7 @@ static int create_qp(struct ib_uverbs_file *file,
1418 1404
1419 if (cmd->qp_type == IB_QPT_XRC_TGT) { 1405 if (cmd->qp_type == IB_QPT_XRC_TGT) {
1420 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle, 1406 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
1421 file->ucontext); 1407 file);
1422 1408
1423 if (IS_ERR(xrcd_uobj)) { 1409 if (IS_ERR(xrcd_uobj)) {
1424 ret = -EINVAL; 1410 ret = -EINVAL;
@@ -1437,8 +1423,8 @@ static int create_qp(struct ib_uverbs_file *file,
1437 cmd->max_recv_sge = 0; 1423 cmd->max_recv_sge = 0;
1438 } else { 1424 } else {
1439 if (cmd->is_srq) { 1425 if (cmd->is_srq) {
1440 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle, 1426 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
1441 file->ucontext); 1427 cmd->srq_handle, file);
1442 if (!srq || srq->srq_type == IB_SRQT_XRC) { 1428 if (!srq || srq->srq_type == IB_SRQT_XRC) {
1443 ret = -EINVAL; 1429 ret = -EINVAL;
1444 goto err_put; 1430 goto err_put;
@@ -1447,8 +1433,9 @@ static int create_qp(struct ib_uverbs_file *file,
1447 1433
1448 if (!ind_tbl) { 1434 if (!ind_tbl) {
1449 if (cmd->recv_cq_handle != cmd->send_cq_handle) { 1435 if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1450 rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle, 1436 rcq = uobj_get_obj_read(
1451 file->ucontext); 1437 cq, UVERBS_OBJECT_CQ,
1438 cmd->recv_cq_handle, file);
1452 if (!rcq) { 1439 if (!rcq) {
1453 ret = -EINVAL; 1440 ret = -EINVAL;
1454 goto err_put; 1441 goto err_put;
@@ -1458,11 +1445,12 @@ static int create_qp(struct ib_uverbs_file *file,
1458 } 1445 }
1459 1446
1460 if (has_sq) 1447 if (has_sq)
1461 scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle, 1448 scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
1462 file->ucontext); 1449 cmd->send_cq_handle, file);
1463 if (!ind_tbl) 1450 if (!ind_tbl)
1464 rcq = rcq ?: scq; 1451 rcq = rcq ?: scq;
1465 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); 1452 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
1453 file);
1466 if (!pd || (!scq && has_sq)) { 1454 if (!pd || (!scq && has_sq)) {
1467 ret = -EINVAL; 1455 ret = -EINVAL;
1468 goto err_put; 1456 goto err_put;
@@ -1602,9 +1590,7 @@ static int create_qp(struct ib_uverbs_file *file,
1602 if (ind_tbl) 1590 if (ind_tbl)
1603 uobj_put_obj_read(ind_tbl); 1591 uobj_put_obj_read(ind_tbl);
1604 1592
1605 uobj_alloc_commit(&obj->uevent.uobject); 1593 return uobj_alloc_commit(&obj->uevent.uobject, 0);
1606
1607 return 0;
1608err_cb: 1594err_cb:
1609 ib_destroy_qp(qp); 1595 ib_destroy_qp(qp);
1610 1596
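Another recurring change in this file: create paths no longer call uobj_alloc_commit() and then return a hard-coded success value. The helper now takes the success return value as its second argument and returns it, or an error if the commit itself fails, so the result is propagated instead of being dropped. A one-function sketch with a hypothetical name:

/* Tail of a create-style handler after the conversion. */
static ssize_t create_tail_sketch(struct ib_uobject *uobj, int in_len)
{
	/* Returns the passed-in success value (in_len here, 0 in the ex
	 * handlers) or a negative error if the commit fails. */
	return uobj_alloc_commit(uobj, in_len);
}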
@@ -1637,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1637} 1623}
1638 1624
1639ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, 1625ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1640 struct ib_device *ib_dev,
1641 const char __user *buf, int in_len, 1626 const char __user *buf, int in_len,
1642 int out_len) 1627 int out_len)
1643{ 1628{
@@ -1698,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
1698} 1683}
1699 1684
1700int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, 1685int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
1701 struct ib_device *ib_dev,
1702 struct ib_udata *ucore, 1686 struct ib_udata *ucore,
1703 struct ib_udata *uhw) 1687 struct ib_udata *uhw)
1704{ 1688{
@@ -1735,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
1735} 1719}
1736 1720
1737ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, 1721ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1738 struct ib_device *ib_dev,
1739 const char __user *buf, int in_len, int out_len) 1722 const char __user *buf, int in_len, int out_len)
1740{ 1723{
1741 struct ib_uverbs_open_qp cmd; 1724 struct ib_uverbs_open_qp cmd;
@@ -1747,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1747 struct ib_qp *qp; 1730 struct ib_qp *qp;
1748 struct ib_qp_open_attr attr; 1731 struct ib_qp_open_attr attr;
1749 int ret; 1732 int ret;
1733 struct ib_device *ib_dev;
1750 1734
1751 if (out_len < sizeof resp) 1735 if (out_len < sizeof resp)
1752 return -ENOSPC; 1736 return -ENOSPC;
@@ -1759,13 +1743,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1759 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 1743 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
1760 out_len - sizeof(resp)); 1744 out_len - sizeof(resp));
1761 1745
1762 obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, 1746 obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
1763 file->ucontext); 1747 &ib_dev);
1764 if (IS_ERR(obj)) 1748 if (IS_ERR(obj))
1765 return PTR_ERR(obj); 1749 return PTR_ERR(obj);
1766 1750
1767 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, 1751 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
1768 file->ucontext);
1769 if (IS_ERR(xrcd_uobj)) { 1752 if (IS_ERR(xrcd_uobj)) {
1770 ret = -EINVAL; 1753 ret = -EINVAL;
1771 goto err_put; 1754 goto err_put;
@@ -1809,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1809 qp->uobject = &obj->uevent.uobject; 1792 qp->uobject = &obj->uevent.uobject;
1810 uobj_put_read(xrcd_uobj); 1793 uobj_put_read(xrcd_uobj);
1811 1794
1812 1795 return uobj_alloc_commit(&obj->uevent.uobject, in_len);
1813 uobj_alloc_commit(&obj->uevent.uobject);
1814
1815 return in_len;
1816 1796
1817err_destroy: 1797err_destroy:
1818 ib_destroy_qp(qp); 1798 ib_destroy_qp(qp);
@@ -1846,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
1846} 1826}
1847 1827
1848ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, 1828ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
1849 struct ib_device *ib_dev,
1850 const char __user *buf, int in_len, 1829 const char __user *buf, int in_len,
1851 int out_len) 1830 int out_len)
1852{ 1831{
@@ -1867,7 +1846,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
1867 goto out; 1846 goto out;
1868 } 1847 }
1869 1848
1870 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 1849 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
1871 if (!qp) { 1850 if (!qp) {
1872 ret = -EINVAL; 1851 ret = -EINVAL;
1873 goto out; 1852 goto out;
@@ -1968,11 +1947,11 @@ static int modify_qp(struct ib_uverbs_file *file,
1968 struct ib_qp *qp; 1947 struct ib_qp *qp;
1969 int ret; 1948 int ret;
1970 1949
1971 attr = kmalloc(sizeof *attr, GFP_KERNEL); 1950 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1972 if (!attr) 1951 if (!attr)
1973 return -ENOMEM; 1952 return -ENOMEM;
1974 1953
1975 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext); 1954 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
1976 if (!qp) { 1955 if (!qp) {
1977 ret = -EINVAL; 1956 ret = -EINVAL;
1978 goto out; 1957 goto out;
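The kmalloc() to kzalloc() switch above is small but deliberate: the attribute block is only partially filled from user input before being handed on, and zeroing it up front keeps any field the handler never writes from carrying stale heap contents. A minimal sketch of the idiom, with a hypothetical helper name:

static struct ib_qp_attr *qp_attr_alloc_sketch(void)
{
	/* Zero-initialise so unwritten fields are never observed with
	 * leftover heap data. */
	return kzalloc(sizeof(struct ib_qp_attr), GFP_KERNEL);
}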
@@ -2098,7 +2077,6 @@ out:
2098} 2077}
2099 2078
2100ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, 2079ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2101 struct ib_device *ib_dev,
2102 const char __user *buf, int in_len, 2080 const char __user *buf, int in_len,
2103 int out_len) 2081 int out_len)
2104{ 2082{
@@ -2125,7 +2103,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2125} 2103}
2126 2104
2127int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, 2105int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
2128 struct ib_device *ib_dev,
2129 struct ib_udata *ucore, 2106 struct ib_udata *ucore,
2130 struct ib_udata *uhw) 2107 struct ib_udata *uhw)
2131{ 2108{
@@ -2161,7 +2138,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
2161} 2138}
2162 2139
2163ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, 2140ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2164 struct ib_device *ib_dev,
2165 const char __user *buf, int in_len, 2141 const char __user *buf, int in_len,
2166 int out_len) 2142 int out_len)
2167{ 2143{
@@ -2169,33 +2145,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2169 struct ib_uverbs_destroy_qp_resp resp; 2145 struct ib_uverbs_destroy_qp_resp resp;
2170 struct ib_uobject *uobj; 2146 struct ib_uobject *uobj;
2171 struct ib_uqp_object *obj; 2147 struct ib_uqp_object *obj;
2172 int ret = -EINVAL;
2173 2148
2174 if (copy_from_user(&cmd, buf, sizeof cmd)) 2149 if (copy_from_user(&cmd, buf, sizeof cmd))
2175 return -EFAULT; 2150 return -EFAULT;
2176 2151
2177 memset(&resp, 0, sizeof resp); 2152 uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
2178
2179 uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
2180 file->ucontext);
2181 if (IS_ERR(uobj)) 2153 if (IS_ERR(uobj))
2182 return PTR_ERR(uobj); 2154 return PTR_ERR(uobj);
2183 2155
2184 obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); 2156 obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
2185 /* 2157 memset(&resp, 0, sizeof(resp));
2186 * Make sure we don't free the memory in remove_commit as we still
2187 * needs the uobject memory to create the response.
2188 */
2189 uverbs_uobject_get(uobj);
2190
2191 ret = uobj_remove_commit(uobj);
2192 if (ret) {
2193 uverbs_uobject_put(uobj);
2194 return ret;
2195 }
2196
2197 resp.events_reported = obj->uevent.events_reported; 2158 resp.events_reported = obj->uevent.events_reported;
2198 uverbs_uobject_put(uobj); 2159
2160 uobj_put_destroy(uobj);
2199 2161
2200 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) 2162 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
2201 return -EFAULT; 2163 return -EFAULT;
@@ -2214,14 +2176,14 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
2214} 2176}
2215 2177
2216ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, 2178ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2217 struct ib_device *ib_dev,
2218 const char __user *buf, int in_len, 2179 const char __user *buf, int in_len,
2219 int out_len) 2180 int out_len)
2220{ 2181{
2221 struct ib_uverbs_post_send cmd; 2182 struct ib_uverbs_post_send cmd;
2222 struct ib_uverbs_post_send_resp resp; 2183 struct ib_uverbs_post_send_resp resp;
2223 struct ib_uverbs_send_wr *user_wr; 2184 struct ib_uverbs_send_wr *user_wr;
2224 struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; 2185 struct ib_send_wr *wr = NULL, *last, *next;
2186 const struct ib_send_wr *bad_wr;
2225 struct ib_qp *qp; 2187 struct ib_qp *qp;
2226 int i, sg_ind; 2188 int i, sg_ind;
2227 int is_ud; 2189 int is_ud;
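bad_wr becomes const here (and in the post_recv / post_srq_recv handlers below), matching the const-ified posting verbs in this series: the core only reports which work request failed, it never modifies it. A small caller-side sketch under the new prototypes; the function name and error message are illustrative:

#include <rdma/ib_verbs.h>

static int post_one_send(struct ib_qp *qp, struct ib_send_wr *wr)
{
	const struct ib_send_wr *bad_wr;	/* read back only, never written */
	int ret;

	ret = ib_post_send(qp, wr, &bad_wr);
	if (ret)
		pr_err("post_send failed at wr_id %llu: %d\n",
		       (unsigned long long)bad_wr->wr_id, ret);
	return ret;
}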
@@ -2242,7 +2204,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2242 if (!user_wr) 2204 if (!user_wr)
2243 return -ENOMEM; 2205 return -ENOMEM;
2244 2206
2245 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 2207 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
2246 if (!qp) 2208 if (!qp)
2247 goto out; 2209 goto out;
2248 2210
@@ -2278,8 +2240,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2278 goto out_put; 2240 goto out_put;
2279 } 2241 }
2280 2242
2281 ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah, 2243 ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
2282 file->ucontext); 2244 user_wr->wr.ud.ah, file);
2283 if (!ud->ah) { 2245 if (!ud->ah) {
2284 kfree(ud); 2246 kfree(ud);
2285 ret = -EINVAL; 2247 ret = -EINVAL;
@@ -2494,13 +2456,13 @@ err:
2494} 2456}
2495 2457
2496ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, 2458ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2497 struct ib_device *ib_dev,
2498 const char __user *buf, int in_len, 2459 const char __user *buf, int in_len,
2499 int out_len) 2460 int out_len)
2500{ 2461{
2501 struct ib_uverbs_post_recv cmd; 2462 struct ib_uverbs_post_recv cmd;
2502 struct ib_uverbs_post_recv_resp resp; 2463 struct ib_uverbs_post_recv_resp resp;
2503 struct ib_recv_wr *wr, *next, *bad_wr; 2464 struct ib_recv_wr *wr, *next;
2465 const struct ib_recv_wr *bad_wr;
2504 struct ib_qp *qp; 2466 struct ib_qp *qp;
2505 ssize_t ret = -EINVAL; 2467 ssize_t ret = -EINVAL;
2506 2468
@@ -2513,7 +2475,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2513 if (IS_ERR(wr)) 2475 if (IS_ERR(wr))
2514 return PTR_ERR(wr); 2476 return PTR_ERR(wr);
2515 2477
2516 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 2478 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
2517 if (!qp) 2479 if (!qp)
2518 goto out; 2480 goto out;
2519 2481
@@ -2543,13 +2505,13 @@ out:
2543} 2505}
2544 2506
2545ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, 2507ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2546 struct ib_device *ib_dev,
2547 const char __user *buf, int in_len, 2508 const char __user *buf, int in_len,
2548 int out_len) 2509 int out_len)
2549{ 2510{
2550 struct ib_uverbs_post_srq_recv cmd; 2511 struct ib_uverbs_post_srq_recv cmd;
2551 struct ib_uverbs_post_srq_recv_resp resp; 2512 struct ib_uverbs_post_srq_recv_resp resp;
2552 struct ib_recv_wr *wr, *next, *bad_wr; 2513 struct ib_recv_wr *wr, *next;
2514 const struct ib_recv_wr *bad_wr;
2553 struct ib_srq *srq; 2515 struct ib_srq *srq;
2554 ssize_t ret = -EINVAL; 2516 ssize_t ret = -EINVAL;
2555 2517
@@ -2562,12 +2524,13 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
2562 if (IS_ERR(wr)) 2524 if (IS_ERR(wr))
2563 return PTR_ERR(wr); 2525 return PTR_ERR(wr);
2564 2526
2565 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); 2527 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
2566 if (!srq) 2528 if (!srq)
2567 goto out; 2529 goto out;
2568 2530
2569 resp.bad_wr = 0; 2531 resp.bad_wr = 0;
2570 ret = srq->device->post_srq_recv(srq, wr, &bad_wr); 2532 ret = srq->device->post_srq_recv ?
2533 srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
2571 2534
2572 uobj_put_obj_read(srq); 2535 uobj_put_obj_read(srq);
2573 2536
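The ternary added above guards an optional driver callback: devices without SRQ support leave post_srq_recv NULL, and the handler now returns -EOPNOTSUPP instead of jumping through a NULL pointer (ex_create_flow below gains the same kind of check for create_flow). A sketch of the pattern as a helper, assuming the const-ified receive signatures from this series:

static int post_srq_checked(struct ib_srq *srq, struct ib_recv_wr *wr,
			    const struct ib_recv_wr **bad_wr)
{
	/* Optional device method: report missing support rather than
	 * dereferencing a NULL function pointer. */
	if (!srq->device->post_srq_recv)
		return -EOPNOTSUPP;

	return srq->device->post_srq_recv(srq, wr, bad_wr);
}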
@@ -2592,7 +2555,6 @@ out:
2592} 2555}
2593 2556
2594ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, 2557ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2595 struct ib_device *ib_dev,
2596 const char __user *buf, int in_len, 2558 const char __user *buf, int in_len,
2597 int out_len) 2559 int out_len)
2598{ 2560{
@@ -2601,9 +2563,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2601 struct ib_uobject *uobj; 2563 struct ib_uobject *uobj;
2602 struct ib_pd *pd; 2564 struct ib_pd *pd;
2603 struct ib_ah *ah; 2565 struct ib_ah *ah;
2604 struct rdma_ah_attr attr; 2566 struct rdma_ah_attr attr = {};
2605 int ret; 2567 int ret;
2606 struct ib_udata udata; 2568 struct ib_udata udata;
2569 struct ib_device *ib_dev;
2607 2570
2608 if (out_len < sizeof resp) 2571 if (out_len < sizeof resp)
2609 return -ENOSPC; 2572 return -ENOSPC;
@@ -2611,19 +2574,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2611 if (copy_from_user(&cmd, buf, sizeof cmd)) 2574 if (copy_from_user(&cmd, buf, sizeof cmd))
2612 return -EFAULT; 2575 return -EFAULT;
2613 2576
2614 if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
2615 return -EINVAL;
2616
2617 ib_uverbs_init_udata(&udata, buf + sizeof(cmd), 2577 ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
2618 u64_to_user_ptr(cmd.response) + sizeof(resp), 2578 u64_to_user_ptr(cmd.response) + sizeof(resp),
2619 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 2579 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
2620 out_len - sizeof(resp)); 2580 out_len - sizeof(resp));
2621 2581
2622 uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext); 2582 uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
2623 if (IS_ERR(uobj)) 2583 if (IS_ERR(uobj))
2624 return PTR_ERR(uobj); 2584 return PTR_ERR(uobj);
2625 2585
2626 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); 2586 if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
2587 ret = -EINVAL;
2588 goto err;
2589 }
2590
2591 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
2627 if (!pd) { 2592 if (!pd) {
2628 ret = -EINVAL; 2593 ret = -EINVAL;
2629 goto err; 2594 goto err;
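With ib_dev gone from the handler arguments, uobj_alloc() is now the source of the device pointer, so the rdma_is_port_valid() check has to move below the allocation and unwind through the error path. A sketch of that ordering; the helper name is hypothetical and uobj_alloc_abort() is assumed to be the usual unwind for a not-yet-committed uobject (it is not visible in this hunk):

static int alloc_ah_uobj_sketch(struct ib_uverbs_file *file, u8 port_num,
				struct ib_uobject **out)
{
	struct ib_device *ib_dev;
	struct ib_uobject *uobj;

	/* uobj_alloc() also hands back the ib_device bound to this file. */
	uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* Device-dependent validation can only run once ib_dev is known. */
	if (!rdma_is_port_valid(ib_dev, port_num)) {
		uobj_alloc_abort(uobj);		/* assumed unwind helper */
		return -EINVAL;
	}

	*out = uobj;
	return 0;
}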
@@ -2665,9 +2630,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2665 } 2630 }
2666 2631
2667 uobj_put_obj_read(pd); 2632 uobj_put_obj_read(pd);
2668 uobj_alloc_commit(uobj); 2633 return uobj_alloc_commit(uobj, in_len);
2669
2670 return in_len;
2671 2634
2672err_copy: 2635err_copy:
2673 rdma_destroy_ah(ah); 2636 rdma_destroy_ah(ah);
@@ -2681,27 +2644,18 @@ err:
2681} 2644}
2682 2645
2683ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, 2646ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
2684 struct ib_device *ib_dev,
2685 const char __user *buf, int in_len, int out_len) 2647 const char __user *buf, int in_len, int out_len)
2686{ 2648{
2687 struct ib_uverbs_destroy_ah cmd; 2649 struct ib_uverbs_destroy_ah cmd;
2688 struct ib_uobject *uobj;
2689 int ret;
2690 2650
2691 if (copy_from_user(&cmd, buf, sizeof cmd)) 2651 if (copy_from_user(&cmd, buf, sizeof cmd))
2692 return -EFAULT; 2652 return -EFAULT;
2693 2653
2694 uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle, 2654 return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
2695 file->ucontext); 2655 in_len);
2696 if (IS_ERR(uobj))
2697 return PTR_ERR(uobj);
2698
2699 ret = uobj_remove_commit(uobj);
2700 return ret ?: in_len;
2701} 2656}
2702 2657
2703ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, 2658ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2704 struct ib_device *ib_dev,
2705 const char __user *buf, int in_len, 2659 const char __user *buf, int in_len,
2706 int out_len) 2660 int out_len)
2707{ 2661{
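When a destroy command has nothing to report back (AH here, flows and RWQ indirection tables later in the file), the whole lookup/destroy/commit sequence collapses into one uobj_perform_destroy() call; as with uobj_alloc_commit(), the last argument is the value to return on success. A sketch with an illustrative name:

static ssize_t destroy_ah_sketch(struct ib_uverbs_file *file,
				 const char __user *buf, int in_len)
{
	struct ib_uverbs_destroy_ah cmd;

	if (copy_from_user(&cmd, buf, sizeof(cmd)))
		return -EFAULT;

	/* Looks up the handle, runs the driver destroy, removes the handle
	 * and returns in_len on success or a negative error. */
	return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
				    in_len);
}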
@@ -2714,7 +2668,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2714 if (copy_from_user(&cmd, buf, sizeof cmd)) 2668 if (copy_from_user(&cmd, buf, sizeof cmd))
2715 return -EFAULT; 2669 return -EFAULT;
2716 2670
2717 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 2671 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
2718 if (!qp) 2672 if (!qp)
2719 return -EINVAL; 2673 return -EINVAL;
2720 2674
@@ -2751,7 +2705,6 @@ out_put:
2751} 2705}
2752 2706
2753ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, 2707ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2754 struct ib_device *ib_dev,
2755 const char __user *buf, int in_len, 2708 const char __user *buf, int in_len,
2756 int out_len) 2709 int out_len)
2757{ 2710{
@@ -2765,7 +2718,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2765 if (copy_from_user(&cmd, buf, sizeof cmd)) 2718 if (copy_from_user(&cmd, buf, sizeof cmd))
2766 return -EFAULT; 2719 return -EFAULT;
2767 2720
2768 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 2721 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
2769 if (!qp) 2722 if (!qp)
2770 return -EINVAL; 2723 return -EINVAL;
2771 2724
@@ -2810,29 +2763,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
2810 resources = kzalloc(sizeof(*resources), GFP_KERNEL); 2763 resources = kzalloc(sizeof(*resources), GFP_KERNEL);
2811 2764
2812 if (!resources) 2765 if (!resources)
2813 goto err_res; 2766 return NULL;
2767
2768 if (!num_specs)
2769 goto out;
2814 2770
2815 resources->counters = 2771 resources->counters =
2816 kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL); 2772 kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
2817
2818 if (!resources->counters)
2819 goto err_cnt;
2820
2821 resources->collection = 2773 resources->collection =
2822 kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL); 2774 kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
2823 2775
2824 if (!resources->collection) 2776 if (!resources->counters || !resources->collection)
2825 goto err_collection; 2777 goto err;
2826 2778
2779out:
2827 resources->max = num_specs; 2780 resources->max = num_specs;
2828
2829 return resources; 2781 return resources;
2830 2782
2831err_collection: 2783err:
2832 kfree(resources->counters); 2784 kfree(resources->counters);
2833err_cnt:
2834 kfree(resources); 2785 kfree(resources);
2835err_res: 2786
2836 return NULL; 2787 return NULL;
2837} 2788}
2838 2789
@@ -2840,6 +2791,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
2840{ 2791{
2841 unsigned int i; 2792 unsigned int i;
2842 2793
2794 if (!uflow_res)
2795 return;
2796
2843 for (i = 0; i < uflow_res->collection_num; i++) 2797 for (i = 0; i < uflow_res->collection_num; i++)
2844 atomic_dec(&uflow_res->collection[i]->usecnt); 2798 atomic_dec(&uflow_res->collection[i]->usecnt);
2845 2799
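The early return added above makes the free routine NULL-tolerant, in the style of kfree(), so error paths that never produced a resource list can call it unconditionally. A sketch of the shape:

void flow_resources_free_sketch(struct ib_uflow_resources *uflow_res)
{
	/* Tolerate NULL so callers need no separate "was it allocated"
	 * check on their error paths. */
	if (!uflow_res)
		return;

	/* ... drop the usecnt references and free the arrays as before ... */
}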
@@ -2875,7 +2829,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
2875 uflow_res->num++; 2829 uflow_res->num++;
2876} 2830}
2877 2831
2878static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext, 2832static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
2879 struct ib_uverbs_flow_spec *kern_spec, 2833 struct ib_uverbs_flow_spec *kern_spec,
2880 union ib_flow_spec *ib_spec, 2834 union ib_flow_spec *ib_spec,
2881 struct ib_uflow_resources *uflow_res) 2835 struct ib_uflow_resources *uflow_res)
@@ -2904,7 +2858,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
2904 ib_spec->action.act = uobj_get_obj_read(flow_action, 2858 ib_spec->action.act = uobj_get_obj_read(flow_action,
2905 UVERBS_OBJECT_FLOW_ACTION, 2859 UVERBS_OBJECT_FLOW_ACTION,
2906 kern_spec->action.handle, 2860 kern_spec->action.handle,
2907 ucontext); 2861 ufile);
2908 if (!ib_spec->action.act) 2862 if (!ib_spec->action.act)
2909 return -EINVAL; 2863 return -EINVAL;
2910 ib_spec->action.size = 2864 ib_spec->action.size =
@@ -2922,7 +2876,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
2922 uobj_get_obj_read(counters, 2876 uobj_get_obj_read(counters,
2923 UVERBS_OBJECT_COUNTERS, 2877 UVERBS_OBJECT_COUNTERS,
2924 kern_spec->flow_count.handle, 2878 kern_spec->flow_count.handle,
2925 ucontext); 2879 ufile);
2926 if (!ib_spec->flow_count.counters) 2880 if (!ib_spec->flow_count.counters)
2927 return -EINVAL; 2881 return -EINVAL;
2928 ib_spec->flow_count.size = 2882 ib_spec->flow_count.size =
@@ -3091,9 +3045,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
3091 void *kern_spec_mask; 3045 void *kern_spec_mask;
3092 void *kern_spec_val; 3046 void *kern_spec_val;
3093 3047
3094 if (kern_spec->reserved)
3095 return -EINVAL;
3096
3097 kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); 3048 kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
3098 3049
3099 kern_spec_val = (void *)kern_spec + 3050 kern_spec_val = (void *)kern_spec +
@@ -3106,7 +3057,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
3106 kern_filter_sz, ib_spec); 3057 kern_filter_sz, ib_spec);
3107} 3058}
3108 3059
3109static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext, 3060static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
3110 struct ib_uverbs_flow_spec *kern_spec, 3061 struct ib_uverbs_flow_spec *kern_spec,
3111 union ib_flow_spec *ib_spec, 3062 union ib_flow_spec *ib_spec,
3112 struct ib_uflow_resources *uflow_res) 3063 struct ib_uflow_resources *uflow_res)
@@ -3115,14 +3066,13 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
3115 return -EINVAL; 3066 return -EINVAL;
3116 3067
3117 if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG) 3068 if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
3118 return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec, 3069 return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
3119 uflow_res); 3070 uflow_res);
3120 else 3071 else
3121 return kern_spec_to_ib_spec_filter(kern_spec, ib_spec); 3072 return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
3122} 3073}
3123 3074
3124int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, 3075int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3125 struct ib_device *ib_dev,
3126 struct ib_udata *ucore, 3076 struct ib_udata *ucore,
3127 struct ib_udata *uhw) 3077 struct ib_udata *uhw)
3128{ 3078{
@@ -3136,6 +3086,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3136 struct ib_wq_init_attr wq_init_attr = {}; 3086 struct ib_wq_init_attr wq_init_attr = {};
3137 size_t required_cmd_sz; 3087 size_t required_cmd_sz;
3138 size_t required_resp_len; 3088 size_t required_resp_len;
3089 struct ib_device *ib_dev;
3139 3090
3140 required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); 3091 required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
3141 required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); 3092 required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
@@ -3158,18 +3109,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3158 if (cmd.comp_mask) 3109 if (cmd.comp_mask)
3159 return -EOPNOTSUPP; 3110 return -EOPNOTSUPP;
3160 3111
3161 obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, 3112 obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
3162 file->ucontext); 3113 &ib_dev);
3163 if (IS_ERR(obj)) 3114 if (IS_ERR(obj))
3164 return PTR_ERR(obj); 3115 return PTR_ERR(obj);
3165 3116
3166 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext); 3117 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
3167 if (!pd) { 3118 if (!pd) {
3168 err = -EINVAL; 3119 err = -EINVAL;
3169 goto err_uobj; 3120 goto err_uobj;
3170 } 3121 }
3171 3122
3172 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); 3123 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
3173 if (!cq) { 3124 if (!cq) {
3174 err = -EINVAL; 3125 err = -EINVAL;
3175 goto err_put_pd; 3126 goto err_put_pd;
@@ -3223,8 +3174,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
3223 3174
3224 uobj_put_obj_read(pd); 3175 uobj_put_obj_read(pd);
3225 uobj_put_obj_read(cq); 3176 uobj_put_obj_read(cq);
3226 uobj_alloc_commit(&obj->uevent.uobject); 3177 return uobj_alloc_commit(&obj->uevent.uobject, 0);
3227 return 0;
3228 3178
3229err_copy: 3179err_copy:
3230 ib_destroy_wq(wq); 3180 ib_destroy_wq(wq);
@@ -3239,7 +3189,6 @@ err_uobj:
3239} 3189}
3240 3190
3241int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, 3191int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
3242 struct ib_device *ib_dev,
3243 struct ib_udata *ucore, 3192 struct ib_udata *ucore,
3244 struct ib_udata *uhw) 3193 struct ib_udata *uhw)
3245{ 3194{
@@ -3273,29 +3222,19 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
3273 return -EOPNOTSUPP; 3222 return -EOPNOTSUPP;
3274 3223
3275 resp.response_length = required_resp_len; 3224 resp.response_length = required_resp_len;
3276 uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, 3225 uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
3277 file->ucontext);
3278 if (IS_ERR(uobj)) 3226 if (IS_ERR(uobj))
3279 return PTR_ERR(uobj); 3227 return PTR_ERR(uobj);
3280 3228
3281 obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); 3229 obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
3282 /*
3283 * Make sure we don't free the memory in remove_commit as we still
3284 * needs the uobject memory to create the response.
3285 */
3286 uverbs_uobject_get(uobj);
3287
3288 ret = uobj_remove_commit(uobj);
3289 resp.events_reported = obj->uevent.events_reported; 3230 resp.events_reported = obj->uevent.events_reported;
3290 uverbs_uobject_put(uobj); 3231
3291 if (ret) 3232 uobj_put_destroy(uobj);
3292 return ret;
3293 3233
3294 return ib_copy_to_udata(ucore, &resp, resp.response_length); 3234 return ib_copy_to_udata(ucore, &resp, resp.response_length);
3295} 3235}
3296 3236
3297int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, 3237int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3298 struct ib_device *ib_dev,
3299 struct ib_udata *ucore, 3238 struct ib_udata *ucore,
3300 struct ib_udata *uhw) 3239 struct ib_udata *uhw)
3301{ 3240{
@@ -3324,7 +3263,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
3324 if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS)) 3263 if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
3325 return -EINVAL; 3264 return -EINVAL;
3326 3265
3327 wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext); 3266 wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
3328 if (!wq) 3267 if (!wq)
3329 return -EINVAL; 3268 return -EINVAL;
3330 3269
@@ -3345,7 +3284,6 @@ out:
3345} 3284}
3346 3285
3347int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, 3286int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3348 struct ib_device *ib_dev,
3349 struct ib_udata *ucore, 3287 struct ib_udata *ucore,
3350 struct ib_udata *uhw) 3288 struct ib_udata *uhw)
3351{ 3289{
@@ -3363,6 +3301,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3363 u32 expected_in_size; 3301 u32 expected_in_size;
3364 size_t required_cmd_sz_header; 3302 size_t required_cmd_sz_header;
3365 size_t required_resp_len; 3303 size_t required_resp_len;
3304 struct ib_device *ib_dev;
3366 3305
3367 required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); 3306 required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
3368 required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); 3307 required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
@@ -3418,8 +3357,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3418 3357
3419 for (num_read_wqs = 0; num_read_wqs < num_wq_handles; 3358 for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
3420 num_read_wqs++) { 3359 num_read_wqs++) {
3421 wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs], 3360 wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
3422 file->ucontext); 3361 wqs_handles[num_read_wqs], file);
3423 if (!wq) { 3362 if (!wq) {
3424 err = -EINVAL; 3363 err = -EINVAL;
3425 goto put_wqs; 3364 goto put_wqs;
@@ -3428,7 +3367,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3428 wqs[num_read_wqs] = wq; 3367 wqs[num_read_wqs] = wq;
3429 } 3368 }
3430 3369
3431 uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext); 3370 uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
3432 if (IS_ERR(uobj)) { 3371 if (IS_ERR(uobj)) {
3433 err = PTR_ERR(uobj); 3372 err = PTR_ERR(uobj);
3434 goto put_wqs; 3373 goto put_wqs;
@@ -3472,8 +3411,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
3472 for (j = 0; j < num_read_wqs; j++) 3411 for (j = 0; j < num_read_wqs; j++)
3473 uobj_put_obj_read(wqs[j]); 3412 uobj_put_obj_read(wqs[j]);
3474 3413
3475 uobj_alloc_commit(uobj); 3414 return uobj_alloc_commit(uobj, 0);
3476 return 0;
3477 3415
3478err_copy: 3416err_copy:
3479 ib_destroy_rwq_ind_table(rwq_ind_tbl); 3417 ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3489,12 +3427,10 @@ err_free:
3489} 3427}
3490 3428
3491int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, 3429int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
3492 struct ib_device *ib_dev,
3493 struct ib_udata *ucore, 3430 struct ib_udata *ucore,
3494 struct ib_udata *uhw) 3431 struct ib_udata *uhw)
3495{ 3432{
3496 struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; 3433 struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
3497 struct ib_uobject *uobj;
3498 int ret; 3434 int ret;
3499 size_t required_cmd_sz; 3435 size_t required_cmd_sz;
3500 3436
@@ -3515,16 +3451,11 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
3515 if (cmd.comp_mask) 3451 if (cmd.comp_mask)
3516 return -EOPNOTSUPP; 3452 return -EOPNOTSUPP;
3517 3453
3518 uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle, 3454 return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
3519 file->ucontext); 3455 cmd.ind_tbl_handle, file, 0);
3520 if (IS_ERR(uobj))
3521 return PTR_ERR(uobj);
3522
3523 return uobj_remove_commit(uobj);
3524} 3456}
3525 3457
3526int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, 3458int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3527 struct ib_device *ib_dev,
3528 struct ib_udata *ucore, 3459 struct ib_udata *ucore,
3529 struct ib_udata *uhw) 3460 struct ib_udata *uhw)
3530{ 3461{
@@ -3541,6 +3472,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3541 int err = 0; 3472 int err = 0;
3542 void *ib_spec; 3473 void *ib_spec;
3543 int i; 3474 int i;
3475 struct ib_device *ib_dev;
3544 3476
3545 if (ucore->inlen < sizeof(cmd)) 3477 if (ucore->inlen < sizeof(cmd))
3546 return -EINVAL; 3478 return -EINVAL;
@@ -3596,13 +3528,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3596 kern_flow_attr = &cmd.flow_attr; 3528 kern_flow_attr = &cmd.flow_attr;
3597 } 3529 }
3598 3530
3599 uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext); 3531 uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
3600 if (IS_ERR(uobj)) { 3532 if (IS_ERR(uobj)) {
3601 err = PTR_ERR(uobj); 3533 err = PTR_ERR(uobj);
3602 goto err_free_attr; 3534 goto err_free_attr;
3603 } 3535 }
3604 3536
3605 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext); 3537 qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
3606 if (!qp) { 3538 if (!qp) {
3607 err = -EINVAL; 3539 err = -EINVAL;
3608 goto err_uobj; 3540 goto err_uobj;
@@ -3613,6 +3545,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3613 goto err_put; 3545 goto err_put;
3614 } 3546 }
3615 3547
3548 if (!qp->device->create_flow) {
3549 err = -EOPNOTSUPP;
3550 goto err_put;
3551 }
3552
3616 flow_attr = kzalloc(struct_size(flow_attr, flows, 3553 flow_attr = kzalloc(struct_size(flow_attr, flows,
3617 cmd.flow_attr.num_of_specs), GFP_KERNEL); 3554 cmd.flow_attr.num_of_specs), GFP_KERNEL);
3618 if (!flow_attr) { 3555 if (!flow_attr) {
@@ -3639,7 +3576,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3639 cmd.flow_attr.size >= kern_spec->size; 3576 cmd.flow_attr.size >= kern_spec->size;
3640 i++) { 3577 i++) {
3641 err = kern_spec_to_ib_spec( 3578 err = kern_spec_to_ib_spec(
3642 file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec, 3579 file, (struct ib_uverbs_flow_spec *)kern_spec,
3643 ib_spec, uflow_res); 3580 ib_spec, uflow_res);
3644 if (err) 3581 if (err)
3645 goto err_free; 3582 goto err_free;
@@ -3666,6 +3603,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3666 } 3603 }
3667 atomic_inc(&qp->usecnt); 3604 atomic_inc(&qp->usecnt);
3668 flow_id->qp = qp; 3605 flow_id->qp = qp;
3606 flow_id->device = qp->device;
3669 flow_id->uobject = uobj; 3607 flow_id->uobject = uobj;
3670 uobj->object = flow_id; 3608 uobj->object = flow_id;
3671 uflow = container_of(uobj, typeof(*uflow), uobject); 3609 uflow = container_of(uobj, typeof(*uflow), uobject);
@@ -3680,13 +3618,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3680 goto err_copy; 3618 goto err_copy;
3681 3619
3682 uobj_put_obj_read(qp); 3620 uobj_put_obj_read(qp);
3683 uobj_alloc_commit(uobj);
3684 kfree(flow_attr); 3621 kfree(flow_attr);
3685 if (cmd.flow_attr.num_of_specs) 3622 if (cmd.flow_attr.num_of_specs)
3686 kfree(kern_flow_attr); 3623 kfree(kern_flow_attr);
3687 return 0; 3624 return uobj_alloc_commit(uobj, 0);
3688err_copy: 3625err_copy:
3689 ib_destroy_flow(flow_id); 3626 if (!qp->device->destroy_flow(flow_id))
3627 atomic_dec(&qp->usecnt);
3690err_free: 3628err_free:
3691 ib_uverbs_flow_resources_free(uflow_res); 3629 ib_uverbs_flow_resources_free(uflow_res);
3692err_free_flow_attr: 3630err_free_flow_attr:
@@ -3702,12 +3640,10 @@ err_free_attr:
3702} 3640}
3703 3641
3704int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, 3642int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3705 struct ib_device *ib_dev,
3706 struct ib_udata *ucore, 3643 struct ib_udata *ucore,
3707 struct ib_udata *uhw) 3644 struct ib_udata *uhw)
3708{ 3645{
3709 struct ib_uverbs_destroy_flow cmd; 3646 struct ib_uverbs_destroy_flow cmd;
3710 struct ib_uobject *uobj;
3711 int ret; 3647 int ret;
3712 3648
3713 if (ucore->inlen < sizeof(cmd)) 3649 if (ucore->inlen < sizeof(cmd))
@@ -3720,17 +3656,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
3720 if (cmd.comp_mask) 3656 if (cmd.comp_mask)
3721 return -EINVAL; 3657 return -EINVAL;
3722 3658
3723 uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle, 3659 return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
3724 file->ucontext); 3660 0);
3725 if (IS_ERR(uobj))
3726 return PTR_ERR(uobj);
3727
3728 ret = uobj_remove_commit(uobj);
3729 return ret;
3730} 3661}
3731 3662
3732static int __uverbs_create_xsrq(struct ib_uverbs_file *file, 3663static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3733 struct ib_device *ib_dev,
3734 struct ib_uverbs_create_xsrq *cmd, 3664 struct ib_uverbs_create_xsrq *cmd,
3735 struct ib_udata *udata) 3665 struct ib_udata *udata)
3736{ 3666{
@@ -3741,9 +3671,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3741 struct ib_uobject *uninitialized_var(xrcd_uobj); 3671 struct ib_uobject *uninitialized_var(xrcd_uobj);
3742 struct ib_srq_init_attr attr; 3672 struct ib_srq_init_attr attr;
3743 int ret; 3673 int ret;
3674 struct ib_device *ib_dev;
3744 3675
3745 obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, 3676 obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
3746 file->ucontext); 3677 &ib_dev);
3747 if (IS_ERR(obj)) 3678 if (IS_ERR(obj))
3748 return PTR_ERR(obj); 3679 return PTR_ERR(obj);
3749 3680
@@ -3752,7 +3683,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3752 3683
3753 if (cmd->srq_type == IB_SRQT_XRC) { 3684 if (cmd->srq_type == IB_SRQT_XRC) {
3754 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle, 3685 xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
3755 file->ucontext); 3686 file);
3756 if (IS_ERR(xrcd_uobj)) { 3687 if (IS_ERR(xrcd_uobj)) {
3757 ret = -EINVAL; 3688 ret = -EINVAL;
3758 goto err; 3689 goto err;
@@ -3769,15 +3700,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3769 } 3700 }
3770 3701
3771 if (ib_srq_has_cq(cmd->srq_type)) { 3702 if (ib_srq_has_cq(cmd->srq_type)) {
3772 attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle, 3703 attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
3773 file->ucontext); 3704 cmd->cq_handle, file);
3774 if (!attr.ext.cq) { 3705 if (!attr.ext.cq) {
3775 ret = -EINVAL; 3706 ret = -EINVAL;
3776 goto err_put_xrcd; 3707 goto err_put_xrcd;
3777 } 3708 }
3778 } 3709 }
3779 3710
3780 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext); 3711 pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
3781 if (!pd) { 3712 if (!pd) {
3782 ret = -EINVAL; 3713 ret = -EINVAL;
3783 goto err_put_cq; 3714 goto err_put_cq;
@@ -3842,9 +3773,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
3842 uobj_put_obj_read(attr.ext.cq); 3773 uobj_put_obj_read(attr.ext.cq);
3843 3774
3844 uobj_put_obj_read(pd); 3775 uobj_put_obj_read(pd);
3845 uobj_alloc_commit(&obj->uevent.uobject); 3776 return uobj_alloc_commit(&obj->uevent.uobject, 0);
3846
3847 return 0;
3848 3777
3849err_copy: 3778err_copy:
3850 ib_destroy_srq(srq); 3779 ib_destroy_srq(srq);
@@ -3868,7 +3797,6 @@ err:
3868} 3797}
3869 3798
3870ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, 3799ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3871 struct ib_device *ib_dev,
3872 const char __user *buf, int in_len, 3800 const char __user *buf, int in_len,
3873 int out_len) 3801 int out_len)
3874{ 3802{
@@ -3898,7 +3826,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3898 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 3826 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
3899 out_len - sizeof(resp)); 3827 out_len - sizeof(resp));
3900 3828
3901 ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); 3829 ret = __uverbs_create_xsrq(file, &xcmd, &udata);
3902 if (ret) 3830 if (ret)
3903 return ret; 3831 return ret;
3904 3832
@@ -3906,7 +3834,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
3906} 3834}
3907 3835
3908ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, 3836ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3909 struct ib_device *ib_dev,
3910 const char __user *buf, int in_len, int out_len) 3837 const char __user *buf, int in_len, int out_len)
3911{ 3838{
3912 struct ib_uverbs_create_xsrq cmd; 3839 struct ib_uverbs_create_xsrq cmd;
@@ -3925,7 +3852,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3925 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), 3852 in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
3926 out_len - sizeof(resp)); 3853 out_len - sizeof(resp));
3927 3854
3928 ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); 3855 ret = __uverbs_create_xsrq(file, &cmd, &udata);
3929 if (ret) 3856 if (ret)
3930 return ret; 3857 return ret;
3931 3858
@@ -3933,7 +3860,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
3933} 3860}
3934 3861
3935ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, 3862ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3936 struct ib_device *ib_dev,
3937 const char __user *buf, int in_len, 3863 const char __user *buf, int in_len,
3938 int out_len) 3864 int out_len)
3939{ 3865{
@@ -3949,7 +3875,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3949 ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, 3875 ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
3950 out_len); 3876 out_len);
3951 3877
3952 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); 3878 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
3953 if (!srq) 3879 if (!srq)
3954 return -EINVAL; 3880 return -EINVAL;
3955 3881
@@ -3964,7 +3890,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
3964} 3890}
3965 3891
3966ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, 3892ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
3967 struct ib_device *ib_dev,
3968 const char __user *buf, 3893 const char __user *buf,
3969 int in_len, int out_len) 3894 int in_len, int out_len)
3970{ 3895{
@@ -3980,7 +3905,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
3980 if (copy_from_user(&cmd, buf, sizeof cmd)) 3905 if (copy_from_user(&cmd, buf, sizeof cmd))
3981 return -EFAULT; 3906 return -EFAULT;
3982 3907
3983 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext); 3908 srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
3984 if (!srq) 3909 if (!srq)
3985 return -EINVAL; 3910 return -EINVAL;
3986 3911
@@ -4004,7 +3929,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
4004} 3929}
4005 3930
4006ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, 3931ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
4007 struct ib_device *ib_dev,
4008 const char __user *buf, int in_len, 3932 const char __user *buf, int in_len,
4009 int out_len) 3933 int out_len)
4010{ 3934{
@@ -4012,32 +3936,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
4012 struct ib_uverbs_destroy_srq_resp resp; 3936 struct ib_uverbs_destroy_srq_resp resp;
4013 struct ib_uobject *uobj; 3937 struct ib_uobject *uobj;
4014 struct ib_uevent_object *obj; 3938 struct ib_uevent_object *obj;
4015 int ret = -EINVAL;
4016 3939
4017 if (copy_from_user(&cmd, buf, sizeof cmd)) 3940 if (copy_from_user(&cmd, buf, sizeof cmd))
4018 return -EFAULT; 3941 return -EFAULT;
4019 3942
4020 uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, 3943 uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
4021 file->ucontext);
4022 if (IS_ERR(uobj)) 3944 if (IS_ERR(uobj))
4023 return PTR_ERR(uobj); 3945 return PTR_ERR(uobj);
4024 3946
4025 obj = container_of(uobj, struct ib_uevent_object, uobject); 3947 obj = container_of(uobj, struct ib_uevent_object, uobject);
4026 /*
4027 * Make sure we don't free the memory in remove_commit as we still
4028 * needs the uobject memory to create the response.
4029 */
4030 uverbs_uobject_get(uobj);
4031
4032 memset(&resp, 0, sizeof(resp)); 3948 memset(&resp, 0, sizeof(resp));
4033
4034 ret = uobj_remove_commit(uobj);
4035 if (ret) {
4036 uverbs_uobject_put(uobj);
4037 return ret;
4038 }
4039 resp.events_reported = obj->events_reported; 3949 resp.events_reported = obj->events_reported;
4040 uverbs_uobject_put(uobj); 3950
3951 uobj_put_destroy(uobj);
3952
4041 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) 3953 if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
4042 return -EFAULT; 3954 return -EFAULT;
4043 3955
@@ -4045,15 +3957,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
4045} 3957}
4046 3958
4047int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, 3959int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
4048 struct ib_device *ib_dev,
4049 struct ib_udata *ucore, 3960 struct ib_udata *ucore,
4050 struct ib_udata *uhw) 3961 struct ib_udata *uhw)
4051{ 3962{
4052 struct ib_uverbs_ex_query_device_resp resp = { {0} }; 3963 struct ib_uverbs_ex_query_device_resp resp = { {0} };
4053 struct ib_uverbs_ex_query_device cmd; 3964 struct ib_uverbs_ex_query_device cmd;
4054 struct ib_device_attr attr = {0}; 3965 struct ib_device_attr attr = {0};
3966 struct ib_ucontext *ucontext;
3967 struct ib_device *ib_dev;
4055 int err; 3968 int err;
4056 3969
3970 ucontext = ib_uverbs_get_ucontext(file);
3971 if (IS_ERR(ucontext))
3972 return PTR_ERR(ucontext);
3973 ib_dev = ucontext->device;
3974
4057 if (!ib_dev->query_device) 3975 if (!ib_dev->query_device)
4058 return -EOPNOTSUPP; 3976 return -EOPNOTSUPP;
4059 3977
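This hunk shows how a handler recovers the device now that ib_dev is no longer passed in: the ucontext is fetched from the file (which can fail, for example after the device has been disassociated) and the device is taken from it, after which per-method support checks such as query_device run as before. A small helper-style sketch with an illustrative name:

static int dev_from_file_sketch(struct ib_uverbs_file *file,
				struct ib_device **ib_dev)
{
	struct ib_ucontext *ucontext;

	/* Fails if no context was ever created or the device has gone
	 * away from under this file. */
	ucontext = ib_uverbs_get_ucontext(file);
	if (IS_ERR(ucontext))
		return PTR_ERR(ucontext);

	*ib_dev = ucontext->device;
	return 0;
}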
@@ -4079,7 +3997,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
4079 if (err) 3997 if (err)
4080 return err; 3998 return err;
4081 3999
4082 copy_query_dev_fields(file, ib_dev, &resp.base, &attr); 4000 copy_query_dev_fields(ucontext, &resp.base, &attr);
4083 4001
4084 if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) 4002 if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
4085 goto end; 4003 goto end;
@@ -4166,7 +4084,6 @@ end:
4166} 4084}
4167 4085
4168int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file, 4086int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
4169 struct ib_device *ib_dev,
4170 struct ib_udata *ucore, 4087 struct ib_udata *ucore,
4171 struct ib_udata *uhw) 4088 struct ib_udata *uhw)
4172{ 4089{
@@ -4196,7 +4113,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
4196 if (cmd.attr_mask > IB_CQ_MODERATE) 4113 if (cmd.attr_mask > IB_CQ_MODERATE)
4197 return -EOPNOTSUPP; 4114 return -EOPNOTSUPP;
4198 4115
4199 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext); 4116 cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
4200 if (!cq) 4117 if (!cq)
4201 return -EINVAL; 4118 return -EINVAL;
4202 4119
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8d32c4ae368c..1a6b229e3db3 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,103 @@
35#include "rdma_core.h" 35#include "rdma_core.h"
36#include "uverbs.h" 36#include "uverbs.h"
37 37
38struct bundle_alloc_head {
39 struct bundle_alloc_head *next;
40 u8 data[];
41};
42
43struct bundle_priv {
44 /* Must be first */
45 struct bundle_alloc_head alloc_head;
46 struct bundle_alloc_head *allocated_mem;
47 size_t internal_avail;
48 size_t internal_used;
49
50 struct radix_tree_root *radix;
51 const struct uverbs_api_ioctl_method *method_elm;
52 void __rcu **radix_slots;
53 unsigned long radix_slots_len;
54 u32 method_key;
55
56 struct ib_uverbs_attr __user *user_attrs;
57 struct ib_uverbs_attr *uattrs;
58
59 DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
60
61 /*
62 * Must be last. bundle ends in a flex array which overlaps
63 * internal_buffer.
64 */
65 struct uverbs_attr_bundle bundle;
66 u64 internal_buffer[32];
67};
68
69/*
70 * Each method has an absolute minimum amount of memory it needs to allocate,
71 * precompute that amount and determine if the onstack memory can be used or
72 * if allocation is needed.
73 */
74void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
75 unsigned int num_attrs)
76{
77 struct bundle_priv *pbundle;
78 size_t bundle_size =
79 offsetof(struct bundle_priv, internal_buffer) +
80 sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
81 sizeof(*pbundle->uattrs) * num_attrs;
82
83 method_elm->use_stack = bundle_size <= sizeof(*pbundle);
84 method_elm->bundle_size =
85 ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
86
87 /* Do not want order-2 allocations for this. */
88 WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
89}
90
91/**
92 * uverbs_alloc() - Quickly allocate memory for use with a bundle
93 * @bundle: The bundle
94 * @size: Number of bytes to allocate
95 * @flags: Allocator flags
96 *
97 * The bundle allocator is intended for allocations that are connected with
98 * processing the system call related to the bundle. The allocated memory is
99 * always freed once the system call completes, and cannot be freed any other
100 * way.
101 *
102 * This tries to use a small pool of pre-allocated memory for performance.
103 */
104__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
105 gfp_t flags)
106{
107 struct bundle_priv *pbundle =
108 container_of(bundle, struct bundle_priv, bundle);
109 size_t new_used;
110 void *res;
111
112 if (check_add_overflow(size, pbundle->internal_used, &new_used))
113 return ERR_PTR(-EOVERFLOW);
114
115 if (new_used > pbundle->internal_avail) {
116 struct bundle_alloc_head *buf;
117
118 buf = kvmalloc(struct_size(buf, data, size), flags);
119 if (!buf)
120 return ERR_PTR(-ENOMEM);
121 buf->next = pbundle->allocated_mem;
122 pbundle->allocated_mem = buf;
123 return buf->data;
124 }
125
126 res = (void *)pbundle->internal_buffer + pbundle->internal_used;
127 pbundle->internal_used =
128 ALIGN(new_used, sizeof(*pbundle->internal_buffer));
129 if (flags & __GFP_ZERO)
130 memset(res, 0, size);
131 return res;
132}
133EXPORT_SYMBOL(_uverbs_alloc);
134
38static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr, 135static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
39 u16 len) 136 u16 len)
40{ 137{
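The allocator added above bump-allocates out of the bundle's preallocated internal_buffer and falls back to kvmalloc() only when that runs out; every allocation is released when the system call completes, so callers never free. A hedged sketch of how a method handler might use the uverbs_alloc() wrapper seen at the call sites later in this diff (the helper name here is hypothetical):

static void *copy_user_blob_sketch(struct uverbs_attr_bundle *attrs,
				   const void __user *src, size_t len)
{
	void *buf;

	/* Scratch memory that lives exactly as long as the system call. */
	buf = uverbs_alloc(attrs, len);
	if (IS_ERR(buf))
		return buf;

	if (copy_from_user(buf, src, len))
		return ERR_PTR(-EFAULT);

	/* No kfree(): the bundle frees its allocations, including any
	 * kvmalloc() fallback chunks, when the ioctl finishes. */
	return buf;
}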
@@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
46 0, uattr->len - len); 143 0, uattr->len - len);
47} 144}
48 145
49static int uverbs_process_attr(struct ib_device *ibdev, 146static int uverbs_process_attr(struct bundle_priv *pbundle,
50 struct ib_ucontext *ucontext, 147 const struct uverbs_api_attr *attr_uapi,
51 const struct ib_uverbs_attr *uattr, 148 struct ib_uverbs_attr *uattr, u32 attr_bkey)
52 u16 attr_id,
53 const struct uverbs_attr_spec_hash *attr_spec_bucket,
54 struct uverbs_attr_bundle_hash *attr_bundle_h,
55 struct ib_uverbs_attr __user *uattr_ptr)
56{ 149{
57 const struct uverbs_attr_spec *spec; 150 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
58 const struct uverbs_attr_spec *val_spec; 151 struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
59 struct uverbs_attr *e; 152 const struct uverbs_attr_spec *val_spec = spec;
60 const struct uverbs_object_spec *object;
61 struct uverbs_obj_attr *o_attr; 153 struct uverbs_obj_attr *o_attr;
62 struct uverbs_attr *elements = attr_bundle_h->attrs;
63
64 if (attr_id >= attr_spec_bucket->num_attrs) {
65 if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
66 return -EINVAL;
67 else
68 return 0;
69 }
70
71 if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
72 return -EINVAL;
73
74 spec = &attr_spec_bucket->attrs[attr_id];
75 val_spec = spec;
76 e = &elements[attr_id];
77 e->uattr = uattr_ptr;
78 154
79 switch (spec->type) { 155 switch (spec->type) {
80 case UVERBS_ATTR_TYPE_ENUM_IN: 156 case UVERBS_ATTR_TYPE_ENUM_IN:
81 if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems) 157 if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
82 return -EOPNOTSUPP; 158 return -EOPNOTSUPP;
83 159
84 if (uattr->attr_data.enum_data.reserved) 160 if (uattr->attr_data.enum_data.reserved)
85 return -EINVAL; 161 return -EINVAL;
86 162
87 val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id]; 163 val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
88 164
89 /* Currently we only support PTR_IN based enums */ 165 /* Currently we only support PTR_IN based enums */
90 if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN) 166 if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
@@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev,
98 * longer struct will fail here if used with an old kernel and 174 * longer struct will fail here if used with an old kernel and
99 * non-zero content, making ABI compat/discovery simpler. 175 * non-zero content, making ABI compat/discovery simpler.
100 */ 176 */
101 if (uattr->len > val_spec->ptr.len && 177 if (uattr->len > val_spec->u.ptr.len &&
102 val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO && 178 val_spec->zero_trailing &&
103 !uverbs_is_attr_cleared(uattr, val_spec->ptr.len)) 179 !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
104 return -EOPNOTSUPP; 180 return -EOPNOTSUPP;
105 181
106 /* fall through */ 182 /* fall through */
107 case UVERBS_ATTR_TYPE_PTR_OUT: 183 case UVERBS_ATTR_TYPE_PTR_OUT:
108 if (uattr->len < val_spec->ptr.min_len || 184 if (uattr->len < val_spec->u.ptr.min_len ||
109 (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) && 185 (!val_spec->zero_trailing &&
110 uattr->len > val_spec->ptr.len)) 186 uattr->len > val_spec->u.ptr.len))
111 return -EINVAL; 187 return -EINVAL;
112 188
113 if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN && 189 if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
114 uattr->attr_data.reserved) 190 uattr->attr_data.reserved)
115 return -EINVAL; 191 return -EINVAL;
116 192
117 e->ptr_attr.data = uattr->data; 193 e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
118 e->ptr_attr.len = uattr->len; 194 e->ptr_attr.len = uattr->len;
119 e->ptr_attr.flags = uattr->flags; 195
196 if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
197 void *p;
198
199 p = uverbs_alloc(&pbundle->bundle, uattr->len);
200 if (IS_ERR(p))
201 return PTR_ERR(p);
202
203 e->ptr_attr.ptr = p;
204
205 if (copy_from_user(p, u64_to_user_ptr(uattr->data),
206 uattr->len))
207 return -EFAULT;
208 } else {
209 e->ptr_attr.data = uattr->data;
210 }
120 break; 211 break;
121 212
122 case UVERBS_ATTR_TYPE_IDR: 213 case UVERBS_ATTR_TYPE_IDR:
123 if (uattr->data >> 32)
124 return -EINVAL;
125 /* fall through */
126 case UVERBS_ATTR_TYPE_FD: 214 case UVERBS_ATTR_TYPE_FD:
127 if (uattr->attr_data.reserved) 215 if (uattr->attr_data.reserved)
128 return -EINVAL; 216 return -EINVAL;
129 217
130 if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX) 218 if (uattr->len != 0)
131 return -EINVAL; 219 return -EINVAL;
132 220
133 o_attr = &e->obj_attr; 221 o_attr = &e->obj_attr;
134 object = uverbs_get_object(ibdev, spec->obj.obj_type); 222 o_attr->attr_elm = attr_uapi;
135 if (!object)
136 return -EINVAL;
137 o_attr->type = object->type_attrs;
138
139 o_attr->id = (int)uattr->data;
140 o_attr->uobject = uverbs_get_uobject_from_context(
141 o_attr->type,
142 ucontext,
143 spec->obj.access,
144 o_attr->id);
145 223
224 /*
225 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
226 * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
227 * here without caring about truncation as we know that the
228 * IDR implementation today rejects negative IDs
229 */
230 o_attr->uobject = uverbs_get_uobject_from_file(
231 spec->u.obj.obj_type,
232 pbundle->bundle.ufile,
233 spec->u.obj.access,
234 uattr->data_s64);
146 if (IS_ERR(o_attr->uobject)) 235 if (IS_ERR(o_attr->uobject))
147 return PTR_ERR(o_attr->uobject); 236 return PTR_ERR(o_attr->uobject);
237 __set_bit(attr_bkey, pbundle->uobj_finalize);
148 238
149 if (spec->obj.access == UVERBS_ACCESS_NEW) { 239 if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
150 u64 id = o_attr->uobject->id; 240 unsigned int uattr_idx = uattr - pbundle->uattrs;
241 s64 id = o_attr->uobject->id;
151 242
152 /* Copy the allocated id to the user-space */ 243 /* Copy the allocated id to the user-space */
153 if (put_user(id, &e->uattr->data)) { 244 if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
154 uverbs_finalize_object(o_attr->uobject,
155 UVERBS_ACCESS_NEW,
156 false);
157 return -EFAULT; 245 return -EFAULT;
158 }
159 } 246 }
160 247
161 break; 248 break;
@@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev,
163 return -EOPNOTSUPP; 250 return -EOPNOTSUPP;
164 } 251 }
165 252
166 set_bit(attr_id, attr_bundle_h->valid_bitmap);
167 return 0; 253 return 0;
168} 254}
169 255
170static int uverbs_uattrs_process(struct ib_device *ibdev, 256/*
171 struct ib_ucontext *ucontext, 257 * We search the radix tree with the method prefix and now we want to fast
172 const struct ib_uverbs_attr *uattrs, 258 * search the suffix bits to get a particular attribute pointer. It is not
173				  size_t num_uattrs,			 259	 * totally clear to me if this breaks the radix tree encapsulation or not, but
174 const struct uverbs_method_spec *method, 260 * it uses the iter data to determine if the method iter points at the same
175				  struct uverbs_attr_bundle *attr_bundle,	 261	 * chunk that will store the attribute; if so, it just derefs it directly. By
176 struct ib_uverbs_attr __user *uattr_ptr) 262 * construction in most kernel configs the method and attrs will all fit in a
263 * single radix chunk, so in most cases this will have no search. Other cases
 264	 * fall back to a full search.
265 */
266static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
267 u32 attr_key)
177{ 268{
178 size_t i; 269 void __rcu **slot;
179 int ret = 0;
180 int num_given_buckets = 0;
181
182 for (i = 0; i < num_uattrs; i++) {
183 const struct ib_uverbs_attr *uattr = &uattrs[i];
184 u16 attr_id = uattr->attr_id;
185 struct uverbs_attr_spec_hash *attr_spec_bucket;
186
187 ret = uverbs_ns_idx(&attr_id, method->num_buckets);
188 if (ret < 0) {
189 if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
190 uverbs_finalize_objects(attr_bundle,
191 method->attr_buckets,
192 num_given_buckets,
193 false);
194 return ret;
195 }
196 continue;
197 }
198 270
199 /* 271 if (likely(attr_key < pbundle->radix_slots_len)) {
200 * ret is the found ns, so increase num_given_buckets if 272 void *entry;
201 * necessary. 273
202 */ 274 slot = pbundle->radix_slots + attr_key;
203 if (ret >= num_given_buckets) 275 entry = rcu_dereference_raw(*slot);
204 num_given_buckets = ret + 1; 276 if (likely(!radix_tree_is_internal_node(entry) && entry))
205 277 return slot;
206 attr_spec_bucket = method->attr_buckets[ret];
207 ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
208 attr_spec_bucket, &attr_bundle->hash[ret],
209 uattr_ptr++);
210 if (ret) {
211 uverbs_finalize_objects(attr_bundle,
212 method->attr_buckets,
213 num_given_buckets,
214 false);
215 return ret;
216 }
217 } 278 }
218 279
219 return num_given_buckets; 280 return radix_tree_lookup_slot(pbundle->radix,
281 pbundle->method_key | attr_key);
220} 282}
221 283
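
The comment above relies on how the uapi radix keys are composed: a single u32 concatenates the object, method and attribute ids, so every attribute of a method shares the method's high bits and sorts next to it in the tree. The sketch below is illustrative only; the helper names and field widths are assumptions (the real uapi_key_* helpers live in include/rdma/uverbs_ioctl.h and may use different widths). The point is just why uapi_get_attr_for_method() can usually reuse the radix chunk already found during the method lookup.

#include <stdint.h>

/*
 * Illustrative layout: object id in the high bits, method id in the
 * middle, attribute id in the low bits.  Widths are assumptions, not
 * the kernel's actual constants.
 */
#define DEMO_OBJ_SHIFT		24u
#define DEMO_METHOD_SHIFT	12u
#define DEMO_ATTR_MASK		0xfffu

static inline uint32_t demo_method_key(uint32_t obj_id, uint32_t method_id)
{
	return (obj_id << DEMO_OBJ_SHIFT) | (method_id << DEMO_METHOD_SHIFT);
}

static inline uint32_t demo_attr_key(uint32_t method_key, uint32_t attr_id)
{
	/* Same prefix as the method, so it lands right next to it. */
	return method_key | (attr_id & DEMO_ATTR_MASK);
}

Because all attributes of a method share that prefix, the slot found for the method usually already points into the chunk holding its attributes, which is the fast path taken above.
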
222static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec, 284static int uverbs_set_attr(struct bundle_priv *pbundle,
223 struct uverbs_attr_bundle *attr_bundle) 285 struct ib_uverbs_attr *uattr)
224{ 286{
225 unsigned int i; 287 u32 attr_key = uapi_key_attr(uattr->attr_id);
226 288 u32 attr_bkey = uapi_bkey_attr(attr_key);
227 for (i = 0; i < attr_bundle->num_buckets; i++) { 289 const struct uverbs_api_attr *attr;
228 struct uverbs_attr_spec_hash *attr_spec_bucket = 290 void __rcu **slot;
229 method_spec->attr_buckets[i]; 291 int ret;
230 292
231 if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask, 293 slot = uapi_get_attr_for_method(pbundle, attr_key);
232 attr_bundle->hash[i].valid_bitmap, 294 if (!slot) {
233 attr_spec_bucket->num_attrs)) 295 /*
234 return -EINVAL; 296 * Kernel does not support the attribute but user-space says it
297 * is mandatory
298 */
299 if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
300 return -EPROTONOSUPPORT;
301 return 0;
235 } 302 }
303 attr = srcu_dereference(
304 *slot, &pbundle->bundle.ufile->device->disassociate_srcu);
236 305
237 for (; i < method_spec->num_buckets; i++) { 306 /* Reject duplicate attributes from user-space */
238 struct uverbs_attr_spec_hash *attr_spec_bucket = 307 if (test_bit(attr_bkey, pbundle->bundle.attr_present))
239 method_spec->attr_buckets[i]; 308 return -EINVAL;
240 309
241 if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask, 310 ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
242 attr_spec_bucket->num_attrs)) 311 if (ret)
243 return -EINVAL; 312 return ret;
244 } 313
314 __set_bit(attr_bkey, pbundle->bundle.attr_present);
245 315
246 return 0; 316 return 0;
247} 317}
248 318
249static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr, 319static int ib_uverbs_run_method(struct bundle_priv *pbundle,
250 const struct ib_uverbs_attr *uattrs, 320 unsigned int num_attrs)
251 size_t num_uattrs,
252 struct ib_device *ibdev,
253 struct ib_uverbs_file *ufile,
254 const struct uverbs_method_spec *method_spec,
255 struct uverbs_attr_bundle *attr_bundle)
256{ 321{
322 int (*handler)(struct ib_uverbs_file *ufile,
323 struct uverbs_attr_bundle *ctx);
324 size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
325 unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
326 unsigned int i;
257 int ret; 327 int ret;
258 int finalize_ret;
259 int num_given_buckets;
260 328
261 num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs, 329 /* See uverbs_disassociate_api() */
262 num_uattrs, method_spec, 330 handler = srcu_dereference(
263 attr_bundle, uattr_ptr); 331 pbundle->method_elm->handler,
264 if (num_given_buckets <= 0) 332 &pbundle->bundle.ufile->device->disassociate_srcu);
333 if (!handler)
334 return -EIO;
335
336 pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
337 if (IS_ERR(pbundle->uattrs))
338 return PTR_ERR(pbundle->uattrs);
339 if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
340 return -EFAULT;
341
342 for (i = 0; i != num_attrs; i++) {
343 ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
344 if (unlikely(ret))
345 return ret;
346 }
347
348 /* User space did not provide all the mandatory attributes */
349 if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
350 pbundle->bundle.attr_present,
351 pbundle->method_elm->key_bitmap_len)))
265 return -EINVAL; 352 return -EINVAL;
266 353
267 attr_bundle->num_buckets = num_given_buckets; 354 if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
268 ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle); 355 struct uverbs_obj_attr *destroy_attr =
269 if (ret) 356 &pbundle->bundle.attrs[destroy_bkey].obj_attr;
270 goto cleanup;
271 357
272 ret = method_spec->handler(ibdev, ufile, attr_bundle); 358 ret = uobj_destroy(destroy_attr->uobject);
273cleanup: 359 if (ret)
274 finalize_ret = uverbs_finalize_objects(attr_bundle, 360 return ret;
275 method_spec->attr_buckets, 361 __clear_bit(destroy_bkey, pbundle->uobj_finalize);
276 attr_bundle->num_buckets,
277 !ret);
278 362
279 return ret ? ret : finalize_ret; 363 ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
280} 364 uobj_put_destroy(destroy_attr->uobject);
365 } else {
366 ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
367 }
281 368
282#define UVERBS_OPTIMIZE_USING_STACK_SZ 256 369 /*
283static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev, 370 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
284 struct ib_uverbs_file *file, 371 * not invoke the method because the request is not supported. No
285 struct ib_uverbs_ioctl_hdr *hdr, 372 * other cases should return this code.
286 void __user *buf) 373 */
287{ 374 if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
288 const struct uverbs_object_spec *object_spec;
289 const struct uverbs_method_spec *method_spec;
290 long err = 0;
291 unsigned int i;
292 struct {
293 struct ib_uverbs_attr *uattrs;
294 struct uverbs_attr_bundle *uverbs_attr_bundle;
295 } *ctx = NULL;
296 struct uverbs_attr *curr_attr;
297 unsigned long *curr_bitmap;
298 size_t ctx_size;
299 uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
300
301 if (hdr->driver_id != ib_dev->driver_id)
302 return -EINVAL; 375 return -EINVAL;
303 376
304 object_spec = uverbs_get_object(ib_dev, hdr->object_id); 377 return ret;
305 if (!object_spec) 378}
306 return -EPROTONOSUPPORT;
307 379
308 method_spec = uverbs_get_method(object_spec, hdr->method_id); 380static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
309 if (!method_spec) 381{
310 return -EPROTONOSUPPORT; 382 unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
383 struct bundle_alloc_head *memblock;
384 unsigned int i;
385 int ret = 0;
311 386
312 if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext) 387 i = -1;
313 return -EINVAL; 388 while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
389 i + 1)) < key_bitmap_len) {
390 struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
391 int current_ret;
392
393 current_ret = uverbs_finalize_object(
394 attr->obj_attr.uobject,
395 attr->obj_attr.attr_elm->spec.u.obj.access, commit);
396 if (!ret)
397 ret = current_ret;
398 }
314 399
315 ctx_size = sizeof(*ctx) + 400 for (memblock = pbundle->allocated_mem; memblock;) {
316 sizeof(struct uverbs_attr_bundle) + 401 struct bundle_alloc_head *tmp = memblock;
317 sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
318 sizeof(*ctx->uattrs) * hdr->num_attrs +
319 sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) *
320 method_spec->num_child_attrs +
321 sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) *
322 (method_spec->num_child_attrs / BITS_PER_LONG +
323 method_spec->num_buckets);
324
325 if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
326 ctx = (void *)data;
327 if (!ctx)
328 ctx = kmalloc(ctx_size, GFP_KERNEL);
329 if (!ctx)
330 return -ENOMEM;
331
332 ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx);
333 ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) +
334 (sizeof(ctx->uverbs_attr_bundle->hash[0]) *
335 method_spec->num_buckets);
336 curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
337 curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
338 402
339 /* 403 memblock = memblock->next;
340 * We just fill the pointers and num_attrs here. The data itself will be 404 kvfree(tmp);
341 * filled at a later stage (uverbs_process_attr)
342 */
343 for (i = 0; i < method_spec->num_buckets; i++) {
344 unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
345
346 ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
347 curr_attr += curr_num_attrs;
348 ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs;
349 ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap;
350 bitmap_zero(curr_bitmap, curr_num_attrs);
351 curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
352 } 405 }
353 406
354 err = copy_from_user(ctx->uattrs, buf, 407 return ret;
355 sizeof(*ctx->uattrs) * hdr->num_attrs); 408}
356 if (err) {
357 err = -EFAULT;
358 goto out;
359 }
360 409
361 err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev, 410static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
362 file, method_spec, ctx->uverbs_attr_bundle); 411 struct ib_uverbs_ioctl_hdr *hdr,
412 struct ib_uverbs_attr __user *user_attrs)
413{
414 const struct uverbs_api_ioctl_method *method_elm;
415 struct uverbs_api *uapi = ufile->device->uapi;
416 struct radix_tree_iter attrs_iter;
417 struct bundle_priv *pbundle;
418 struct bundle_priv onstack;
419 void __rcu **slot;
420 int destroy_ret;
421 int ret;
363 422
364 /* 423 if (unlikely(hdr->driver_id != uapi->driver_id))
365 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can 424 return -EINVAL;
366 * not invoke the method because the request is not supported. No 425
367 * other cases should return this code. 426 slot = radix_tree_iter_lookup(
368 */ 427 &uapi->radix, &attrs_iter,
369 if (unlikely(err == -EPROTONOSUPPORT)) { 428 uapi_key_obj(hdr->object_id) |
370 WARN_ON_ONCE(err == -EPROTONOSUPPORT); 429 uapi_key_ioctl_method(hdr->method_id));
371 err = -EINVAL; 430 if (unlikely(!slot))
431 return -EPROTONOSUPPORT;
432 method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
433
434 if (!method_elm->use_stack) {
435 pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
436 if (!pbundle)
437 return -ENOMEM;
438 pbundle->internal_avail =
439 method_elm->bundle_size -
440 offsetof(struct bundle_priv, internal_buffer);
441 pbundle->alloc_head.next = NULL;
442 pbundle->allocated_mem = &pbundle->alloc_head;
443 } else {
444 pbundle = &onstack;
445 pbundle->internal_avail = sizeof(pbundle->internal_buffer);
446 pbundle->allocated_mem = NULL;
372 } 447 }
373out:
374 if (ctx != (void *)data)
375 kfree(ctx);
376 return err;
377}
378 448
379#define IB_UVERBS_MAX_CMD_SZ 4096 449 /* Space for the pbundle->bundle.attrs flex array */
450 pbundle->method_elm = method_elm;
451 pbundle->method_key = attrs_iter.index;
452 pbundle->bundle.ufile = ufile;
453 pbundle->radix = &uapi->radix;
454 pbundle->radix_slots = slot;
455 pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
456 pbundle->user_attrs = user_attrs;
457
458 pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
459 sizeof(*pbundle->bundle.attrs),
460 sizeof(*pbundle->internal_buffer));
461 memset(pbundle->bundle.attr_present, 0,
462 sizeof(pbundle->bundle.attr_present));
463 memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
464
465 ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
466 destroy_ret = bundle_destroy(pbundle, ret == 0);
467 if (unlikely(destroy_ret && !ret))
468 return destroy_ret;
469
470 return ret;
471}
380 472
381long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 473long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
382{ 474{
@@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
384 struct ib_uverbs_ioctl_hdr __user *user_hdr = 476 struct ib_uverbs_ioctl_hdr __user *user_hdr =
385 (struct ib_uverbs_ioctl_hdr __user *)arg; 477 (struct ib_uverbs_ioctl_hdr __user *)arg;
386 struct ib_uverbs_ioctl_hdr hdr; 478 struct ib_uverbs_ioctl_hdr hdr;
387 struct ib_device *ib_dev;
388 int srcu_key; 479 int srcu_key;
389 long err; 480 int err;
481
482 if (unlikely(cmd != RDMA_VERBS_IOCTL))
483 return -ENOIOCTLCMD;
484
485 err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
486 if (err)
487 return -EFAULT;
488
489 if (hdr.length > PAGE_SIZE ||
490 hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
491 return -EINVAL;
492
493 if (hdr.reserved1 || hdr.reserved2)
494 return -EPROTONOSUPPORT;
390 495
391 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 496 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
392 ib_dev = srcu_dereference(file->device->ib_dev, 497 err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
393 &file->device->disassociate_srcu); 498 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
394 if (!ib_dev) { 499 return err;
395 err = -EIO; 500}
396 goto out; 501
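
For reference, the header that the new checks above validate is built by user space roughly as follows. This is a hedged sketch: the struct and header names are taken from the rdma uapi headers as recalled here and should be verified, the object/method ids and driver_id are caller-supplied placeholders, and real applications normally go through rdma-core rather than issuing this ioctl by hand.

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <rdma/rdma_user_ioctl.h>	/* RDMA_VERBS_IOCTL (assumed header) */
#include <rdma/rdma_user_ioctl_cmds.h>	/* struct ib_uverbs_ioctl_hdr/attr (assumed header) */

static int demo_invoke(int cmd_fd, __u32 driver_id, __u16 object_id,
		       __u16 method_id, const struct ib_uverbs_attr *attrs,
		       __u16 num_attrs)
{
	size_t len = sizeof(struct ib_uverbs_ioctl_hdr) +
		     num_attrs * sizeof(struct ib_uverbs_attr);
	struct ib_uverbs_ioctl_hdr *hdr = calloc(1, len);
	int ret;

	if (!hdr)
		return -1;

	/*
	 * The kernel requires length == struct_size(hdr, attrs, num_attrs)
	 * and length <= PAGE_SIZE; the reserved fields must stay zero or
	 * EPROTONOSUPPORT is returned.
	 */
	hdr->length = len;
	hdr->object_id = object_id;
	hdr->method_id = method_id;
	hdr->num_attrs = num_attrs;
	hdr->driver_id = driver_id;	/* must match the device's driver_id */
	memcpy(hdr->attrs, attrs, num_attrs * sizeof(*attrs));

	ret = ioctl(cmd_fd, RDMA_VERBS_IOCTL, hdr);
	free(hdr);
	return ret;
}
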
502int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
503 size_t idx, u64 allowed_bits)
504{
505 const struct uverbs_attr *attr;
506 u64 flags;
507
508 attr = uverbs_attr_get(attrs_bundle, idx);
509 /* Missing attribute means 0 flags */
510 if (IS_ERR(attr)) {
511 *to = 0;
512 return 0;
397 } 513 }
398 514
399 if (cmd == RDMA_VERBS_IOCTL) { 515 /*
400 err = copy_from_user(&hdr, user_hdr, sizeof(hdr)); 516 * New userspace code should use 8 bytes to pass flags, but we
517 * transparently support old userspaces that were using 4 bytes as
518 * well.
519 */
520 if (attr->ptr_attr.len == 8)
521 flags = attr->ptr_attr.data;
522 else if (attr->ptr_attr.len == 4)
523 flags = *(u32 *)&attr->ptr_attr.data;
524 else
525 return -EINVAL;
401 526
402 if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ || 527 if (flags & ~allowed_bits)
403 hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) { 528 return -EINVAL;
404 err = -EINVAL;
405 goto out;
406 }
407 529
408 if (hdr.reserved1 || hdr.reserved2) { 530 *to = flags;
409 err = -EPROTONOSUPPORT; 531 return 0;
410 goto out; 532}
411 } 533EXPORT_SYMBOL(uverbs_get_flags64);
412 534
413 err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, 535int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
414 (__user void *)arg + sizeof(hdr)); 536 size_t idx, u64 allowed_bits)
537{
538 u64 flags;
539 int ret;
540
541 ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
542 if (ret)
543 return ret;
544
545 if (flags > U32_MAX)
546 return -EINVAL;
547 *to = flags;
548
549 return 0;
550}
551EXPORT_SYMBOL(uverbs_get_flags32);
552
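
A hypothetical handler fragment showing how the two flag helpers above are meant to be consumed. The attribute id and flag bits are made up for illustration; only uverbs_get_flags32() and its conventions (a missing attribute reads as 0, unknown bits are rejected, a 4- or 8-byte attribute is accepted) come from the code above.

/* Made-up attribute id and flag bits, for illustration only. */
enum { UVERBS_ATTR_DEMO_FLAGS = 1 };
#define DEMO_FLAG_A	(1 << 0)
#define DEMO_FLAG_B	(1 << 1)

static int demo_flags_handler(struct ib_uverbs_file *file,
			      struct uverbs_attr_bundle *attrs)
{
	u32 flags;
	int ret;

	/* Accepts a 4- or 8-byte attribute and rejects unknown bits. */
	ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_DEMO_FLAGS,
				 DEMO_FLAG_A | DEMO_FLAG_B);
	if (ret)
		return ret;

	/* A caller that omitted the attribute simply sees flags == 0. */
	return 0;
}
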
553/*
554 * This is for ease of conversion. The purpose is to convert all drivers to
555 * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and
556 * attr == 1 is output.
557 */
558void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
559{
560 struct bundle_priv *pbundle =
561 container_of(bundle, struct bundle_priv, bundle);
562 const struct uverbs_attr *uhw_in =
563 uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
564 const struct uverbs_attr *uhw_out =
565 uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
566
567 if (!IS_ERR(uhw_in)) {
568 udata->inlen = uhw_in->ptr_attr.len;
569 if (uverbs_attr_ptr_is_inline(uhw_in))
570 udata->inbuf =
571 &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
572 .data;
573 else
574 udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
415 } else { 575 } else {
416 err = -ENOIOCTLCMD; 576 udata->inbuf = NULL;
577 udata->inlen = 0;
417 } 578 }
418out:
419 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
420 579
421 return err; 580 if (!IS_ERR(uhw_out)) {
581 udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
582 udata->outlen = uhw_out->ptr_attr.len;
583 } else {
584 udata->outbuf = NULL;
585 udata->outlen = 0;
586 }
587}
588
589int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
590 const void *from, size_t size)
591{
592 struct bundle_priv *pbundle =
593 container_of(bundle, struct bundle_priv, bundle);
594 const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
595 u16 flags;
596 size_t min_size;
597
598 if (IS_ERR(attr))
599 return PTR_ERR(attr);
600
601 min_size = min_t(size_t, attr->ptr_attr.len, size);
602 if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
603 return -EFAULT;
604
605 flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
606 UVERBS_ATTR_F_VALID_OUTPUT;
607 if (put_user(flags,
608 &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
609 return -EFAULT;
610
611 return 0;
422} 612}
613EXPORT_SYMBOL(uverbs_copy_to);
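
To round off this file, a hypothetical method handler showing how the last two helpers are consumed together. The attribute id is a placeholder; only create_udata(), uverbs_copy_to() and the UVERBS_ATTR_F_VALID_OUTPUT convention are taken from the code above, so treat the rest as a sketch.

/* Placeholder attribute id, for illustration only. */
enum { UVERBS_ATTR_DEMO_RESULT = 2 };

static int demo_query_handler(struct ib_uverbs_file *file,
			      struct uverbs_attr_bundle *attrs)
{
	struct ib_udata udata;
	u64 answer = 42;	/* stands in for a real query result */

	/* Map the optional UHW in/out attributes to the legacy ib_udata view. */
	create_udata(attrs, &udata);

	/* A driver call taking &udata for its private data would go here. */

	/*
	 * Copies min(user buffer len, sizeof(answer)) back and marks the
	 * attribute with UVERBS_ATTR_F_VALID_OUTPUT so user space can tell
	 * the response was written.
	 */
	return uverbs_copy_to(attrs, UVERBS_ATTR_DEMO_RESULT, &answer,
			      sizeof(answer));
}
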
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
deleted file mode 100644
index 6ceb672c4d46..000000000000
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ /dev/null
@@ -1,664 +0,0 @@
1/*
2 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/uverbs_ioctl.h>
34#include <rdma/rdma_user_ioctl.h>
35#include <linux/bitops.h>
36#include "uverbs.h"
37
38#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT)
39#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT)
40#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK)
41
42#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset, \
43 buckets_offset) \
44 for (tmpj = 0, \
45 elem = (*(const void ***)((hashes)[tmpi] + \
46 (buckets_offset)))[0]; \
47 tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset)); \
48 tmpj++) \
49 if ((elem = ((*(const void ***)(hashes[tmpi] + \
50 (buckets_offset)))[tmpj])))
51
52/*
53 * Iterate all elements of a few @hashes. The number of given hashes is
54 * indicated by @num_hashes. The offset of the number of buckets in the hash is
55 * represented by @num_buckets_offset, while the offset of the buckets array in
56 * the hash structure is represented by @buckets_offset. tmpi and tmpj are two
57 * short (or int) based indices that are given by the user. tmpi iterates over
58 * the different hashes. @elem points the current element in the hashes[tmpi]
59 * bucket we are looping on. To be honest, @hashes representation isn't exactly
60 * a hash, but more a collection of elements. These elements' ids are treated
61 * in a hash like manner, where the first upper bits are the bucket number.
62 * These elements are later mapped into a perfect-hash.
63 */
64#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes, \
65 num_buckets_offset, buckets_offset) \
66 for (tmpi = 0; tmpi < (num_hashes); tmpi++) \
67 _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\
68 buckets_offset)
69
70#define get_elements_iterators_entry_above(iters, num_elements, elements, \
71 num_objects_fld, objects_fld, bucket,\
72 min_id) \
73 get_elements_above_id((const void **)iters, num_elements, \
74 (const void **)(elements), \
75 offsetof(typeof(**elements), \
76 num_objects_fld), \
77 offsetof(typeof(**elements), objects_fld),\
78 offsetof(typeof(***(*elements)->objects_fld), id),\
79 bucket, min_id)
80
81#define get_objects_above_id(iters, num_trees, trees, bucket, min_id) \
82 get_elements_iterators_entry_above(iters, num_trees, trees, \
83 num_objects, objects, bucket, min_id)
84
85#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\
86 get_elements_iterators_entry_above(method_iters, num_iters, iters, \
87 num_methods, methods, bucket, min_id)
88
89#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\
90 get_elements_iterators_entry_above(attrs_iters, num_iters, iters, \
91 num_attrs, attrs, bucket, min_id)
92
93/*
94 * get_elements_above_id get a few hashes represented by @elements and
95 * @num_elements. The hashes fields are described by @num_offset, @data_offset
96 * and @id_offset in the same way as required by for_each_element. The function
97 * returns an array of @iters, represents an array of elements in the hashes
98 * buckets, which their ids are the smallest ids in all hashes but are all
99 * larger than the id given by min_id. Elements are only added to the iters
100 * array if their id belongs to the bucket @bucket. The number of elements in
101 * the returned array is returned by the function. @min_id is also updated to
102 * reflect the new min_id of all elements in iters.
103 */
104static size_t get_elements_above_id(const void **iters,
105 unsigned int num_elements,
106 const void **elements,
107 size_t num_offset,
108 size_t data_offset,
109 size_t id_offset,
110 u16 bucket,
111 short *min_id)
112{
113 size_t num_iters = 0;
114 short min = SHRT_MAX;
115 const void *elem;
116 int i, j, last_stored = -1;
117 unsigned int equal_min = 0;
118
119 for_each_element(elem, i, j, elements, num_elements, num_offset,
120 data_offset) {
121 u16 id = *(u16 *)(elem + id_offset);
122
123 if (GET_NS_ID(id) != bucket)
124 continue;
125
126 if (GET_ID(id) < *min_id ||
127 (min != SHRT_MAX && GET_ID(id) > min))
128 continue;
129
130 /*
131 * We first iterate all hashes represented by @elements. When
132 * we do, we try to find an element @elem in the bucket @bucket
133 * which its id is min. Since we can't ensure the user sorted
134 * the elements in increasing order, we override this hash's
135 * minimal id element we found, if a new element with a smaller
136 * id was just found.
137 */
138 iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
139 last_stored = i;
140 if (min == GET_ID(id))
141 equal_min++;
142 else
143 equal_min = 1;
144 min = GET_ID(id);
145 }
146
147 /*
148 * We only insert to our iters array an element, if its id is smaller
149 * than all previous ids. Therefore, the final iters array is sorted so
150 * that smaller ids are in the end of the array.
151 * Therefore, we need to clean the beginning of the array to make sure
152 * all ids of final elements are equal to min.
153 */
154 memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
155
156 *min_id = min;
157 return equal_min;
158}
159
160#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
161 objects_fld, bucket) \
162 find_max_element_id(num_elements, (const void **)(elements), \
163 offsetof(typeof(**elements), num_objects_fld), \
164 offsetof(typeof(**elements), objects_fld), \
165 offsetof(typeof(***(*elements)->objects_fld), id),\
166 bucket)
167
168static short find_max_element_ns_id(unsigned int num_elements,
169 const void **elements,
170 size_t num_offset,
171 size_t data_offset,
172 size_t id_offset)
173{
174 short max_ns = SHRT_MIN;
175 const void *elem;
176 int i, j;
177
178 for_each_element(elem, i, j, elements, num_elements, num_offset,
179 data_offset) {
180 u16 id = *(u16 *)(elem + id_offset);
181
182 if (GET_NS_ID(id) > max_ns)
183 max_ns = GET_NS_ID(id);
184 }
185
186 return max_ns;
187}
188
189static short find_max_element_id(unsigned int num_elements,
190 const void **elements,
191 size_t num_offset,
192 size_t data_offset,
193 size_t id_offset,
194 u16 bucket)
195{
196 short max_id = SHRT_MIN;
197 const void *elem;
198 int i, j;
199
200 for_each_element(elem, i, j, elements, num_elements, num_offset,
201 data_offset) {
202 u16 id = *(u16 *)(elem + id_offset);
203
204 if (GET_NS_ID(id) == bucket &&
205 GET_ID(id) > max_id)
206 max_id = GET_ID(id);
207 }
208 return max_id;
209}
210
211#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
212 objects_fld, bucket) \
213 find_max_element_id(num_elements, (const void **)(elements), \
214 offsetof(typeof(**elements), num_objects_fld), \
215 offsetof(typeof(**elements), objects_fld), \
216 offsetof(typeof(***(*elements)->objects_fld), id),\
217 bucket)
218
219#define find_max_element_ns_entry_id(num_elements, elements, \
220 num_objects_fld, objects_fld) \
221 find_max_element_ns_id(num_elements, (const void **)(elements), \
222 offsetof(typeof(**elements), num_objects_fld),\
223 offsetof(typeof(**elements), objects_fld), \
224 offsetof(typeof(***(*elements)->objects_fld), id))
225
226/*
227 * find_max_xxxx_ns_id gets a few elements. Each element is described by an id
228 * which its upper bits represents a namespace. It finds the max namespace. This
229 * could be used in order to know how many buckets do we need to allocate. If no
230 * elements exist, SHRT_MIN is returned. Namespace represents here different
231 * buckets. The common example is "common bucket" and "driver bucket".
232 *
233 * find_max_xxxx_id gets a few elements and a bucket. Each element is described
234 * by an id which its upper bits represent a namespace. It returns the max id
235 * which is contained in the same namespace defined in @bucket. This could be
236 * used in order to know how many elements do we need to allocate in the bucket.
237 * If no elements exist, SHRT_MIN is returned.
238 */
239
240#define find_max_object_id(num_trees, trees, bucket) \
241 find_max_element_entry_id(num_trees, trees, num_objects,\
242 objects, bucket)
243#define find_max_object_ns_id(num_trees, trees) \
244 find_max_element_ns_entry_id(num_trees, trees, \
245 num_objects, objects)
246
247#define find_max_method_id(num_iters, iters, bucket) \
248 find_max_element_entry_id(num_iters, iters, num_methods,\
249 methods, bucket)
250#define find_max_method_ns_id(num_iters, iters) \
251 find_max_element_ns_entry_id(num_iters, iters, \
252 num_methods, methods)
253
254#define find_max_attr_id(num_iters, iters, bucket) \
255 find_max_element_entry_id(num_iters, iters, num_attrs, \
256 attrs, bucket)
257#define find_max_attr_ns_id(num_iters, iters) \
258 find_max_element_ns_entry_id(num_iters, iters, \
259 num_attrs, attrs)
260
261static void free_method(struct uverbs_method_spec *method)
262{
263 unsigned int i;
264
265 if (!method)
266 return;
267
268 for (i = 0; i < method->num_buckets; i++)
269 kfree(method->attr_buckets[i]);
270
271 kfree(method);
272}
273
274#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \
275 (attr)->type == UVERBS_ATTR_TYPE_FD)
276
277/*
278 * This function gets array of size @num_method_defs which contains pointers to
279 * method definitions @method_defs. The function allocates an
280 * uverbs_method_spec structure and initializes its number of buckets and the
281 * elements in buckets to the correct attributes. While doing that, it
282 * validates that there aren't conflicts between attributes of different
283 * method_defs.
284 */
285static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs,
286 size_t num_method_defs)
287{
288 int bucket_idx;
289 int max_attr_buckets = 0;
290 size_t num_attr_buckets = 0;
291 int res = 0;
292 struct uverbs_method_spec *method = NULL;
293 const struct uverbs_attr_def **attr_defs;
294 unsigned int num_of_singularities = 0;
295
296 max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs);
297 if (max_attr_buckets >= 0)
298 num_attr_buckets = max_attr_buckets + 1;
299
300 method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets),
301 GFP_KERNEL);
302 if (!method)
303 return ERR_PTR(-ENOMEM);
304
305 method->num_buckets = num_attr_buckets;
306 attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL);
307 if (!attr_defs) {
308 res = -ENOMEM;
309 goto free_method;
310 }
311 for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) {
312 short min_id = SHRT_MIN;
313 int attr_max_bucket = 0;
314 struct uverbs_attr_spec_hash *hash = NULL;
315
316 attr_max_bucket = find_max_attr_id(num_method_defs, method_defs,
317 bucket_idx);
318 if (attr_max_bucket < 0)
319 continue;
320
321 hash = kzalloc(sizeof(*hash) +
322 ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
323 sizeof(long)) +
324 BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
325 GFP_KERNEL);
326 if (!hash) {
327 res = -ENOMEM;
328 goto free;
329 }
330 hash->num_attrs = attr_max_bucket + 1;
331 method->num_child_attrs += hash->num_attrs;
332 hash->mandatory_attrs_bitmask = (void *)(hash + 1) +
333 ALIGN(sizeof(*hash->attrs) *
334 (attr_max_bucket + 1),
335 sizeof(long));
336
337 method->attr_buckets[bucket_idx] = hash;
338
339 do {
340 size_t num_attr_defs;
341 struct uverbs_attr_spec *attr;
342 bool attr_obj_with_special_access;
343
344 num_attr_defs =
345 get_attrs_above_id(attr_defs,
346 num_method_defs,
347 method_defs,
348 bucket_idx,
349 &min_id);
350 /* Last attr in bucket */
351 if (!num_attr_defs)
352 break;
353
354 if (num_attr_defs > 1) {
355 /*
356 * We don't allow two attribute definitions for
357 * the same attribute. This is usually a
358 * programmer error. If required, it's better to
359 * just add a new attribute to capture the new
360 * semantics.
361 */
362 res = -EEXIST;
363 goto free;
364 }
365
366 attr = &hash->attrs[min_id];
367 memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
368
369 attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
370 (attr->obj.access == UVERBS_ACCESS_NEW ||
371 attr->obj.access == UVERBS_ACCESS_DESTROY);
372 num_of_singularities += !!attr_obj_with_special_access;
373 if (WARN(num_of_singularities > 1,
374 "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
375 min_id) ||
376 WARN(attr_obj_with_special_access &&
377 !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
378 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
379 min_id) ||
380 WARN(IS_ATTR_OBJECT(attr) &&
381 attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
382 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
383 min_id)) {
384 res = -EINVAL;
385 goto free;
386 }
387
388 if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
389 set_bit(min_id, hash->mandatory_attrs_bitmask);
390 min_id++;
391
392 } while (1);
393 }
394 kfree(attr_defs);
395 return method;
396
397free:
398 kfree(attr_defs);
399free_method:
400 free_method(method);
401 return ERR_PTR(res);
402}
403
404static void free_object(struct uverbs_object_spec *object)
405{
406 unsigned int i, j;
407
408 if (!object)
409 return;
410
411 for (i = 0; i < object->num_buckets; i++) {
412 struct uverbs_method_spec_hash *method_buckets =
413 object->method_buckets[i];
414
415 if (!method_buckets)
416 continue;
417
418 for (j = 0; j < method_buckets->num_methods; j++)
419 free_method(method_buckets->methods[j]);
420
421 kfree(method_buckets);
422 }
423
424 kfree(object);
425}
426
427/*
428 * This function gets array of size @num_object_defs which contains pointers to
429 * object definitions @object_defs. The function allocated an
430 * uverbs_object_spec structure and initialize its number of buckets and the
431 * elements in buckets to the correct methods. While doing that, it
432 * sorts out the correct relationship between conflicts in the same method.
433 */
434static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs,
435 size_t num_object_defs)
436{
437 u16 bucket_idx;
438 int max_method_buckets = 0;
439 u16 num_method_buckets = 0;
440 int res = 0;
441 struct uverbs_object_spec *object = NULL;
442 const struct uverbs_method_def **method_defs;
443
444 max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs);
445 if (max_method_buckets >= 0)
446 num_method_buckets = max_method_buckets + 1;
447
448 object = kzalloc(struct_size(object, method_buckets,
449 num_method_buckets),
450 GFP_KERNEL);
451 if (!object)
452 return ERR_PTR(-ENOMEM);
453
454 object->num_buckets = num_method_buckets;
455 method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL);
456 if (!method_defs) {
457 res = -ENOMEM;
458 goto free_object;
459 }
460
461 for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) {
462 short min_id = SHRT_MIN;
463 int methods_max_bucket = 0;
464 struct uverbs_method_spec_hash *hash = NULL;
465
466 methods_max_bucket = find_max_method_id(num_object_defs, object_defs,
467 bucket_idx);
468 if (methods_max_bucket < 0)
469 continue;
470
471 hash = kzalloc(struct_size(hash, methods,
472 methods_max_bucket + 1),
473 GFP_KERNEL);
474 if (!hash) {
475 res = -ENOMEM;
476 goto free;
477 }
478
479 hash->num_methods = methods_max_bucket + 1;
480 object->method_buckets[bucket_idx] = hash;
481
482 do {
483 size_t num_method_defs;
484 struct uverbs_method_spec *method;
485 int i;
486
487 num_method_defs =
488 get_methods_above_id(method_defs,
489 num_object_defs,
490 object_defs,
491 bucket_idx,
492 &min_id);
493 /* Last method in bucket */
494 if (!num_method_defs)
495 break;
496
497 method = build_method_with_attrs(method_defs,
498 num_method_defs);
499 if (IS_ERR(method)) {
500 res = PTR_ERR(method);
501 goto free;
502 }
503
504 /*
505 * The last tree which is given as an argument to the
506 * merge overrides previous method handler.
507 * Therefore, we iterate backwards and search for the
508 * first handler which != NULL. This also defines the
509 * set of flags used for this handler.
510 */
511 for (i = num_method_defs - 1;
512 i >= 0 && !method_defs[i]->handler; i--)
513 ;
514 hash->methods[min_id++] = method;
515 /* NULL handler isn't allowed */
516 if (WARN(i < 0,
517 "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n",
518 min_id)) {
519 res = -EINVAL;
520 goto free;
521 }
522 method->handler = method_defs[i]->handler;
523 method->flags = method_defs[i]->flags;
524
525 } while (1);
526 }
527 kfree(method_defs);
528 return object;
529
530free:
531 kfree(method_defs);
532free_object:
533 free_object(object);
534 return ERR_PTR(res);
535}
536
537void uverbs_free_spec_tree(struct uverbs_root_spec *root)
538{
539 unsigned int i, j;
540
541 if (!root)
542 return;
543
544 for (i = 0; i < root->num_buckets; i++) {
545 struct uverbs_object_spec_hash *object_hash =
546 root->object_buckets[i];
547
548 if (!object_hash)
549 continue;
550
551 for (j = 0; j < object_hash->num_objects; j++)
552 free_object(object_hash->objects[j]);
553
554 kfree(object_hash);
555 }
556
557 kfree(root);
558}
559EXPORT_SYMBOL(uverbs_free_spec_tree);
560
561struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
562 const struct uverbs_object_tree_def **trees)
563{
564 u16 bucket_idx;
565 short max_object_buckets = 0;
566 size_t num_objects_buckets = 0;
567 struct uverbs_root_spec *root_spec = NULL;
568 const struct uverbs_object_def **object_defs;
569 int i;
570 int res = 0;
571
572 max_object_buckets = find_max_object_ns_id(num_trees, trees);
573 /*
574 * Devices which don't want to support ib_uverbs, should just allocate
575 * an empty parsing tree. Every user-space command won't hit any valid
576 * entry in the parsing tree and thus will fail.
577 */
578 if (max_object_buckets >= 0)
579 num_objects_buckets = max_object_buckets + 1;
580
581 root_spec = kzalloc(struct_size(root_spec, object_buckets,
582 num_objects_buckets),
583 GFP_KERNEL);
584 if (!root_spec)
585 return ERR_PTR(-ENOMEM);
586 root_spec->num_buckets = num_objects_buckets;
587
588 object_defs = kcalloc(num_trees, sizeof(*object_defs),
589 GFP_KERNEL);
590 if (!object_defs) {
591 res = -ENOMEM;
592 goto free_root;
593 }
594
595 for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) {
596 short min_id = SHRT_MIN;
597 short objects_max_bucket;
598 struct uverbs_object_spec_hash *hash = NULL;
599
600 objects_max_bucket = find_max_object_id(num_trees, trees,
601 bucket_idx);
602 if (objects_max_bucket < 0)
603 continue;
604
605 hash = kzalloc(struct_size(hash, objects,
606 objects_max_bucket + 1),
607 GFP_KERNEL);
608 if (!hash) {
609 res = -ENOMEM;
610 goto free;
611 }
612 hash->num_objects = objects_max_bucket + 1;
613 root_spec->object_buckets[bucket_idx] = hash;
614
615 do {
616 size_t num_object_defs;
617 struct uverbs_object_spec *object;
618
619 num_object_defs = get_objects_above_id(object_defs,
620 num_trees,
621 trees,
622 bucket_idx,
623 &min_id);
624 /* Last object in bucket */
625 if (!num_object_defs)
626 break;
627
628 object = build_object_with_methods(object_defs,
629 num_object_defs);
630 if (IS_ERR(object)) {
631 res = PTR_ERR(object);
632 goto free;
633 }
634
635 /*
636 * The last tree which is given as an argument to the
637 * merge overrides previous object's type_attrs.
638 * Therefore, we iterate backwards and search for the
639 * first type_attrs which != NULL.
640 */
641 for (i = num_object_defs - 1;
642 i >= 0 && !object_defs[i]->type_attrs; i--)
643 ;
644 /*
645 * NULL is a valid type_attrs. It means an object we
646 * can't instantiate (like DEVICE).
647 */
648 object->type_attrs = i < 0 ? NULL :
649 object_defs[i]->type_attrs;
650
651 hash->objects[min_id++] = object;
652 } while (1);
653 }
654
655 kfree(object_defs);
656 return root_spec;
657
658free:
659 kfree(object_defs);
660free_root:
661 uverbs_free_spec_tree(root_spec);
662 return ERR_PTR(res);
663}
664EXPORT_SYMBOL(uverbs_alloc_spec_tree);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2094d136513d..823beca448e1 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,8 +41,6 @@
41#include <linux/fs.h> 41#include <linux/fs.h>
42#include <linux/poll.h> 42#include <linux/poll.h>
43#include <linux/sched.h> 43#include <linux/sched.h>
44#include <linux/sched/mm.h>
45#include <linux/sched/task.h>
46#include <linux/file.h> 44#include <linux/file.h>
47#include <linux/cdev.h> 45#include <linux/cdev.h>
48#include <linux/anon_inodes.h> 46#include <linux/anon_inodes.h>
@@ -77,7 +75,6 @@ static struct class *uverbs_class;
77static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 75static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
78 76
79static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 77static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
80 struct ib_device *ib_dev,
81 const char __user *buf, int in_len, 78 const char __user *buf, int in_len,
82 int out_len) = { 79 int out_len) = {
83 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, 80 [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
@@ -118,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
118}; 115};
119 116
120static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, 117static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
121 struct ib_device *ib_dev,
122 struct ib_udata *ucore, 118 struct ib_udata *ucore,
123 struct ib_udata *uhw) = { 119 struct ib_udata *uhw) = {
124 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, 120 [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
@@ -138,6 +134,30 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
138static void ib_uverbs_add_one(struct ib_device *device); 134static void ib_uverbs_add_one(struct ib_device *device);
139static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); 135static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
140 136
137/*
138 * Must be called with the ufile->device->disassociate_srcu held, and the lock
139 * must be held until use of the ucontext is finished.
140 */
141struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
142{
143 /*
 144	 * We do not hold the hw_destroy_rwsem lock for this flow; instead,
 145	 * srcu is used. It does not matter if someone races this with
 146	 * get_context: we get either NULL or a valid ucontext.
147 */
148 struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
149
150 if (!srcu_dereference(ufile->device->ib_dev,
151 &ufile->device->disassociate_srcu))
152 return ERR_PTR(-EIO);
153
154 if (!ucontext)
155 return ERR_PTR(-EINVAL);
156
157 return ucontext;
158}
159EXPORT_SYMBOL(ib_uverbs_get_ucontext);
160
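
The SRCU rule stated in the comment above is easiest to see as a pattern. The sketch below mirrors what the reworked ib_uverbs_mmap() further down in this file does and is illustrative rather than new API: the read lock has to span both the lookup and every use of the returned ucontext.

static int demo_use_ucontext(struct ib_uverbs_file *ufile)
{
	struct ib_ucontext *ucontext;
	int srcu_key;
	int ret = 0;

	srcu_key = srcu_read_lock(&ufile->device->disassociate_srcu);
	ucontext = ib_uverbs_get_ucontext(ufile);
	if (IS_ERR(ucontext)) {
		ret = PTR_ERR(ucontext);
		goto out;
	}

	/* Safe to touch ucontext->device etc. only while srcu is held. */
out:
	srcu_read_unlock(&ufile->device->disassociate_srcu, srcu_key);
	return ret;
}
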
141int uverbs_dealloc_mw(struct ib_mw *mw) 161int uverbs_dealloc_mw(struct ib_mw *mw)
142{ 162{
143 struct ib_pd *pd = mw->pd; 163 struct ib_pd *pd = mw->pd;
@@ -154,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
154 struct ib_uverbs_device *dev = 174 struct ib_uverbs_device *dev =
155 container_of(kobj, struct ib_uverbs_device, kobj); 175 container_of(kobj, struct ib_uverbs_device, kobj);
156 176
177 uverbs_destroy_api(dev->uapi);
157 cleanup_srcu_struct(&dev->disassociate_srcu); 178 cleanup_srcu_struct(&dev->disassociate_srcu);
158 kfree(dev); 179 kfree(dev);
159} 180}
@@ -184,7 +205,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
184 } 205 }
185 spin_unlock_irq(&ev_file->ev_queue.lock); 206 spin_unlock_irq(&ev_file->ev_queue.lock);
186 207
187 uverbs_uobject_put(&ev_file->uobj_file.uobj); 208 uverbs_uobject_put(&ev_file->uobj);
188 } 209 }
189 210
190 spin_lock_irq(&file->async_file->ev_queue.lock); 211 spin_lock_irq(&file->async_file->ev_queue.lock);
@@ -220,20 +241,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
220 } 241 }
221} 242}
222 243
223static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
224 struct ib_ucontext *context,
225 bool device_removed)
226{
227 context->closing = 1;
228 uverbs_cleanup_ucontext(context, device_removed);
229 put_pid(context->tgid);
230
231 ib_rdmacg_uncharge(&context->cg_obj, context->device,
232 RDMACG_RESOURCE_HCA_HANDLE);
233
234 return context->device->dealloc_ucontext(context);
235}
236
237static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) 244static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
238{ 245{
239 complete(&dev->comp); 246 complete(&dev->comp);
@@ -246,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref)
246 struct ib_device *ib_dev; 253 struct ib_device *ib_dev;
247 int srcu_key; 254 int srcu_key;
248 255
256 release_ufile_idr_uobject(file);
257
249 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 258 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
250 ib_dev = srcu_dereference(file->device->ib_dev, 259 ib_dev = srcu_dereference(file->device->ib_dev,
251 &file->device->disassociate_srcu); 260 &file->device->disassociate_srcu);
@@ -338,7 +347,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
338 filp->private_data; 347 filp->private_data;
339 348
340 return ib_uverbs_event_read(&comp_ev_file->ev_queue, 349 return ib_uverbs_event_read(&comp_ev_file->ev_queue,
341 comp_ev_file->uobj_file.ufile, filp, 350 comp_ev_file->uobj.ufile, filp,
342 buf, count, pos, 351 buf, count, pos,
343 sizeof(struct ib_uverbs_comp_event_desc)); 352 sizeof(struct ib_uverbs_comp_event_desc));
344} 353}
@@ -420,7 +429,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
420 429
421static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp) 430static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
422{ 431{
423 struct ib_uverbs_completion_event_file *file = filp->private_data; 432 struct ib_uobject *uobj = filp->private_data;
433 struct ib_uverbs_completion_event_file *file = container_of(
434 uobj, struct ib_uverbs_completion_event_file, uobj);
424 struct ib_uverbs_event *entry, *tmp; 435 struct ib_uverbs_event *entry, *tmp;
425 436
426 spin_lock_irq(&file->ev_queue.lock); 437 spin_lock_irq(&file->ev_queue.lock);
@@ -528,7 +539,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
528 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, 539 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
529 struct ib_ucq_object, uobject); 540 struct ib_ucq_object, uobject);
530 541
531 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, 542 ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
532 event->event, &uobj->async_list, 543 event->event, &uobj->async_list,
533 &uobj->async_events_reported); 544 &uobj->async_events_reported);
534} 545}
@@ -637,13 +648,13 @@ err_put_refs:
637 return filp; 648 return filp;
638} 649}
639 650
640static bool verify_command_mask(struct ib_device *ib_dev, 651static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
641 u32 command, bool extended) 652 bool extended)
642{ 653{
643 if (!extended) 654 if (!extended)
644 return ib_dev->uverbs_cmd_mask & BIT_ULL(command); 655 return ufile->uverbs_cmd_mask & BIT_ULL(command);
645 656
646 return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command); 657 return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
647} 658}
648 659
649static bool verify_command_idx(u32 command, bool extended) 660static bool verify_command_idx(u32 command, bool extended)
@@ -713,7 +724,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
713{ 724{
714 struct ib_uverbs_file *file = filp->private_data; 725 struct ib_uverbs_file *file = filp->private_data;
715 struct ib_uverbs_ex_cmd_hdr ex_hdr; 726 struct ib_uverbs_ex_cmd_hdr ex_hdr;
716 struct ib_device *ib_dev;
717 struct ib_uverbs_cmd_hdr hdr; 727 struct ib_uverbs_cmd_hdr hdr;
718 bool extended; 728 bool extended;
719 int srcu_key; 729 int srcu_key;
@@ -748,24 +758,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
748 return ret; 758 return ret;
749 759
750 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 760 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
751 ib_dev = srcu_dereference(file->device->ib_dev,
752 &file->device->disassociate_srcu);
753 if (!ib_dev) {
754 ret = -EIO;
755 goto out;
756 }
757
758 /*
759 * Must be after the ib_dev check, as once the RCU clears ib_dev ==
760 * NULL means ucontext == NULL
761 */
762 if (!file->ucontext &&
763 (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) {
764 ret = -EINVAL;
765 goto out;
766 }
767 761
768 if (!verify_command_mask(ib_dev, command, extended)) { 762 if (!verify_command_mask(file, command, extended)) {
769 ret = -EOPNOTSUPP; 763 ret = -EOPNOTSUPP;
770 goto out; 764 goto out;
771 } 765 }
@@ -773,7 +767,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
773 buf += sizeof(hdr); 767 buf += sizeof(hdr);
774 768
775 if (!extended) { 769 if (!extended) {
776 ret = uverbs_cmd_table[command](file, ib_dev, buf, 770 ret = uverbs_cmd_table[command](file, buf,
777 hdr.in_words * 4, 771 hdr.in_words * 4,
778 hdr.out_words * 4); 772 hdr.out_words * 4);
779 } else { 773 } else {
@@ -792,7 +786,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
792 ex_hdr.provider_in_words * 8, 786 ex_hdr.provider_in_words * 8,
793 ex_hdr.provider_out_words * 8); 787 ex_hdr.provider_out_words * 8);
794 788
795 ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); 789 ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
796 ret = (ret) ? : count; 790 ret = (ret) ? : count;
797 } 791 }
798 792
@@ -804,22 +798,18 @@ out:
804static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) 798static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
805{ 799{
806 struct ib_uverbs_file *file = filp->private_data; 800 struct ib_uverbs_file *file = filp->private_data;
807 struct ib_device *ib_dev; 801 struct ib_ucontext *ucontext;
808 int ret = 0; 802 int ret = 0;
809 int srcu_key; 803 int srcu_key;
810 804
811 srcu_key = srcu_read_lock(&file->device->disassociate_srcu); 805 srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
812 ib_dev = srcu_dereference(file->device->ib_dev, 806 ucontext = ib_uverbs_get_ucontext(file);
813 &file->device->disassociate_srcu); 807 if (IS_ERR(ucontext)) {
814 if (!ib_dev) { 808 ret = PTR_ERR(ucontext);
815 ret = -EIO;
816 goto out; 809 goto out;
817 } 810 }
818 811
819 if (!file->ucontext) 812 ret = ucontext->device->mmap(ucontext, vma);
820 ret = -ENODEV;
821 else
822 ret = ib_dev->mmap(file->ucontext, vma);
823out: 813out:
824 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); 814 srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
825 return ret; 815 return ret;
@@ -879,13 +869,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
879 } 869 }
880 870
881 file->device = dev; 871 file->device = dev;
882 spin_lock_init(&file->idr_lock);
883 idr_init(&file->idr);
884 file->ucontext = NULL;
885 file->async_file = NULL;
886 kref_init(&file->ref); 872 kref_init(&file->ref);
887 mutex_init(&file->mutex); 873 mutex_init(&file->ucontext_lock);
888 mutex_init(&file->cleanup_mutex); 874
875 spin_lock_init(&file->uobjects_lock);
876 INIT_LIST_HEAD(&file->uobjects);
877 init_rwsem(&file->hw_destroy_rwsem);
889 878
890 filp->private_data = file; 879 filp->private_data = file;
891 kobject_get(&dev->kobj); 880 kobject_get(&dev->kobj);
@@ -893,6 +882,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
893 mutex_unlock(&dev->lists_mutex); 882 mutex_unlock(&dev->lists_mutex);
894 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 883 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
895 884
885 file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
886 file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
887
888 setup_ufile_idr_uobject(file);
889
896 return nonseekable_open(inode, filp); 890 return nonseekable_open(inode, filp);
897 891
898err_module: 892err_module:
@@ -911,13 +905,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
911{ 905{
912 struct ib_uverbs_file *file = filp->private_data; 906 struct ib_uverbs_file *file = filp->private_data;
913 907
914 mutex_lock(&file->cleanup_mutex); 908 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
915 if (file->ucontext) {
916 ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
917 file->ucontext = NULL;
918 }
919 mutex_unlock(&file->cleanup_mutex);
920 idr_destroy(&file->idr);
921 909
922 mutex_lock(&file->device->lists_mutex); 910 mutex_lock(&file->device->lists_mutex);
923 if (!file->is_closed) { 911 if (!file->is_closed) {
@@ -1006,6 +994,19 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1006static CLASS_ATTR_STRING(abi_version, S_IRUGO, 994static CLASS_ATTR_STRING(abi_version, S_IRUGO,
1007 __stringify(IB_USER_VERBS_ABI_VERSION)); 995 __stringify(IB_USER_VERBS_ABI_VERSION));
1008 996
997static int ib_uverbs_create_uapi(struct ib_device *device,
998 struct ib_uverbs_device *uverbs_dev)
999{
1000 struct uverbs_api *uapi;
1001
1002 uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
1003 if (IS_ERR(uapi))
1004 return PTR_ERR(uapi);
1005
1006 uverbs_dev->uapi = uapi;
1007 return 0;
1008}
1009
1009static void ib_uverbs_add_one(struct ib_device *device) 1010static void ib_uverbs_add_one(struct ib_device *device)
1010{ 1011{
1011 int devnum; 1012 int devnum;
@@ -1048,6 +1049,9 @@ static void ib_uverbs_add_one(struct ib_device *device)
1048 rcu_assign_pointer(uverbs_dev->ib_dev, device); 1049 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1049 uverbs_dev->num_comp_vectors = device->num_comp_vectors; 1050 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1050 1051
1052 if (ib_uverbs_create_uapi(device, uverbs_dev))
1053 goto err;
1054
1051 cdev_init(&uverbs_dev->cdev, NULL); 1055 cdev_init(&uverbs_dev->cdev, NULL);
1052 uverbs_dev->cdev.owner = THIS_MODULE; 1056 uverbs_dev->cdev.owner = THIS_MODULE;
1053 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; 1057 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
@@ -1067,18 +1071,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
1067 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) 1071 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1068 goto err_class; 1072 goto err_class;
1069 1073
1070 if (!device->specs_root) {
1071 const struct uverbs_object_tree_def *default_root[] = {
1072 uverbs_default_get_objects()};
1073
1074 uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
1075 default_root);
1076 if (IS_ERR(uverbs_dev->specs_root))
1077 goto err_class;
1078
1079 device->specs_root = uverbs_dev->specs_root;
1080 }
1081
1082 ib_set_client_data(device, &uverbs_client, uverbs_dev); 1074 ib_set_client_data(device, &uverbs_client, uverbs_dev);
1083 1075
1084 return; 1076 return;
@@ -1098,44 +1090,6 @@ err:
1098 return; 1090 return;
1099} 1091}
1100 1092
1101static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
1102{
1103 struct ib_device *ib_dev = ibcontext->device;
1104 struct task_struct *owning_process = NULL;
1105 struct mm_struct *owning_mm = NULL;
1106
1107 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
1108 if (!owning_process)
1109 return;
1110
1111 owning_mm = get_task_mm(owning_process);
1112 if (!owning_mm) {
1113 pr_info("no mm, disassociate ucontext is pending task termination\n");
1114 while (1) {
1115 put_task_struct(owning_process);
1116 usleep_range(1000, 2000);
1117 owning_process = get_pid_task(ibcontext->tgid,
1118 PIDTYPE_PID);
1119 if (!owning_process ||
1120 owning_process->state == TASK_DEAD) {
1121 pr_info("disassociate ucontext done, task was terminated\n");
1122 /* in case task was dead need to release the
1123 * task struct.
1124 */
1125 if (owning_process)
1126 put_task_struct(owning_process);
1127 return;
1128 }
1129 }
1130 }
1131
1132 down_write(&owning_mm->mmap_sem);
1133 ib_dev->disassociate_ucontext(ibcontext);
1134 up_write(&owning_mm->mmap_sem);
1135 mmput(owning_mm);
1136 put_task_struct(owning_process);
1137}
1138
1139static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, 1093static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1140 struct ib_device *ib_dev) 1094 struct ib_device *ib_dev)
1141{ 1095{
@@ -1144,46 +1098,31 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1144 struct ib_event event; 1098 struct ib_event event;
1145 1099
1146 /* Pending running commands to terminate */ 1100 /* Pending running commands to terminate */
1147 synchronize_srcu(&uverbs_dev->disassociate_srcu); 1101 uverbs_disassociate_api_pre(uverbs_dev);
1148 event.event = IB_EVENT_DEVICE_FATAL; 1102 event.event = IB_EVENT_DEVICE_FATAL;
1149 event.element.port_num = 0; 1103 event.element.port_num = 0;
1150 event.device = ib_dev; 1104 event.device = ib_dev;
1151 1105
1152 mutex_lock(&uverbs_dev->lists_mutex); 1106 mutex_lock(&uverbs_dev->lists_mutex);
1153 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1107 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1154 struct ib_ucontext *ucontext;
1155 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1108 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1156 struct ib_uverbs_file, list); 1109 struct ib_uverbs_file, list);
1157 file->is_closed = 1; 1110 file->is_closed = 1;
1158 list_del(&file->list); 1111 list_del(&file->list);
1159 kref_get(&file->ref); 1112 kref_get(&file->ref);
1160 mutex_unlock(&uverbs_dev->lists_mutex);
1161
1162
1163 mutex_lock(&file->cleanup_mutex);
1164 ucontext = file->ucontext;
1165 file->ucontext = NULL;
1166 mutex_unlock(&file->cleanup_mutex);
1167 1113
1168 /* At this point ib_uverbs_close cannot be running 1114 /* We must release the mutex before going ahead and calling
1169 * ib_uverbs_cleanup_ucontext 1115 * uverbs_cleanup_ufile, as it might end up indirectly calling
1116 * uverbs_close, for example due to freeing the resources (e.g.
1117 * mmput).
1170 */ 1118 */
1171 if (ucontext) { 1119 mutex_unlock(&uverbs_dev->lists_mutex);
1172 /* We must release the mutex before going ahead and
1173 * calling disassociate_ucontext. disassociate_ucontext
1174 * might end up indirectly calling uverbs_close,
1175 * for example due to freeing the resources
1176 * (e.g mmput).
1177 */
1178 ib_uverbs_event_handler(&file->event_handler, &event);
1179 ib_uverbs_disassociate_ucontext(ucontext);
1180 mutex_lock(&file->cleanup_mutex);
1181 ib_uverbs_cleanup_ucontext(file, ucontext, true);
1182 mutex_unlock(&file->cleanup_mutex);
1183 }
1184 1120
1185 mutex_lock(&uverbs_dev->lists_mutex); 1121 ib_uverbs_event_handler(&file->event_handler, &event);
1122 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
1186 kref_put(&file->ref, ib_uverbs_release_file); 1123 kref_put(&file->ref, ib_uverbs_release_file);
1124
1125 mutex_lock(&uverbs_dev->lists_mutex);
1187 } 1126 }
1188 1127
1189 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { 1128 while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
@@ -1205,6 +1144,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1205 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN); 1144 kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
1206 } 1145 }
1207 mutex_unlock(&uverbs_dev->lists_mutex); 1146 mutex_unlock(&uverbs_dev->lists_mutex);
1147
1148 uverbs_disassociate_api(uverbs_dev->uapi);
1208} 1149}
1209 1150
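The hunk above replaces the removed per-task mm juggling with three helpers. A minimal sketch of the resulting hot-unplug ordering, using only calls that appear in this hunk; the wrapper function itself is hypothetical and assumes the headers already included by uverbs_main.c:

/* Hypothetical wrapper; the three calls and their ordering are taken
 * from ib_uverbs_free_hw_resources() above.
 */
static void example_disassociate(struct ib_uverbs_device *uverbs_dev,
                                 struct ib_uverbs_file *file)
{
        /* 1. Fence off new write()/ioctl() methods that need the ib_dev */
        uverbs_disassociate_api_pre(uverbs_dev);

        /* 2. Tear down the file's HW objects as a driver-remove event */
        uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);

        /* 3. Retire the per-device parsing tree once all files are handled */
        uverbs_disassociate_api(uverbs_dev->uapi);
}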
1210static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) 1151static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1232,7 +1173,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1232 * cdev was deleted, however active clients can still issue 1173 * cdev was deleted, however active clients can still issue
1233 * commands and close their open files. 1174 * commands and close their open files.
1234 */ 1175 */
1235 rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
1236 ib_uverbs_free_hw_resources(uverbs_dev, device); 1176 ib_uverbs_free_hw_resources(uverbs_dev, device);
1237 wait_clients = 0; 1177 wait_clients = 0;
1238 } 1178 }
@@ -1241,10 +1181,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1241 ib_uverbs_comp_dev(uverbs_dev); 1181 ib_uverbs_comp_dev(uverbs_dev);
1242 if (wait_clients) 1182 if (wait_clients)
1243 wait_for_completion(&uverbs_dev->comp); 1183 wait_for_completion(&uverbs_dev->comp);
1244 if (uverbs_dev->specs_root) {
1245 uverbs_free_spec_tree(uverbs_dev->specs_root);
1246 device->specs_root = NULL;
1247 }
1248 1184
1249 kobject_put(&uverbs_dev->kobj); 1185 kobject_put(&uverbs_dev->kobj);
1250} 1186}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index bb372b4713a4..b8d715c68ca4 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
211 211
212 /* TODO: No need to set this */ 212 /* TODO: No need to set this */
213 sa_path_set_dmac_zero(dst); 213 sa_path_set_dmac_zero(dst);
214 sa_path_set_ndev(dst, NULL);
215 sa_path_set_ifindex(dst, 0);
216} 214}
217EXPORT_SYMBOL(ib_copy_path_rec_from_user); 215EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index b570acbd94af..203cc96ac6f5 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,14 +48,18 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
48static int uverbs_free_flow(struct ib_uobject *uobject, 48static int uverbs_free_flow(struct ib_uobject *uobject,
49 enum rdma_remove_reason why) 49 enum rdma_remove_reason why)
50{ 50{
51 int ret;
52 struct ib_flow *flow = (struct ib_flow *)uobject->object; 51 struct ib_flow *flow = (struct ib_flow *)uobject->object;
53 struct ib_uflow_object *uflow = 52 struct ib_uflow_object *uflow =
54 container_of(uobject, struct ib_uflow_object, uobject); 53 container_of(uobject, struct ib_uflow_object, uobject);
54 struct ib_qp *qp = flow->qp;
55 int ret;
55 56
56 ret = ib_destroy_flow(flow); 57 ret = flow->device->destroy_flow(flow);
57 if (!ret) 58 if (!ret) {
59 if (qp)
60 atomic_dec(&qp->usecnt);
58 ib_uverbs_flow_resources_free(uflow->resources); 61 ib_uverbs_flow_resources_free(uflow->resources);
62 }
59 63
60 return ret; 64 return ret;
61} 65}
@@ -74,6 +78,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
74 container_of(uobject, struct ib_uqp_object, uevent.uobject); 78 container_of(uobject, struct ib_uqp_object, uevent.uobject);
75 int ret; 79 int ret;
76 80
81 /*
82 * If this is a user triggered destroy then do not allow destruction
83 * until the user cleans up all the mcast bindings. Unlike in other
84 * places we forcibly clean up the mcast attachments for !DESTROY
 85 * because the mcast attaches are not uobjects and will not be
86 * destroyed by anything else during cleanup processing.
87 */
77 if (why == RDMA_REMOVE_DESTROY) { 88 if (why == RDMA_REMOVE_DESTROY) {
78 if (!list_empty(&uqp->mcast_list)) 89 if (!list_empty(&uqp->mcast_list))
79 return -EBUSY; 90 return -EBUSY;
@@ -82,7 +93,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
82 } 93 }
83 94
84 ret = ib_destroy_qp(qp); 95 ret = ib_destroy_qp(qp);
85 if (ret && why == RDMA_REMOVE_DESTROY) 96 if (ib_is_destroy_retryable(ret, why, uobject))
86 return ret; 97 return ret;
87 98
88 if (uqp->uxrcd) 99 if (uqp->uxrcd)
@@ -100,8 +111,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
100 int ret; 111 int ret;
101 112
102 ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); 113 ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
103 if (!ret || why != RDMA_REMOVE_DESTROY) 114 if (ib_is_destroy_retryable(ret, why, uobject))
104 kfree(ind_tbl); 115 return ret;
116
117 kfree(ind_tbl);
105 return ret; 118 return ret;
106} 119}
107 120
@@ -114,8 +127,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
114 int ret; 127 int ret;
115 128
116 ret = ib_destroy_wq(wq); 129 ret = ib_destroy_wq(wq);
117 if (!ret || why != RDMA_REMOVE_DESTROY) 130 if (ib_is_destroy_retryable(ret, why, uobject))
118 ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent); 131 return ret;
132
133 ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
119 return ret; 134 return ret;
120} 135}
121 136
@@ -129,8 +144,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
129 int ret; 144 int ret;
130 145
131 ret = ib_destroy_srq(srq); 146 ret = ib_destroy_srq(srq);
132 147 if (ib_is_destroy_retryable(ret, why, uobject))
133 if (ret && why == RDMA_REMOVE_DESTROY)
134 return ret; 148 return ret;
135 149
136 if (srq_type == IB_SRQT_XRC) { 150 if (srq_type == IB_SRQT_XRC) {
@@ -152,12 +166,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
152 container_of(uobject, struct ib_uxrcd_object, uobject); 166 container_of(uobject, struct ib_uxrcd_object, uobject);
153 int ret; 167 int ret;
154 168
169 ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
170 if (ret)
171 return ret;
172
155 mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex); 173 mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
156 if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt)) 174 ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
157 ret = -EBUSY;
158 else
159 ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
160 xrcd, why);
161 mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex); 175 mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
162 176
163 return ret; 177 return ret;
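Every free callback converted in this file converges on the same two helpers: ib_destroy_usecnt() for objects guarded only by a reference count, and ib_is_destroy_retryable() for objects whose driver destroy call may fail. A condensed sketch of both shapes, under the semantics implied by the hunks above (-EBUSY is only surfaced for a user-triggered RDMA_REMOVE_DESTROY; forced teardown proceeds regardless):

static int example_free_pd(struct ib_uobject *uobject,
                           enum rdma_remove_reason why)
{
        struct ib_pd *pd = uobject->object;
        int ret;

        /* Refuse only when userspace asked for the destroy while the PD
         * is still referenced; hot-unplug cleanup is never blocked.
         */
        ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
        if (ret)
                return ret;

        ib_dealloc_pd(pd);
        return 0;
}

static int example_free_qp(struct ib_uobject *uobject,
                           enum rdma_remove_reason why)
{
        struct ib_qp *qp = uobject->object;
        int ret = ib_destroy_qp(qp);

        /* A failed user-triggered destroy may be retried later; forced
         * teardown falls through and releases the dependent state.
         */
        if (ib_is_destroy_retryable(ret, why, uobject))
                return ret;

        /* ...release uevents/xrcd refs tied to the QP here... */
        return ret;
}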
@@ -167,20 +181,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
167 enum rdma_remove_reason why) 181 enum rdma_remove_reason why)
168{ 182{
169 struct ib_pd *pd = uobject->object; 183 struct ib_pd *pd = uobject->object;
184 int ret;
170 185
171 if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt)) 186 ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
172 return -EBUSY; 187 if (ret)
188 return ret;
173 189
174 ib_dealloc_pd((struct ib_pd *)uobject->object); 190 ib_dealloc_pd((struct ib_pd *)uobject->object);
175 return 0; 191 return 0;
176} 192}
177 193
178static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file, 194static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
179 enum rdma_remove_reason why) 195 enum rdma_remove_reason why)
180{ 196{
181 struct ib_uverbs_completion_event_file *comp_event_file = 197 struct ib_uverbs_completion_event_file *comp_event_file =
182 container_of(uobj_file, struct ib_uverbs_completion_event_file, 198 container_of(uobj, struct ib_uverbs_completion_event_file,
183 uobj_file); 199 uobj);
184 struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue; 200 struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
185 201
186 spin_lock_irq(&event_queue->lock); 202 spin_lock_irq(&event_queue->lock);
@@ -194,119 +210,77 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
194 return 0; 210 return 0;
195}; 211};
196 212
197int uverbs_destroy_def_handler(struct ib_device *ib_dev, 213int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
198 struct ib_uverbs_file *file,
199 struct uverbs_attr_bundle *attrs) 214 struct uverbs_attr_bundle *attrs)
200{ 215{
201 return 0; 216 return 0;
202} 217}
218EXPORT_SYMBOL(uverbs_destroy_def_handler);
203 219
204/* 220DECLARE_UVERBS_NAMED_OBJECT(
205 * This spec is used in order to pass information to the hardware driver in a 221 UVERBS_OBJECT_COMP_CHANNEL,
206 * legacy way. Every verb that could get driver specific data should get this 222 UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
207 * spec. 223 uverbs_hot_unplug_completion_event_file,
208 */ 224 &uverbs_event_fops,
209const struct uverbs_attr_def uverbs_uhw_compat_in = 225 "[infinibandevent]",
210 UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX), 226 O_RDONLY));
211 UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
212const struct uverbs_attr_def uverbs_uhw_compat_out =
213 UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
214 UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
215
216void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
217{
218 /*
219 * This is for ease of conversion. The purpose is to convert all drivers
220 * to use uverbs_attr_bundle instead of ib_udata.
221 * Assume attr == 0 is input and attr == 1 is output.
222 */
223 const struct uverbs_attr *uhw_in =
224 uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
225 const struct uverbs_attr *uhw_out =
226 uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
227
228 if (!IS_ERR(uhw_in)) {
229 udata->inlen = uhw_in->ptr_attr.len;
230 if (uverbs_attr_ptr_is_inline(uhw_in))
231 udata->inbuf = &uhw_in->uattr->data;
232 else
233 udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
234 } else {
235 udata->inbuf = NULL;
236 udata->inlen = 0;
237 }
238
239 if (!IS_ERR(uhw_out)) {
240 udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
241 udata->outlen = uhw_out->ptr_attr.len;
242 } else {
243 udata->outbuf = NULL;
244 udata->outlen = 0;
245 }
246}
247
248DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
249 &UVERBS_TYPE_ALLOC_FD(0,
250 sizeof(struct ib_uverbs_completion_event_file),
251 uverbs_hot_unplug_completion_event_file,
252 &uverbs_event_fops,
253 "[infinibandevent]", O_RDONLY));
254 227
255DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP, 228DECLARE_UVERBS_NAMED_OBJECT(
256 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0, 229 UVERBS_OBJECT_QP,
257 uverbs_free_qp)); 230 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
258 231
259DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW, 232DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
260 &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw)); 233 UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
261 234
262DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ, 235DECLARE_UVERBS_NAMED_OBJECT(
263 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0, 236 UVERBS_OBJECT_SRQ,
264 uverbs_free_srq)); 237 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
238 uverbs_free_srq));
265 239
266DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH, 240DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
267 &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah)); 241 UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
268 242
269DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW, 243DECLARE_UVERBS_NAMED_OBJECT(
270 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object), 244 UVERBS_OBJECT_FLOW,
271 0, uverbs_free_flow)); 245 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
246 uverbs_free_flow));
272 247
273DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ, 248DECLARE_UVERBS_NAMED_OBJECT(
274 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0, 249 UVERBS_OBJECT_WQ,
275 uverbs_free_wq)); 250 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
276 251
277DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL, 252DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
278 &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl)); 253 UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
279 254
280DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD, 255DECLARE_UVERBS_NAMED_OBJECT(
281 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0, 256 UVERBS_OBJECT_XRCD,
282 uverbs_free_xrcd)); 257 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
258 uverbs_free_xrcd));
283 259
284DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD, 260DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
285 /* 2 is used in order to free the PD after MRs */ 261 UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
286 &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd)); 262
287 263DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
288DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL); 264
289 265DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
290static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects, 266 &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
291 &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE), 267 &UVERBS_OBJECT(UVERBS_OBJECT_PD),
292 &UVERBS_OBJECT(UVERBS_OBJECT_PD), 268 &UVERBS_OBJECT(UVERBS_OBJECT_MR),
293 &UVERBS_OBJECT(UVERBS_OBJECT_MR), 269 &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
294 &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL), 270 &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
295 &UVERBS_OBJECT(UVERBS_OBJECT_CQ), 271 &UVERBS_OBJECT(UVERBS_OBJECT_QP),
296 &UVERBS_OBJECT(UVERBS_OBJECT_QP), 272 &UVERBS_OBJECT(UVERBS_OBJECT_AH),
297 &UVERBS_OBJECT(UVERBS_OBJECT_AH), 273 &UVERBS_OBJECT(UVERBS_OBJECT_MW),
298 &UVERBS_OBJECT(UVERBS_OBJECT_MW), 274 &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
299 &UVERBS_OBJECT(UVERBS_OBJECT_SRQ), 275 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
300 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW), 276 &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
301 &UVERBS_OBJECT(UVERBS_OBJECT_WQ), 277 &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
302 &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL), 278 &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
303 &UVERBS_OBJECT(UVERBS_OBJECT_XRCD), 279 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
304 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION), 280 &UVERBS_OBJECT(UVERBS_OBJECT_DM),
305 &UVERBS_OBJECT(UVERBS_OBJECT_DM), 281 &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
306 &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
307 282
308const struct uverbs_object_tree_def *uverbs_default_get_objects(void) 283const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
309{ 284{
310 return &uverbs_default_objects; 285 return &uverbs_default_objects;
311} 286}
312EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 03b182a684a6..a0ffdcf9a51c 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -38,20 +38,22 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
38 enum rdma_remove_reason why) 38 enum rdma_remove_reason why)
39{ 39{
40 struct ib_counters *counters = uobject->object; 40 struct ib_counters *counters = uobject->object;
41 int ret;
41 42
42 if (why == RDMA_REMOVE_DESTROY && 43 ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
43 atomic_read(&counters->usecnt)) 44 if (ret)
44 return -EBUSY; 45 return ret;
45 46
46 return counters->device->destroy_counters(counters); 47 return counters->device->destroy_counters(counters);
47} 48}
48 49
49static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev, 50static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
50 struct ib_uverbs_file *file, 51 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
51 struct uverbs_attr_bundle *attrs)
52{ 52{
53 struct ib_uobject *uobj = uverbs_attr_get_uobject(
54 attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
55 struct ib_device *ib_dev = uobj->context->device;
53 struct ib_counters *counters; 56 struct ib_counters *counters;
54 struct ib_uobject *uobj;
55 int ret; 57 int ret;
56 58
57 /* 59 /*
@@ -62,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
62 if (!ib_dev->create_counters) 64 if (!ib_dev->create_counters)
63 return -EOPNOTSUPP; 65 return -EOPNOTSUPP;
64 66
65 uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
66 counters = ib_dev->create_counters(ib_dev, attrs); 67 counters = ib_dev->create_counters(ib_dev, attrs);
67 if (IS_ERR(counters)) { 68 if (IS_ERR(counters)) {
68 ret = PTR_ERR(counters); 69 ret = PTR_ERR(counters);
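The signature change above recurs for every method handler in this series: the struct ib_device parameter is dropped and the device is recovered from a uobject the method is guaranteed to carry. Condensed from the create handler above (the NEW-access IDR attribute is mandatory, so the lookup cannot fail; the function name is illustrative):

static int example_create(struct ib_uverbs_file *file,
                          struct uverbs_attr_bundle *attrs)
{
        struct ib_uobject *uobj = uverbs_attr_get_uobject(
                attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
        struct ib_device *ib_dev = uobj->context->device;

        if (!ib_dev->create_counters)
                return -EOPNOTSUPP;

        /* ...allocate the counters object and attach it to uobj... */
        return 0;
}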
@@ -80,9 +81,8 @@ err_create_counters:
80 return ret; 81 return ret;
81} 82}
82 83
83static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev, 84static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
84 struct ib_uverbs_file *file, 85 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
85 struct uverbs_attr_bundle *attrs)
86{ 86{
87 struct ib_counters_read_attr read_attr = {}; 87 struct ib_counters_read_attr read_attr = {};
88 const struct uverbs_attr *uattr; 88 const struct uverbs_attr *uattr;
@@ -90,68 +90,62 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
90 uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE); 90 uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
91 int ret; 91 int ret;
92 92
93 if (!ib_dev->read_counters) 93 if (!counters->device->read_counters)
94 return -EOPNOTSUPP; 94 return -EOPNOTSUPP;
95 95
96 if (!atomic_read(&counters->usecnt)) 96 if (!atomic_read(&counters->usecnt))
97 return -EINVAL; 97 return -EINVAL;
98 98
99 ret = uverbs_copy_from(&read_attr.flags, attrs, 99 ret = uverbs_get_flags32(&read_attr.flags, attrs,
100 UVERBS_ATTR_READ_COUNTERS_FLAGS); 100 UVERBS_ATTR_READ_COUNTERS_FLAGS,
101 IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
101 if (ret) 102 if (ret)
102 return ret; 103 return ret;
103 104
104 uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); 105 uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
105 read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); 106 read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
106 read_attr.counters_buff = kcalloc(read_attr.ncounters, 107 read_attr.counters_buff = uverbs_zalloc(
107 sizeof(u64), GFP_KERNEL); 108 attrs, array_size(read_attr.ncounters, sizeof(u64)));
108 if (!read_attr.counters_buff) 109 if (IS_ERR(read_attr.counters_buff))
109 return -ENOMEM; 110 return PTR_ERR(read_attr.counters_buff);
110
111 ret = ib_dev->read_counters(counters,
112 &read_attr,
113 attrs);
114 if (ret)
115 goto err_read;
116 111
117 ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF, 112 ret = counters->device->read_counters(counters, &read_attr, attrs);
118 read_attr.counters_buff, 113 if (ret)
119 read_attr.ncounters * sizeof(u64)); 114 return ret;
120 115
121err_read: 116 return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
122 kfree(read_attr.counters_buff); 117 read_attr.counters_buff,
123 return ret; 118 read_attr.ncounters * sizeof(u64));
124} 119}
125 120
126static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE, 121DECLARE_UVERBS_NAMED_METHOD(
127 &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE, 122 UVERBS_METHOD_COUNTERS_CREATE,
128 UVERBS_OBJECT_COUNTERS, 123 UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
129 UVERBS_ACCESS_NEW, 124 UVERBS_OBJECT_COUNTERS,
130 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 125 UVERBS_ACCESS_NEW,
131 126 UA_MANDATORY));
132static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY, 127
133 uverbs_destroy_def_handler, 128DECLARE_UVERBS_NAMED_METHOD_DESTROY(
134 &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE, 129 UVERBS_METHOD_COUNTERS_DESTROY,
135 UVERBS_OBJECT_COUNTERS, 130 UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
136 UVERBS_ACCESS_DESTROY, 131 UVERBS_OBJECT_COUNTERS,
137 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 132 UVERBS_ACCESS_DESTROY,
138 133 UA_MANDATORY));
139#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX 134
140static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ, 135DECLARE_UVERBS_NAMED_METHOD(
141 &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE, 136 UVERBS_METHOD_COUNTERS_READ,
142 UVERBS_OBJECT_COUNTERS, 137 UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
143 UVERBS_ACCESS_READ, 138 UVERBS_OBJECT_COUNTERS,
144 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 139 UVERBS_ACCESS_READ,
145 &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF, 140 UA_MANDATORY),
146 UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE), 141 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
147 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 142 UVERBS_ATTR_MIN_SIZE(0),
148 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS, 143 UA_MANDATORY),
149 UVERBS_ATTR_TYPE(__u32), 144 UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
150 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 145 enum ib_uverbs_read_counters_flags));
151 146
152DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS, 147DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
153 &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters), 148 UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
154 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE), 149 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
155 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY), 150 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
156 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ)); 151 &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
157
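Two new bundle helpers appear in the READ handler above: uverbs_get_flags32(), which copies a flags attribute and rejects any bit outside the allowed mask, and uverbs_zalloc(), which hands back scratch memory owned by the attrs bundle so the handler needs no kfree() on any exit path. A condensed sketch reusing the identifiers from the hunk above; the function name and the ncounters parameter are illustrative (the real handler derives ncounters from the response buffer length):

static int example_read(struct ib_counters *counters,
                        struct uverbs_attr_bundle *attrs, u32 ncounters)
{
        struct ib_counters_read_attr read_attr = {};
        int ret;

        /* Any flag bit other than PREFER_CACHED is rejected for us */
        ret = uverbs_get_flags32(&read_attr.flags, attrs,
                                 UVERBS_ATTR_READ_COUNTERS_FLAGS,
                                 IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
        if (ret)
                return ret;

        /* Freed automatically when the bundle is finished */
        read_attr.counters_buff =
                uverbs_zalloc(attrs, array_size(ncounters, sizeof(u64)));
        if (IS_ERR(read_attr.counters_buff))
                return PTR_ERR(read_attr.counters_buff);

        read_attr.ncounters = ncounters;
        return counters->device->read_counters(counters, &read_attr, attrs);
}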
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 3d293d01afea..5b5f2052cd52 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -44,21 +44,26 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
44 int ret; 44 int ret;
45 45
46 ret = ib_destroy_cq(cq); 46 ret = ib_destroy_cq(cq);
47 if (!ret || why != RDMA_REMOVE_DESTROY) 47 if (ib_is_destroy_retryable(ret, why, uobject))
48 ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ? 48 return ret;
49 container_of(ev_queue, 49
50 struct ib_uverbs_completion_event_file, 50 ib_uverbs_release_ucq(
51 ev_queue) : NULL, 51 uobject->context->ufile,
52 ucq); 52 ev_queue ? container_of(ev_queue,
53 struct ib_uverbs_completion_event_file,
54 ev_queue) :
55 NULL,
56 ucq);
53 return ret; 57 return ret;
54} 58}
55 59
56static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev, 60static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
57 struct ib_uverbs_file *file, 61 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
58 struct uverbs_attr_bundle *attrs)
59{ 62{
60 struct ib_ucontext *ucontext = file->ucontext; 63 struct ib_ucq_object *obj = container_of(
61 struct ib_ucq_object *obj; 64 uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
65 typeof(*obj), uobject);
66 struct ib_device *ib_dev = obj->uobject.context->device;
62 struct ib_udata uhw; 67 struct ib_udata uhw;
63 int ret; 68 int ret;
64 u64 user_handle; 69 u64 user_handle;
@@ -67,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
67 struct ib_uverbs_completion_event_file *ev_file = NULL; 72 struct ib_uverbs_completion_event_file *ev_file = NULL;
68 struct ib_uobject *ev_file_uobj; 73 struct ib_uobject *ev_file_uobj;
69 74
70 if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ)) 75 if (!ib_dev->create_cq || !ib_dev->destroy_cq)
71 return -EOPNOTSUPP; 76 return -EOPNOTSUPP;
72 77
73 ret = uverbs_copy_from(&attr.comp_vector, attrs, 78 ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -81,28 +86,26 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
81 if (ret) 86 if (ret)
82 return ret; 87 return ret;
83 88
84 /* Optional param, if it doesn't exist, we get -ENOENT and skip it */ 89 ret = uverbs_get_flags32(&attr.flags, attrs,
85 if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs, 90 UVERBS_ATTR_CREATE_CQ_FLAGS,
86 UVERBS_ATTR_CREATE_CQ_FLAGS))) 91 IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
87 return -EFAULT; 92 IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
93 if (ret)
94 return ret;
88 95
89 ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL); 96 ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
90 if (!IS_ERR(ev_file_uobj)) { 97 if (!IS_ERR(ev_file_uobj)) {
91 ev_file = container_of(ev_file_uobj, 98 ev_file = container_of(ev_file_uobj,
92 struct ib_uverbs_completion_event_file, 99 struct ib_uverbs_completion_event_file,
93 uobj_file.uobj); 100 uobj);
94 uverbs_uobject_get(ev_file_uobj); 101 uverbs_uobject_get(ev_file_uobj);
95 } 102 }
96 103
97 if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) { 104 if (attr.comp_vector >= file->device->num_comp_vectors) {
98 ret = -EINVAL; 105 ret = -EINVAL;
99 goto err_event_file; 106 goto err_event_file;
100 } 107 }
101 108
102 obj = container_of(uverbs_attr_get_uobject(attrs,
103 UVERBS_ATTR_CREATE_CQ_HANDLE),
104 typeof(*obj), uobject);
105 obj->uverbs_file = ucontext->ufile;
106 obj->comp_events_reported = 0; 109 obj->comp_events_reported = 0;
107 obj->async_events_reported = 0; 110 obj->async_events_reported = 0;
108 INIT_LIST_HEAD(&obj->comp_list); 111 INIT_LIST_HEAD(&obj->comp_list);
@@ -111,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
111 /* Temporary, only until drivers get the new uverbs_attr_bundle */ 114 /* Temporary, only until drivers get the new uverbs_attr_bundle */
112 create_udata(attrs, &uhw); 115 create_udata(attrs, &uhw);
113 116
114 cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw); 117 cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
115 if (IS_ERR(cq)) { 118 if (IS_ERR(cq)) {
116 ret = PTR_ERR(cq); 119 ret = PTR_ERR(cq);
117 goto err_event_file; 120 goto err_event_file;
@@ -143,69 +146,64 @@ err_event_file:
143 return ret; 146 return ret;
144}; 147};
145 148
146static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE, 149DECLARE_UVERBS_NAMED_METHOD(
147 &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, 150 UVERBS_METHOD_CQ_CREATE,
148 UVERBS_ACCESS_NEW, 151 UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
149 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 152 UVERBS_OBJECT_CQ,
150 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE, 153 UVERBS_ACCESS_NEW,
154 UA_MANDATORY),
155 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
156 UVERBS_ATTR_TYPE(u32),
157 UA_MANDATORY),
158 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
159 UVERBS_ATTR_TYPE(u64),
160 UA_MANDATORY),
161 UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
162 UVERBS_OBJECT_COMP_CHANNEL,
163 UVERBS_ACCESS_READ,
164 UA_OPTIONAL),
165 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
166 UVERBS_ATTR_TYPE(u32),
167 UA_MANDATORY),
168 UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
169 enum ib_uverbs_ex_create_cq_flags),
170 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
151 UVERBS_ATTR_TYPE(u32), 171 UVERBS_ATTR_TYPE(u32),
152 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 172 UA_MANDATORY),
153 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE, 173 UVERBS_ATTR_UHW());
154 UVERBS_ATTR_TYPE(u64), 174
155 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 175static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
156 &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL, 176 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
157 UVERBS_OBJECT_COMP_CHANNEL,
158 UVERBS_ACCESS_READ),
159 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
160 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
161 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
162 &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
163 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
164 &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
165
166static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
167 struct ib_uverbs_file *file,
168 struct uverbs_attr_bundle *attrs)
169{ 177{
170 struct ib_uobject *uobj = 178 struct ib_uobject *uobj =
171 uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE); 179 uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
172 struct ib_uverbs_destroy_cq_resp resp; 180 struct ib_ucq_object *obj =
173 struct ib_ucq_object *obj; 181 container_of(uobj, struct ib_ucq_object, uobject);
174 int ret; 182 struct ib_uverbs_destroy_cq_resp resp = {
175 183 .comp_events_reported = obj->comp_events_reported,
176 if (IS_ERR(uobj)) 184 .async_events_reported = obj->async_events_reported
177 return PTR_ERR(uobj); 185 };
178
179 obj = container_of(uobj, struct ib_ucq_object, uobject);
180
181 if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
182 return -EOPNOTSUPP;
183
184 ret = rdma_explicit_destroy(uobj);
185 if (ret)
186 return ret;
187
188 resp.comp_events_reported = obj->comp_events_reported;
189 resp.async_events_reported = obj->async_events_reported;
190 186
191 return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp, 187 return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
192 sizeof(resp)); 188 sizeof(resp));
193} 189}
194 190
195static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY, 191DECLARE_UVERBS_NAMED_METHOD(
196 &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ, 192 UVERBS_METHOD_CQ_DESTROY,
197 UVERBS_ACCESS_DESTROY, 193 UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
198 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 194 UVERBS_OBJECT_CQ,
199 &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP, 195 UVERBS_ACCESS_DESTROY,
200 UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp), 196 UA_MANDATORY),
201 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 197 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
202 198 UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
203DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ, 199 UA_MANDATORY));
204 &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0, 200
205 uverbs_free_cq), 201DECLARE_UVERBS_NAMED_OBJECT(
202 UVERBS_OBJECT_CQ,
203 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
204
206#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI) 205#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
207 &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE), 206 &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
208 &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY) 207 &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
209#endif 208#endif
210 ); 209);
211
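The declarations above also show the reworked spec macros used across this series: attributes are listed by value rather than by address, UA_MANDATORY/UA_OPTIONAL replace UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY), flag attributes name the enum they are validated against, and UVERBS_ATTR_UHW() stands in for the old uverbs_uhw_compat_in/out pair. An abridged restatement of the create method above, trimmed to a few attributes purely for illustration (not meant to compile next to the full declaration):

DECLARE_UVERBS_NAMED_METHOD(
        UVERBS_METHOD_CQ_CREATE,
        UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
                        UVERBS_OBJECT_CQ,
                        UVERBS_ACCESS_NEW,
                        UA_MANDATORY),
        UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
                       UVERBS_OBJECT_COMP_CHANNEL,
                       UVERBS_ACCESS_READ,
                       UA_OPTIONAL),
        UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
                             enum ib_uverbs_ex_create_cq_flags),
        UVERBS_ATTR_UHW());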
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 8b681575b615..edc3ff7733d4 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -37,20 +37,24 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
37 enum rdma_remove_reason why) 37 enum rdma_remove_reason why)
38{ 38{
39 struct ib_dm *dm = uobject->object; 39 struct ib_dm *dm = uobject->object;
40 int ret;
40 41
41 if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt)) 42 ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
42 return -EBUSY; 43 if (ret)
44 return ret;
43 45
44 return dm->device->dealloc_dm(dm); 46 return dm->device->dealloc_dm(dm);
45} 47}
46 48
47static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev, 49static int
48 struct ib_uverbs_file *file, 50UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
49 struct uverbs_attr_bundle *attrs) 51 struct uverbs_attr_bundle *attrs)
50{ 52{
51 struct ib_ucontext *ucontext = file->ucontext;
52 struct ib_dm_alloc_attr attr = {}; 53 struct ib_dm_alloc_attr attr = {};
53 struct ib_uobject *uobj; 54 struct ib_uobject *uobj =
55 uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
56 ->obj_attr.uobject;
57 struct ib_device *ib_dev = uobj->context->device;
54 struct ib_dm *dm; 58 struct ib_dm *dm;
55 int ret; 59 int ret;
56 60
@@ -67,9 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
67 if (ret) 71 if (ret)
68 return ret; 72 return ret;
69 73
70 uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject; 74 dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
71
72 dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
73 if (IS_ERR(dm)) 75 if (IS_ERR(dm))
74 return PTR_ERR(dm); 76 return PTR_ERR(dm);
75 77
@@ -83,26 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
83 return 0; 85 return 0;
84} 86}
85 87
86static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC, 88DECLARE_UVERBS_NAMED_METHOD(
87 &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM, 89 UVERBS_METHOD_DM_ALLOC,
88 UVERBS_ACCESS_NEW, 90 UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
89 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 91 UVERBS_OBJECT_DM,
90 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH, 92 UVERBS_ACCESS_NEW,
91 UVERBS_ATTR_TYPE(u64), 93 UA_MANDATORY),
92 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 94 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
93 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT, 95 UVERBS_ATTR_TYPE(u64),
94 UVERBS_ATTR_TYPE(u32), 96 UA_MANDATORY),
95 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 97 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
96 98 UVERBS_ATTR_TYPE(u32),
97static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE, 99 UA_MANDATORY));
98 uverbs_destroy_def_handler, 100
99 &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE, 101DECLARE_UVERBS_NAMED_METHOD_DESTROY(
100 UVERBS_OBJECT_DM, 102 UVERBS_METHOD_DM_FREE,
101 UVERBS_ACCESS_DESTROY, 103 UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
102 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 104 UVERBS_OBJECT_DM,
105 UVERBS_ACCESS_DESTROY,
106 UA_MANDATORY));
103 107
104DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM, 108DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
105 /* 1 is used in order to free the DM after MRs */ 109 UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
106 &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
107 &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC), 110 &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
108 &UVERBS_METHOD(UVERBS_METHOD_DM_FREE)); 111 &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index a7be51cf2e42..d8cfafe23bd9 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
37 enum rdma_remove_reason why) 37 enum rdma_remove_reason why)
38{ 38{
39 struct ib_flow_action *action = uobject->object; 39 struct ib_flow_action *action = uobject->object;
40 int ret;
40 41
41 if (why == RDMA_REMOVE_DESTROY && 42 ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
42 atomic_read(&action->usecnt)) 43 if (ret)
43 return -EBUSY; 44 return ret;
44 45
45 return action->device->destroy_flow_action(action); 46 return action->device->destroy_flow_action(action);
46} 47}
@@ -303,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
303 return 0; 304 return 0;
304} 305}
305 306
306static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev, 307static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
307 struct ib_uverbs_file *file, 308 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
308 struct uverbs_attr_bundle *attrs)
309{ 309{
310 struct ib_uobject *uobj = uverbs_attr_get_uobject(
311 attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
312 struct ib_device *ib_dev = uobj->context->device;
310 int ret; 313 int ret;
311 struct ib_uobject *uobj;
312 struct ib_flow_action *action; 314 struct ib_flow_action *action;
313 struct ib_flow_action_esp_attr esp_attr = {}; 315 struct ib_flow_action_esp_attr esp_attr = {};
314 316
@@ -320,7 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
320 return ret; 322 return ret;
321 323
322 /* No need to check as this attribute is marked as MANDATORY */ 324 /* No need to check as this attribute is marked as MANDATORY */
323 uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
324 action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs); 325 action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
325 if (IS_ERR(action)) 326 if (IS_ERR(action))
326 return PTR_ERR(action); 327 return PTR_ERR(action);
@@ -334,102 +335,109 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
334 return 0; 335 return 0;
335} 336}
336 337
337static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev, 338static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
338 struct ib_uverbs_file *file, 339 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
339 struct uverbs_attr_bundle *attrs)
340{ 340{
341 struct ib_uobject *uobj = uverbs_attr_get_uobject(
342 attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
343 struct ib_flow_action *action = uobj->object;
341 int ret; 344 int ret;
342 struct ib_uobject *uobj;
343 struct ib_flow_action *action;
344 struct ib_flow_action_esp_attr esp_attr = {}; 345 struct ib_flow_action_esp_attr esp_attr = {};
345 346
346 if (!ib_dev->modify_flow_action_esp) 347 if (!action->device->modify_flow_action_esp)
347 return -EOPNOTSUPP; 348 return -EOPNOTSUPP;
348 349
349 ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true); 350 ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
351 true);
350 if (ret) 352 if (ret)
351 return ret; 353 return ret;
352 354
353 uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
354 action = uobj->object;
355
356 if (action->type != IB_FLOW_ACTION_ESP) 355 if (action->type != IB_FLOW_ACTION_ESP)
357 return -EINVAL; 356 return -EINVAL;
358 357
359 return ib_dev->modify_flow_action_esp(action, 358 return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
360 &esp_attr.hdr, 359 attrs);
361 attrs);
362} 360}
363 361
364static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = { 362static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
365 [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = { 363 [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
366 { .ptr = { 364 .type = UVERBS_ATTR_TYPE_PTR_IN,
367 .type = UVERBS_ATTR_TYPE_PTR_IN, 365 UVERBS_ATTR_STRUCT(
368 UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm), 366 struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
369 .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO, 367 aes_key),
370 } },
371 }, 368 },
372}; 369};
373 370
374static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = { 371static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
375 [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = { 372 [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
376 { .ptr = { 373 .type = UVERBS_ATTR_TYPE_PTR_IN,
377 .type = UVERBS_ATTR_TYPE_PTR_IN, 374 UVERBS_ATTR_NO_DATA(),
378 /* No need to specify any data */
379 .len = 0,
380 } }
381 }, 375 },
382 [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = { 376 [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
383 { .ptr = { 377 .type = UVERBS_ATTR_TYPE_PTR_IN,
384 .type = UVERBS_ATTR_TYPE_PTR_IN, 378 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
385 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size), 379 size),
386 .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
387 } }
388 }, 380 },
389}; 381};
390 382
391static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 383DECLARE_UVERBS_NAMED_METHOD(
392 &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, 384 UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
393 UVERBS_ACCESS_NEW, 385 UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
394 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 386 UVERBS_OBJECT_FLOW_ACTION,
395 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, 387 UVERBS_ACCESS_NEW,
396 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), 388 UA_MANDATORY),
397 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY | 389 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
398 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), 390 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
399 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), 391 hard_limit_pkts),
400 &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, 392 UA_MANDATORY),
401 uverbs_flow_action_esp_keymat, 393 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
402 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 394 UVERBS_ATTR_TYPE(__u32),
403 &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, 395 UA_OPTIONAL),
404 uverbs_flow_action_esp_replay), 396 UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
405 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, 397 uverbs_flow_action_esp_keymat,
406 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); 398 UA_MANDATORY),
407 399 UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
408static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY, 400 uverbs_flow_action_esp_replay,
409 &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION, 401 UA_OPTIONAL),
410 UVERBS_ACCESS_WRITE, 402 UVERBS_ATTR_PTR_IN(
411 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 403 UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
412 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, 404 UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
413 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts), 405 UA_OPTIONAL));
414 UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)), 406
415 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)), 407DECLARE_UVERBS_NAMED_METHOD(
416 &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, 408 UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
417 uverbs_flow_action_esp_keymat), 409 UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
418 &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY, 410 UVERBS_OBJECT_FLOW_ACTION,
419 uverbs_flow_action_esp_replay), 411 UVERBS_ACCESS_WRITE,
420 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, 412 UA_MANDATORY),
421 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type))); 413 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
422 414 UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
423static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY, 415 hard_limit_pkts),
424 uverbs_destroy_def_handler, 416 UA_OPTIONAL),
425 &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, 417 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
426 UVERBS_OBJECT_FLOW_ACTION, 418 UVERBS_ATTR_TYPE(__u32),
427 UVERBS_ACCESS_DESTROY, 419 UA_OPTIONAL),
428 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 420 UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
429 421 uverbs_flow_action_esp_keymat,
430DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION, 422 UA_OPTIONAL),
431 &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action), 423 UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
432 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE), 424 uverbs_flow_action_esp_replay,
433 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY), 425 UA_OPTIONAL),
434 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)); 426 UVERBS_ATTR_PTR_IN(
435 427 UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
428 UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
429 UA_OPTIONAL));
430
431DECLARE_UVERBS_NAMED_METHOD_DESTROY(
432 UVERBS_METHOD_FLOW_ACTION_DESTROY,
433 UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
434 UVERBS_OBJECT_FLOW_ACTION,
435 UVERBS_ACCESS_DESTROY,
436 UA_MANDATORY));
437
438DECLARE_UVERBS_NAMED_OBJECT(
439 UVERBS_OBJECT_FLOW_ACTION,
440 UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
441 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
442 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
443 &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 68f7cadf088f..cf02e774303e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
39 return ib_dereg_mr((struct ib_mr *)uobject->object); 39 return ib_dereg_mr((struct ib_mr *)uobject->object);
40} 40}
41 41
42static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev, 42static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
43 struct ib_uverbs_file *file, 43 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
44 struct uverbs_attr_bundle *attrs)
45{ 44{
46 struct ib_dm_mr_attr attr = {}; 45 struct ib_dm_mr_attr attr = {};
47 struct ib_uobject *uobj; 46 struct ib_uobject *uobj =
48 struct ib_dm *dm; 47 uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
49 struct ib_pd *pd; 48 struct ib_dm *dm =
49 uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
50 struct ib_pd *pd =
51 uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
52 struct ib_device *ib_dev = pd->device;
53
50 struct ib_mr *mr; 54 struct ib_mr *mr;
51 int ret; 55 int ret;
52 56
@@ -62,8 +66,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
62 if (ret) 66 if (ret)
63 return ret; 67 return ret;
64 68
65 ret = uverbs_copy_from(&attr.access_flags, attrs, 69 ret = uverbs_get_flags32(&attr.access_flags, attrs,
66 UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS); 70 UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
71 IB_ACCESS_SUPPORTED);
67 if (ret) 72 if (ret)
68 return ret; 73 return ret;
69 74
@@ -74,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
74 if (ret) 79 if (ret)
75 return ret; 80 return ret;
76 81
77 pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
78
79 dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
80
81 uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
82
83 if (attr.offset > dm->length || attr.length > dm->length || 82 if (attr.offset > dm->length || attr.length > dm->length ||
84 attr.length > dm->length - attr.offset) 83 attr.length > dm->length - attr.offset)
85 return -EINVAL; 84 return -EINVAL;
@@ -115,33 +114,36 @@ err_dereg:
115 return ret; 114 return ret;
116} 115}
117 116
118static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG, 117DECLARE_UVERBS_NAMED_METHOD(
119 &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR, 118 UVERBS_METHOD_DM_MR_REG,
120 UVERBS_ACCESS_NEW, 119 UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
121 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 120 UVERBS_OBJECT_MR,
122 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET, 121 UVERBS_ACCESS_NEW,
123 UVERBS_ATTR_TYPE(u64), 122 UA_MANDATORY),
124 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 123 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
125 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH, 124 UVERBS_ATTR_TYPE(u64),
126 UVERBS_ATTR_TYPE(u64), 125 UA_MANDATORY),
127 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 126 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
128 &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD, 127 UVERBS_ATTR_TYPE(u64),
129 UVERBS_ACCESS_READ, 128 UA_MANDATORY),
130 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 129 UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
131 &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS, 130 UVERBS_OBJECT_PD,
131 UVERBS_ACCESS_READ,
132 UA_MANDATORY),
133 UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
134 enum ib_access_flags),
135 UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
136 UVERBS_OBJECT_DM,
137 UVERBS_ACCESS_READ,
138 UA_MANDATORY),
139 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
140 UVERBS_ATTR_TYPE(u32),
141 UA_MANDATORY),
142 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
132 UVERBS_ATTR_TYPE(u32), 143 UVERBS_ATTR_TYPE(u32),
133 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 144 UA_MANDATORY));
134 &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM, 145
135 UVERBS_ACCESS_READ, 146DECLARE_UVERBS_NAMED_OBJECT(
136 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 147 UVERBS_OBJECT_MR,
137 &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY, 148 UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
138 UVERBS_ATTR_TYPE(u32), 149 &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
139 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
140 &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
141 UVERBS_ATTR_TYPE(u32),
142 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
143
144DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
145 /* 1 is used in order to free the MR after all the MWs */
146 &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
147 &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
new file mode 100644
index 000000000000..73ea6f0db88f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -0,0 +1,346 @@
1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
4 */
5#include <rdma/uverbs_ioctl.h>
6#include <rdma/rdma_user_ioctl.h>
7#include <linux/bitops.h>
8#include "rdma_core.h"
9#include "uverbs.h"
10
11static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
12{
13 void *elm;
14 int rc;
15
16 if (key == UVERBS_API_KEY_ERR)
17 return ERR_PTR(-EOVERFLOW);
18
19 elm = kzalloc(alloc_size, GFP_KERNEL);
20 rc = radix_tree_insert(&uapi->radix, key, elm);
21 if (rc) {
22 kfree(elm);
23 return ERR_PTR(rc);
24 }
25
26 return elm;
27}
28
29static int uapi_merge_method(struct uverbs_api *uapi,
30 struct uverbs_api_object *obj_elm, u32 obj_key,
31 const struct uverbs_method_def *method,
32 bool is_driver)
33{
34 u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
35 struct uverbs_api_ioctl_method *method_elm;
36 unsigned int i;
37
38 if (!method->attrs)
39 return 0;
40
41 method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
42 if (IS_ERR(method_elm)) {
43 if (method_elm != ERR_PTR(-EEXIST))
44 return PTR_ERR(method_elm);
45
46 /*
47 * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
48 */
49 if (WARN_ON(method->handler))
50 return -EINVAL;
51 method_elm = radix_tree_lookup(&uapi->radix, method_key);
52 if (WARN_ON(!method_elm))
53 return -EINVAL;
54 } else {
55 WARN_ON(!method->handler);
56 rcu_assign_pointer(method_elm->handler, method->handler);
57 if (method->handler != uverbs_destroy_def_handler)
58 method_elm->driver_method = is_driver;
59 }
60
61 for (i = 0; i != method->num_attrs; i++) {
62 const struct uverbs_attr_def *attr = (*method->attrs)[i];
63 struct uverbs_api_attr *attr_slot;
64
65 if (!attr)
66 continue;
67
68 /*
69 * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
70 * so if it is specified by a driver then it always makes this
71 * into a driver method.
72 */
73 if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
74 method_elm->driver_method |= is_driver;
75
76 attr_slot =
77 uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
78 sizeof(*attr_slot));
79 /* Attributes are not allowed to be modified by drivers */
80 if (IS_ERR(attr_slot))
81 return PTR_ERR(attr_slot);
82
83 attr_slot->spec = attr->attr;
84 }
85
86 return 0;
87}
88
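uapi_merge_method() builds each method's radix key by OR-ing the object key with the method key, and every attribute slot is inserted under that method key with its own attribute bits. A sketch of the corresponding lookup, using the key helpers named in this file; the id parameter types and the function itself are assumptions:

static struct uverbs_api_ioctl_method *
example_lookup_method(struct uverbs_api *uapi, u32 obj_id, u32 method_id)
{
        /* object bits | method bits; attribute slots live below this key */
        u32 key = uapi_key_obj(obj_id) | uapi_key_ioctl_method(method_id);

        return radix_tree_lookup(&uapi->radix, key);
}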
89static int uapi_merge_tree(struct uverbs_api *uapi,
90 const struct uverbs_object_tree_def *tree,
91 bool is_driver)
92{
93 unsigned int i, j;
94 int rc;
95
96 if (!tree->objects)
97 return 0;
98
99 for (i = 0; i != tree->num_objects; i++) {
100 const struct uverbs_object_def *obj = (*tree->objects)[i];
101 struct uverbs_api_object *obj_elm;
102 u32 obj_key;
103
104 if (!obj)
105 continue;
106
107 obj_key = uapi_key_obj(obj->id);
108 obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
109 if (IS_ERR(obj_elm)) {
110 if (obj_elm != ERR_PTR(-EEXIST))
111 return PTR_ERR(obj_elm);
112
113 /* This occurs when a driver uses ADD_UVERBS_METHODS */
114 if (WARN_ON(obj->type_attrs))
115 return -EINVAL;
116 obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
117 if (WARN_ON(!obj_elm))
118 return -EINVAL;
119 } else {
120 obj_elm->type_attrs = obj->type_attrs;
121 if (obj->type_attrs) {
122 obj_elm->type_class =
123 obj->type_attrs->type_class;
124 /*
125 * Today drivers are only permitted to use
126 * idr_class types. They cannot use FD types
127 * because we currently have no way to revoke
128 * the fops pointer after device
129 * disassociation.
130 */
131 if (WARN_ON(is_driver &&
132 obj->type_attrs->type_class !=
133 &uverbs_idr_class))
134 return -EINVAL;
135 }
136 }
137
138 if (!obj->methods)
139 continue;
140
141 for (j = 0; j != obj->num_methods; j++) {
142 const struct uverbs_method_def *method =
143 (*obj->methods)[j];
144 if (!method)
145 continue;
146
147 rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
148 is_driver);
149 if (rc)
150 return rc;
151 }
152 }
153
154 return 0;
155}
156
157static int
158uapi_finalize_ioctl_method(struct uverbs_api *uapi,
159 struct uverbs_api_ioctl_method *method_elm,
160 u32 method_key)
161{
162 struct radix_tree_iter iter;
163 unsigned int num_attrs = 0;
164 unsigned int max_bkey = 0;
165 bool single_uobj = false;
166 void __rcu **slot;
167
168 method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
169 radix_tree_for_each_slot (slot, &uapi->radix, &iter,
170 uapi_key_attrs_start(method_key)) {
171 struct uverbs_api_attr *elm =
172 rcu_dereference_protected(*slot, true);
173 u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
174 u32 attr_bkey = uapi_bkey_attr(attr_key);
175 u8 type = elm->spec.type;
176
177 if (uapi_key_attr_to_method(iter.index) !=
178 uapi_key_attr_to_method(method_key))
179 break;
180
181 if (elm->spec.mandatory)
182 __set_bit(attr_bkey, method_elm->attr_mandatory);
183
184 if (type == UVERBS_ATTR_TYPE_IDR ||
185 type == UVERBS_ATTR_TYPE_FD) {
186 u8 access = elm->spec.u.obj.access;
187
188 /*
 189			 * Verbs specs may only have one NEW/DESTROY; we don't
 190			 * have the infrastructure to abort multiple NEWs or
 191			 * cope with multiple DESTROY failures.
192 */
193 if (access == UVERBS_ACCESS_NEW ||
194 access == UVERBS_ACCESS_DESTROY) {
195 if (WARN_ON(single_uobj))
196 return -EINVAL;
197
198 single_uobj = true;
199 if (WARN_ON(!elm->spec.mandatory))
200 return -EINVAL;
201 }
202
203 if (access == UVERBS_ACCESS_DESTROY)
204 method_elm->destroy_bkey = attr_bkey;
205 }
206
207 max_bkey = max(max_bkey, attr_bkey);
208 num_attrs++;
209 }
210
211 method_elm->key_bitmap_len = max_bkey + 1;
212 WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
213
214 uapi_compute_bundle_size(method_elm, num_attrs);
215 return 0;
216}
217
218static int uapi_finalize(struct uverbs_api *uapi)
219{
220 struct radix_tree_iter iter;
221 void __rcu **slot;
222 int rc;
223
224 radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
225 struct uverbs_api_ioctl_method *method_elm =
226 rcu_dereference_protected(*slot, true);
227
228 if (uapi_key_is_ioctl_method(iter.index)) {
229 rc = uapi_finalize_ioctl_method(uapi, method_elm,
230 iter.index);
231 if (rc)
232 return rc;
233 }
234 }
235
236 return 0;
237}
238
239void uverbs_destroy_api(struct uverbs_api *uapi)
240{
241 struct radix_tree_iter iter;
242 void __rcu **slot;
243
244 if (!uapi)
245 return;
246
247 radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
248 kfree(rcu_dereference_protected(*slot, true));
249 radix_tree_iter_delete(&uapi->radix, &iter, slot);
250 }
251}
252
253struct uverbs_api *uverbs_alloc_api(
254 const struct uverbs_object_tree_def *const *driver_specs,
255 enum rdma_driver_id driver_id)
256{
257 struct uverbs_api *uapi;
258 int rc;
259
260 uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
261 if (!uapi)
262 return ERR_PTR(-ENOMEM);
263
264 INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
265 uapi->driver_id = driver_id;
266
267 rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
268 if (rc)
269 goto err;
270
271 for (; driver_specs && *driver_specs; driver_specs++) {
272 rc = uapi_merge_tree(uapi, *driver_specs, true);
273 if (rc)
274 goto err;
275 }
276
277 rc = uapi_finalize(uapi);
278 if (rc)
279 goto err;
280
281 return uapi;
282err:
283 if (rc != -ENOMEM)
284 pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
285 rc);
286
287 uverbs_destroy_api(uapi);
288 return ERR_PTR(rc);
289}
290
291/*
 292 * The pre version is done before destroying the HW objects; it only blocks
293 * off method access. All methods that require the ib_dev or the module data
294 * must test one of these assignments prior to continuing.
295 */
296void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
297{
298 struct uverbs_api *uapi = uverbs_dev->uapi;
299 struct radix_tree_iter iter;
300 void __rcu **slot;
301
302 rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
303
304 radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
305 if (uapi_key_is_ioctl_method(iter.index)) {
306 struct uverbs_api_ioctl_method *method_elm =
307 rcu_dereference_protected(*slot, true);
308
309 if (method_elm->driver_method)
310 rcu_assign_pointer(method_elm->handler, NULL);
311 }
312 }
313
314 synchronize_srcu(&uverbs_dev->disassociate_srcu);
315}
316
317/*
318 * Called when a driver disassociates from the ib_uverbs_device. The
319 * assumption is that the driver module will unload after. Replace everything
320 * related to the driver with NULL as a safety measure.
321 */
322void uverbs_disassociate_api(struct uverbs_api *uapi)
323{
324 struct radix_tree_iter iter;
325 void __rcu **slot;
326
327 radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
328 if (uapi_key_is_object(iter.index)) {
329 struct uverbs_api_object *object_elm =
330 rcu_dereference_protected(*slot, true);
331
332 /*
333 * Some type_attrs are in the driver module. We don't
334 * bother to keep track of which since there should be
335 * no use of this after disassociate.
336 */
337 object_elm->type_attrs = NULL;
338 } else if (uapi_key_is_attr(iter.index)) {
339 struct uverbs_api_attr *elm =
340 rcu_dereference_protected(*slot, true);
341
342 if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
343 elm->spec.u2.enum_def.ids = NULL;
344 }
345 }
346}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9d6beb948535..6ee03d6089eb 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,12 +326,162 @@ EXPORT_SYMBOL(ib_dealloc_pd);
326 326
327/* Address handles */ 327/* Address handles */
328 328
329/**
330 * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
331 * @dest: Pointer to destination ah_attr. Contents of the destination
 332 * pointer are assumed to be invalid and the attributes are overwritten.
333 * @src: Pointer to source ah_attr.
334 */
335void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
336 const struct rdma_ah_attr *src)
337{
338 *dest = *src;
339 if (dest->grh.sgid_attr)
340 rdma_hold_gid_attr(dest->grh.sgid_attr);
341}
342EXPORT_SYMBOL(rdma_copy_ah_attr);
343
344/**
 345 * rdma_replace_ah_attr - Replace a valid ah_attr with a new one.
346 * @old: Pointer to existing ah_attr which needs to be replaced.
347 * old is assumed to be valid or zero'd
348 * @new: Pointer to the new ah_attr.
349 *
350 * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
 351 * the old ah_attr is valid; after that it copies the new attribute and holds
 352 * a reference to the replaced ah_attr.
353 */
354void rdma_replace_ah_attr(struct rdma_ah_attr *old,
355 const struct rdma_ah_attr *new)
356{
357 rdma_destroy_ah_attr(old);
358 *old = *new;
359 if (old->grh.sgid_attr)
360 rdma_hold_gid_attr(old->grh.sgid_attr);
361}
362EXPORT_SYMBOL(rdma_replace_ah_attr);
363
364/**
365 * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
366 * @dest: Pointer to destination ah_attr to copy to.
367 * dest is assumed to be valid or zero'd
368 * @src: Pointer to the new ah_attr.
369 *
370 * rdma_move_ah_attr() first releases any reference in the destination ah_attr
371 * if it is valid. This also transfers ownership of internal references from
372 * src to dest, making src invalid in the process. No new reference of the src
373 * ah_attr is taken.
374 */
375void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
376{
377 rdma_destroy_ah_attr(dest);
378 *dest = *src;
379 src->grh.sgid_attr = NULL;
380}
381EXPORT_SYMBOL(rdma_move_ah_attr);
382
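
As a quick illustration of the ownership rules documented above, here is a sketch that is not part of the patch; example_ah_attr_refs() is a made-up name and the function is assumed to live in kernel code with rdma/ib_verbs.h available. Only rdma_copy_ah_attr(), rdma_move_ah_attr() and rdma_destroy_ah_attr() come from this series.

static void example_ah_attr_refs(struct rdma_ah_attr *src)
{
	struct rdma_ah_attr copied = {};
	struct rdma_ah_attr moved = {};

	/* Copy: both src and copied now hold a reference to the sgid_attr */
	rdma_copy_ah_attr(&copied, src);

	/* Move: the reference is transferred, src must not be used afterwards */
	rdma_move_ah_attr(&moved, src);

	/* Each holder drops its own reference when done */
	rdma_destroy_ah_attr(&copied);
	rdma_destroy_ah_attr(&moved);
}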
383/*
384 * Validate that the rdma_ah_attr is valid for the device before passing it
385 * off to the driver.
386 */
387static int rdma_check_ah_attr(struct ib_device *device,
388 struct rdma_ah_attr *ah_attr)
389{
390 if (!rdma_is_port_valid(device, ah_attr->port_num))
391 return -EINVAL;
392
393 if ((rdma_is_grh_required(device, ah_attr->port_num) ||
394 ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
395 !(ah_attr->ah_flags & IB_AH_GRH))
396 return -EINVAL;
397
398 if (ah_attr->grh.sgid_attr) {
399 /*
400 * Make sure the passed sgid_attr is consistent with the
401 * parameters
402 */
403 if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
404 ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
405 return -EINVAL;
406 }
407 return 0;
408}
409
410/*
411 * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
 412 * On success the caller is responsible for calling rdma_unfill_sgid_attr().
413 */
414static int rdma_fill_sgid_attr(struct ib_device *device,
415 struct rdma_ah_attr *ah_attr,
416 const struct ib_gid_attr **old_sgid_attr)
417{
418 const struct ib_gid_attr *sgid_attr;
419 struct ib_global_route *grh;
420 int ret;
421
422 *old_sgid_attr = ah_attr->grh.sgid_attr;
423
424 ret = rdma_check_ah_attr(device, ah_attr);
425 if (ret)
426 return ret;
427
428 if (!(ah_attr->ah_flags & IB_AH_GRH))
429 return 0;
430
431 grh = rdma_ah_retrieve_grh(ah_attr);
432 if (grh->sgid_attr)
433 return 0;
434
435 sgid_attr =
436 rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
437 if (IS_ERR(sgid_attr))
438 return PTR_ERR(sgid_attr);
439
 440	/* Move ownership of the kref into the ah_attr */
441 grh->sgid_attr = sgid_attr;
442 return 0;
443}
444
445static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
446 const struct ib_gid_attr *old_sgid_attr)
447{
448 /*
449 * Fill didn't change anything, the caller retains ownership of
450 * whatever it passed
451 */
452 if (ah_attr->grh.sgid_attr == old_sgid_attr)
453 return;
454
455 /*
 456	 * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the caller
457 * doesn't see any change in the rdma_ah_attr. If we get here
458 * old_sgid_attr is NULL.
459 */
460 rdma_destroy_ah_attr(ah_attr);
461}
462
463static const struct ib_gid_attr *
464rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
465 const struct ib_gid_attr *old_attr)
466{
467 if (old_attr)
468 rdma_put_gid_attr(old_attr);
469 if (ah_attr->ah_flags & IB_AH_GRH) {
470 rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
471 return ah_attr->grh.sgid_attr;
472 }
473 return NULL;
474}
475
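
The three helpers above are used as a bracket around driver callbacks: fill before the call, unfill after it, and update when the new attribute is committed. A minimal sketch of that pattern, mirroring the exported entry points below (illustrative only; example_with_sgid_attr() is a made-up name and, since the helpers are static, it is assumed to live in this same file):

static int example_with_sgid_attr(struct ib_device *device,
				  struct rdma_ah_attr *ah_attr)
{
	const struct ib_gid_attr *old_sgid_attr;
	int ret;

	ret = rdma_fill_sgid_attr(device, ah_attr, &old_sgid_attr);
	if (ret)
		return ret;

	/* ... ah_attr->grh.sgid_attr is now valid for the driver call ... */

	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
	return 0;
}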
329static struct ib_ah *_rdma_create_ah(struct ib_pd *pd, 476static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
330 struct rdma_ah_attr *ah_attr, 477 struct rdma_ah_attr *ah_attr,
331 struct ib_udata *udata) 478 struct ib_udata *udata)
332{ 479{
333 struct ib_ah *ah; 480 struct ib_ah *ah;
334 481
482 if (!pd->device->create_ah)
483 return ERR_PTR(-EOPNOTSUPP);
484
335 ah = pd->device->create_ah(pd, ah_attr, udata); 485 ah = pd->device->create_ah(pd, ah_attr, udata);
336 486
337 if (!IS_ERR(ah)) { 487 if (!IS_ERR(ah)) {
@@ -339,15 +489,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
339 ah->pd = pd; 489 ah->pd = pd;
340 ah->uobject = NULL; 490 ah->uobject = NULL;
341 ah->type = ah_attr->type; 491 ah->type = ah_attr->type;
492 ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
493
342 atomic_inc(&pd->usecnt); 494 atomic_inc(&pd->usecnt);
343 } 495 }
344 496
345 return ah; 497 return ah;
346} 498}
347 499
500/**
501 * rdma_create_ah - Creates an address handle for the
502 * given address vector.
503 * @pd: The protection domain associated with the address handle.
504 * @ah_attr: The attributes of the address vector.
505 *
 506 * It returns a valid address handle on success and an ERR_PTR encoded error on failure.
507 * The address handle is used to reference a local or global destination
508 * in all UD QP post sends.
509 */
348struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr) 510struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
349{ 511{
350 return _rdma_create_ah(pd, ah_attr, NULL); 512 const struct ib_gid_attr *old_sgid_attr;
513 struct ib_ah *ah;
514 int ret;
515
516 ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
517 if (ret)
518 return ERR_PTR(ret);
519
520 ah = _rdma_create_ah(pd, ah_attr, NULL);
521
522 rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
523 return ah;
351} 524}
352EXPORT_SYMBOL(rdma_create_ah); 525EXPORT_SYMBOL(rdma_create_ah);
353 526
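
A short usage sketch of the kernel-facing entry point above, assuming a ULP that has already resolved its rdma_ah_attr. This is illustrative only and not from the patch; example_make_ah() is a placeholder name, while rdma_create_ah() and rdma_destroy_ah() are the functions shown in this file.

static int example_make_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
	struct ib_ah *ah;

	ah = rdma_create_ah(pd, ah_attr);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	/* ... post UD sends that reference ah ... */

	return rdma_destroy_ah(ah);
}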
@@ -368,15 +541,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
368 struct rdma_ah_attr *ah_attr, 541 struct rdma_ah_attr *ah_attr,
369 struct ib_udata *udata) 542 struct ib_udata *udata)
370{ 543{
544 const struct ib_gid_attr *old_sgid_attr;
545 struct ib_ah *ah;
371 int err; 546 int err;
372 547
548 err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
549 if (err)
550 return ERR_PTR(err);
551
373 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { 552 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
374 err = ib_resolve_eth_dmac(pd->device, ah_attr); 553 err = ib_resolve_eth_dmac(pd->device, ah_attr);
375 if (err) 554 if (err) {
376 return ERR_PTR(err); 555 ah = ERR_PTR(err);
556 goto out;
557 }
377 } 558 }
378 559
379 return _rdma_create_ah(pd, ah_attr, udata); 560 ah = _rdma_create_ah(pd, ah_attr, udata);
561
562out:
563 rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
564 return ah;
380} 565}
381EXPORT_SYMBOL(rdma_create_user_ah); 566EXPORT_SYMBOL(rdma_create_user_ah);
382 567
@@ -455,16 +640,16 @@ static bool find_gid_index(const union ib_gid *gid,
455 return true; 640 return true;
456} 641}
457 642
458static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, 643static const struct ib_gid_attr *
459 u16 vlan_id, const union ib_gid *sgid, 644get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
460 enum ib_gid_type gid_type, 645 u16 vlan_id, const union ib_gid *sgid,
461 u16 *gid_index) 646 enum ib_gid_type gid_type)
462{ 647{
463 struct find_gid_index_context context = {.vlan_id = vlan_id, 648 struct find_gid_index_context context = {.vlan_id = vlan_id,
464 .gid_type = gid_type}; 649 .gid_type = gid_type};
465 650
466 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, 651 return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
467 &context, gid_index); 652 &context);
468} 653}
469 654
470int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, 655int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -508,39 +693,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
508static int ib_resolve_unicast_gid_dmac(struct ib_device *device, 693static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
509 struct rdma_ah_attr *ah_attr) 694 struct rdma_ah_attr *ah_attr)
510{ 695{
511 struct ib_gid_attr sgid_attr; 696 struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
512 struct ib_global_route *grh; 697 const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
513 int hop_limit = 0xff; 698 int hop_limit = 0xff;
514 union ib_gid sgid; 699 int ret = 0;
515 int ret;
516
517 grh = rdma_ah_retrieve_grh(ah_attr);
518
519 ret = ib_query_gid(device,
520 rdma_ah_get_port_num(ah_attr),
521 grh->sgid_index,
522 &sgid, &sgid_attr);
523 if (ret || !sgid_attr.ndev) {
524 if (!ret)
525 ret = -ENXIO;
526 return ret;
527 }
528 700
529 /* If destination is link local and source GID is RoCEv1, 701 /* If destination is link local and source GID is RoCEv1,
530 * IP stack is not used. 702 * IP stack is not used.
531 */ 703 */
532 if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) && 704 if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
533 sgid_attr.gid_type == IB_GID_TYPE_ROCE) { 705 sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
534 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw, 706 rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
535 ah_attr->roce.dmac); 707 ah_attr->roce.dmac);
536 goto done; 708 return ret;
537 } 709 }
538 710
539 ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid, 711 ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
540 ah_attr->roce.dmac, 712 ah_attr->roce.dmac,
541 sgid_attr.ndev, &hop_limit); 713 sgid_attr->ndev, &hop_limit);
542done:
543 dev_put(sgid_attr.ndev);
544 714
545 grh->hop_limit = hop_limit; 715 grh->hop_limit = hop_limit;
546 return ret; 716 return ret;
@@ -555,16 +725,18 @@ done:
555 * as sgid and, sgid is used as dgid because sgid contains destinations 725 * as sgid and, sgid is used as dgid because sgid contains destinations
556 * GID whom to respond to. 726 * GID whom to respond to.
557 * 727 *
 728 * On success the caller is responsible for calling rdma_destroy_ah_attr() on the
729 * attr.
558 */ 730 */
559int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, 731int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
560 const struct ib_wc *wc, const struct ib_grh *grh, 732 const struct ib_wc *wc, const struct ib_grh *grh,
561 struct rdma_ah_attr *ah_attr) 733 struct rdma_ah_attr *ah_attr)
562{ 734{
563 u32 flow_class; 735 u32 flow_class;
564 u16 gid_index;
565 int ret; 736 int ret;
566 enum rdma_network_type net_type = RDMA_NETWORK_IB; 737 enum rdma_network_type net_type = RDMA_NETWORK_IB;
567 enum ib_gid_type gid_type = IB_GID_TYPE_IB; 738 enum ib_gid_type gid_type = IB_GID_TYPE_IB;
739 const struct ib_gid_attr *sgid_attr;
568 int hoplimit = 0xff; 740 int hoplimit = 0xff;
569 union ib_gid dgid; 741 union ib_gid dgid;
570 union ib_gid sgid; 742 union ib_gid sgid;
@@ -595,72 +767,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
595 if (!(wc->wc_flags & IB_WC_GRH)) 767 if (!(wc->wc_flags & IB_WC_GRH))
596 return -EPROTOTYPE; 768 return -EPROTOTYPE;
597 769
598 ret = get_sgid_index_from_eth(device, port_num, 770 sgid_attr = get_sgid_attr_from_eth(device, port_num,
599 vlan_id, &dgid, 771 vlan_id, &dgid,
600 gid_type, &gid_index); 772 gid_type);
601 if (ret) 773 if (IS_ERR(sgid_attr))
602 return ret; 774 return PTR_ERR(sgid_attr);
603 775
604 flow_class = be32_to_cpu(grh->version_tclass_flow); 776 flow_class = be32_to_cpu(grh->version_tclass_flow);
605 rdma_ah_set_grh(ah_attr, &sgid, 777 rdma_move_grh_sgid_attr(ah_attr,
606 flow_class & 0xFFFFF, 778 &sgid,
607 (u8)gid_index, hoplimit, 779 flow_class & 0xFFFFF,
608 (flow_class >> 20) & 0xFF); 780 hoplimit,
609 return ib_resolve_unicast_gid_dmac(device, ah_attr); 781 (flow_class >> 20) & 0xFF,
782 sgid_attr);
783
784 ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
785 if (ret)
786 rdma_destroy_ah_attr(ah_attr);
787
788 return ret;
610 } else { 789 } else {
611 rdma_ah_set_dlid(ah_attr, wc->slid); 790 rdma_ah_set_dlid(ah_attr, wc->slid);
612 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits); 791 rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
613 792
614 if (wc->wc_flags & IB_WC_GRH) { 793 if ((wc->wc_flags & IB_WC_GRH) == 0)
615 if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { 794 return 0;
616 ret = ib_find_cached_gid_by_port(device, &dgid, 795
617 IB_GID_TYPE_IB, 796 if (dgid.global.interface_id !=
618 port_num, NULL, 797 cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
619 &gid_index); 798 sgid_attr = rdma_find_gid_by_port(
620 if (ret) 799 device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
621 return ret; 800 } else
622 } else { 801 sgid_attr = rdma_get_gid_attr(device, port_num, 0);
623 gid_index = 0;
624 }
625 802
626 flow_class = be32_to_cpu(grh->version_tclass_flow); 803 if (IS_ERR(sgid_attr))
627 rdma_ah_set_grh(ah_attr, &sgid, 804 return PTR_ERR(sgid_attr);
805 flow_class = be32_to_cpu(grh->version_tclass_flow);
806 rdma_move_grh_sgid_attr(ah_attr,
807 &sgid,
628 flow_class & 0xFFFFF, 808 flow_class & 0xFFFFF,
629 (u8)gid_index, hoplimit, 809 hoplimit,
630 (flow_class >> 20) & 0xFF); 810 (flow_class >> 20) & 0xFF,
631 } 811 sgid_attr);
812
632 return 0; 813 return 0;
633 } 814 }
634} 815}
635EXPORT_SYMBOL(ib_init_ah_attr_from_wc); 816EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
636 817
818/**
819 * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
820 * of the reference
821 *
822 * @attr: Pointer to AH attribute structure
823 * @dgid: Destination GID
824 * @flow_label: Flow label
825 * @hop_limit: Hop limit
826 * @traffic_class: traffic class
827 * @sgid_attr: Pointer to SGID attribute
828 *
829 * This takes ownership of the sgid_attr reference. The caller must ensure
830 * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
831 * calling this function.
832 */
833void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
834 u32 flow_label, u8 hop_limit, u8 traffic_class,
835 const struct ib_gid_attr *sgid_attr)
836{
837 rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
838 traffic_class);
839 attr->grh.sgid_attr = sgid_attr;
840}
841EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
842
843/**
844 * rdma_destroy_ah_attr - Release reference to SGID attribute of
845 * ah attribute.
846 * @ah_attr: Pointer to ah attribute
847 *
848 * Release reference to the SGID attribute of the ah attribute if it is
849 * non NULL. It is safe to call this multiple times, and safe to call it on
850 * a zero initialized ah_attr.
851 */
852void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
853{
854 if (ah_attr->grh.sgid_attr) {
855 rdma_put_gid_attr(ah_attr->grh.sgid_attr);
856 ah_attr->grh.sgid_attr = NULL;
857 }
858}
859EXPORT_SYMBOL(rdma_destroy_ah_attr);
860
637struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, 861struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
638 const struct ib_grh *grh, u8 port_num) 862 const struct ib_grh *grh, u8 port_num)
639{ 863{
640 struct rdma_ah_attr ah_attr; 864 struct rdma_ah_attr ah_attr;
865 struct ib_ah *ah;
641 int ret; 866 int ret;
642 867
643 ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr); 868 ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
644 if (ret) 869 if (ret)
645 return ERR_PTR(ret); 870 return ERR_PTR(ret);
646 871
647 return rdma_create_ah(pd, &ah_attr); 872 ah = rdma_create_ah(pd, &ah_attr);
873
874 rdma_destroy_ah_attr(&ah_attr);
875 return ah;
648} 876}
649EXPORT_SYMBOL(ib_create_ah_from_wc); 877EXPORT_SYMBOL(ib_create_ah_from_wc);
650 878
651int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) 879int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
652{ 880{
881 const struct ib_gid_attr *old_sgid_attr;
882 int ret;
883
653 if (ah->type != ah_attr->type) 884 if (ah->type != ah_attr->type)
654 return -EINVAL; 885 return -EINVAL;
655 886
656 return ah->device->modify_ah ? 887 ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
888 if (ret)
889 return ret;
890
891 ret = ah->device->modify_ah ?
657 ah->device->modify_ah(ah, ah_attr) : 892 ah->device->modify_ah(ah, ah_attr) :
658 -EOPNOTSUPP; 893 -EOPNOTSUPP;
894
895 ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
896 rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
897 return ret;
659} 898}
660EXPORT_SYMBOL(rdma_modify_ah); 899EXPORT_SYMBOL(rdma_modify_ah);
661 900
662int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr) 901int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
663{ 902{
903 ah_attr->grh.sgid_attr = NULL;
904
664 return ah->device->query_ah ? 905 return ah->device->query_ah ?
665 ah->device->query_ah(ah, ah_attr) : 906 ah->device->query_ah(ah, ah_attr) :
666 -EOPNOTSUPP; 907 -EOPNOTSUPP;
@@ -669,13 +910,17 @@ EXPORT_SYMBOL(rdma_query_ah);
669 910
670int rdma_destroy_ah(struct ib_ah *ah) 911int rdma_destroy_ah(struct ib_ah *ah)
671{ 912{
913 const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
672 struct ib_pd *pd; 914 struct ib_pd *pd;
673 int ret; 915 int ret;
674 916
675 pd = ah->pd; 917 pd = ah->pd;
676 ret = ah->device->destroy_ah(ah); 918 ret = ah->device->destroy_ah(ah);
677 if (!ret) 919 if (!ret) {
678 atomic_dec(&pd->usecnt); 920 atomic_dec(&pd->usecnt);
921 if (sgid_attr)
922 rdma_put_gid_attr(sgid_attr);
923 }
679 924
680 return ret; 925 return ret;
681} 926}
@@ -1290,16 +1535,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1290} 1535}
1291EXPORT_SYMBOL(ib_modify_qp_is_ok); 1536EXPORT_SYMBOL(ib_modify_qp_is_ok);
1292 1537
1538/**
1539 * ib_resolve_eth_dmac - Resolve destination mac address
1540 * @device: Device to consider
1541 * @ah_attr: address handle attribute which describes the
1542 * source and destination parameters
 1543 * ib_resolve_eth_dmac() resolves the destination MAC address and L3 hop limit.
 1544 * It returns 0 on success or an appropriate error code, and initializes the
 1545 * necessary ah_attr fields when the call is successful.
1546 */
1293static int ib_resolve_eth_dmac(struct ib_device *device, 1547static int ib_resolve_eth_dmac(struct ib_device *device,
1294 struct rdma_ah_attr *ah_attr) 1548 struct rdma_ah_attr *ah_attr)
1295{ 1549{
1296 int ret = 0; 1550 int ret = 0;
1297 struct ib_global_route *grh;
1298
1299 if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
1300 return -EINVAL;
1301
1302 grh = rdma_ah_retrieve_grh(ah_attr);
1303 1551
1304 if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1552 if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1305 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1553 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
@@ -1317,6 +1565,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
1317 return ret; 1565 return ret;
1318} 1566}
1319 1567
1568static bool is_qp_type_connected(const struct ib_qp *qp)
1569{
1570 return (qp->qp_type == IB_QPT_UC ||
1571 qp->qp_type == IB_QPT_RC ||
1572 qp->qp_type == IB_QPT_XRC_INI ||
1573 qp->qp_type == IB_QPT_XRC_TGT);
1574}
1575
1320/** 1576/**
1321 * IB core internal function to perform QP attributes modification. 1577 * IB core internal function to perform QP attributes modification.
1322 */ 1578 */
@@ -1324,8 +1580,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1324 int attr_mask, struct ib_udata *udata) 1580 int attr_mask, struct ib_udata *udata)
1325{ 1581{
1326 u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 1582 u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1583 const struct ib_gid_attr *old_sgid_attr_av;
1584 const struct ib_gid_attr *old_sgid_attr_alt_av;
1327 int ret; 1585 int ret;
1328 1586
1587 if (attr_mask & IB_QP_AV) {
1588 ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
1589 &old_sgid_attr_av);
1590 if (ret)
1591 return ret;
1592 }
1593 if (attr_mask & IB_QP_ALT_PATH) {
1594 /*
1595 * FIXME: This does not track the migration state, so if the
1596 * user loads a new alternate path after the HW has migrated
1597 * from primary->alternate we will keep the wrong
1598 * references. This is OK for IB because the reference
1599 * counting does not serve any functional purpose.
1600 */
1601 ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
1602 &old_sgid_attr_alt_av);
1603 if (ret)
1604 goto out_av;
1605
1606 /*
1607 * Today the core code can only handle alternate paths and APM
1608 * for IB. Ban them in roce mode.
1609 */
1610 if (!(rdma_protocol_ib(qp->device,
1611 attr->alt_ah_attr.port_num) &&
1612 rdma_protocol_ib(qp->device, port))) {
 1613			ret = -EINVAL;
1614 goto out;
1615 }
1616 }
1617
1618 /*
1619 * If the user provided the qp_attr then we have to resolve it. Kernel
1620 * users have to provide already resolved rdma_ah_attr's
1621 */
1622 if (udata && (attr_mask & IB_QP_AV) &&
1623 attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1624 is_qp_type_connected(qp)) {
1625 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
1626 if (ret)
1627 goto out;
1628 }
1629
1329 if (rdma_ib_or_roce(qp->device, port)) { 1630 if (rdma_ib_or_roce(qp->device, port)) {
1330 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { 1631 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1331 pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", 1632 pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
@@ -1341,20 +1642,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1341 } 1642 }
1342 1643
1343 ret = ib_security_modify_qp(qp, attr, attr_mask, udata); 1644 ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
1344 if (!ret && (attr_mask & IB_QP_PORT)) 1645 if (ret)
1345 qp->port = attr->port_num; 1646 goto out;
1346 1647
1648 if (attr_mask & IB_QP_PORT)
1649 qp->port = attr->port_num;
1650 if (attr_mask & IB_QP_AV)
1651 qp->av_sgid_attr =
1652 rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
1653 if (attr_mask & IB_QP_ALT_PATH)
1654 qp->alt_path_sgid_attr = rdma_update_sgid_attr(
1655 &attr->alt_ah_attr, qp->alt_path_sgid_attr);
1656
1657out:
1658 if (attr_mask & IB_QP_ALT_PATH)
1659 rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
1660out_av:
1661 if (attr_mask & IB_QP_AV)
1662 rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
1347 return ret; 1663 return ret;
1348} 1664}
1349 1665
1350static bool is_qp_type_connected(const struct ib_qp *qp)
1351{
1352 return (qp->qp_type == IB_QPT_UC ||
1353 qp->qp_type == IB_QPT_RC ||
1354 qp->qp_type == IB_QPT_XRC_INI ||
1355 qp->qp_type == IB_QPT_XRC_TGT);
1356}
1357
1358/** 1666/**
1359 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. 1667 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1360 * @ib_qp: The QP to modify. 1668 * @ib_qp: The QP to modify.
@@ -1369,17 +1677,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
1369int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, 1677int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1370 int attr_mask, struct ib_udata *udata) 1678 int attr_mask, struct ib_udata *udata)
1371{ 1679{
1372 struct ib_qp *qp = ib_qp->real_qp; 1680 return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
1373 int ret;
1374
1375 if (attr_mask & IB_QP_AV &&
1376 attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1377 is_qp_type_connected(qp)) {
1378 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
1379 if (ret)
1380 return ret;
1381 }
1382 return _ib_modify_qp(qp, attr, attr_mask, udata);
1383} 1681}
1384EXPORT_SYMBOL(ib_modify_qp_with_udata); 1682EXPORT_SYMBOL(ib_modify_qp_with_udata);
1385 1683
@@ -1451,6 +1749,9 @@ int ib_query_qp(struct ib_qp *qp,
1451 int qp_attr_mask, 1749 int qp_attr_mask,
1452 struct ib_qp_init_attr *qp_init_attr) 1750 struct ib_qp_init_attr *qp_init_attr)
1453{ 1751{
1752 qp_attr->ah_attr.grh.sgid_attr = NULL;
1753 qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
1754
1454 return qp->device->query_qp ? 1755 return qp->device->query_qp ?
1455 qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : 1756 qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
1456 -EOPNOTSUPP; 1757 -EOPNOTSUPP;
@@ -1509,6 +1810,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
1509 1810
1510int ib_destroy_qp(struct ib_qp *qp) 1811int ib_destroy_qp(struct ib_qp *qp)
1511{ 1812{
1813 const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
1814 const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
1512 struct ib_pd *pd; 1815 struct ib_pd *pd;
1513 struct ib_cq *scq, *rcq; 1816 struct ib_cq *scq, *rcq;
1514 struct ib_srq *srq; 1817 struct ib_srq *srq;
@@ -1539,6 +1842,10 @@ int ib_destroy_qp(struct ib_qp *qp)
1539 rdma_restrack_del(&qp->res); 1842 rdma_restrack_del(&qp->res);
1540 ret = qp->device->destroy_qp(qp); 1843 ret = qp->device->destroy_qp(qp);
1541 if (!ret) { 1844 if (!ret) {
1845 if (alt_path_sgid_attr)
1846 rdma_put_gid_attr(alt_path_sgid_attr);
1847 if (av_sgid_attr)
1848 rdma_put_gid_attr(av_sgid_attr);
1542 if (pd) 1849 if (pd)
1543 atomic_dec(&pd->usecnt); 1850 atomic_dec(&pd->usecnt);
1544 if (scq) 1851 if (scq)
@@ -1977,35 +2284,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
1977} 2284}
1978EXPORT_SYMBOL(ib_destroy_rwq_ind_table); 2285EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
1979 2286
1980struct ib_flow *ib_create_flow(struct ib_qp *qp,
1981 struct ib_flow_attr *flow_attr,
1982 int domain)
1983{
1984 struct ib_flow *flow_id;
1985 if (!qp->device->create_flow)
1986 return ERR_PTR(-EOPNOTSUPP);
1987
1988 flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
1989 if (!IS_ERR(flow_id)) {
1990 atomic_inc(&qp->usecnt);
1991 flow_id->qp = qp;
1992 }
1993 return flow_id;
1994}
1995EXPORT_SYMBOL(ib_create_flow);
1996
1997int ib_destroy_flow(struct ib_flow *flow_id)
1998{
1999 int err;
2000 struct ib_qp *qp = flow_id->qp;
2001
2002 err = qp->device->destroy_flow(flow_id);
2003 if (!err)
2004 atomic_dec(&qp->usecnt);
2005 return err;
2006}
2007EXPORT_SYMBOL(ib_destroy_flow);
2008
2009int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, 2287int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
2010 struct ib_mr_status *mr_status) 2288 struct ib_mr_status *mr_status)
2011{ 2289{
@@ -2200,7 +2478,6 @@ static void __ib_drain_sq(struct ib_qp *qp)
2200 struct ib_cq *cq = qp->send_cq; 2478 struct ib_cq *cq = qp->send_cq;
2201 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 2479 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2202 struct ib_drain_cqe sdrain; 2480 struct ib_drain_cqe sdrain;
2203 struct ib_send_wr *bad_swr;
2204 struct ib_rdma_wr swr = { 2481 struct ib_rdma_wr swr = {
2205 .wr = { 2482 .wr = {
2206 .next = NULL, 2483 .next = NULL,
@@ -2219,7 +2496,7 @@ static void __ib_drain_sq(struct ib_qp *qp)
2219 sdrain.cqe.done = ib_drain_qp_done; 2496 sdrain.cqe.done = ib_drain_qp_done;
2220 init_completion(&sdrain.done); 2497 init_completion(&sdrain.done);
2221 2498
2222 ret = ib_post_send(qp, &swr.wr, &bad_swr); 2499 ret = ib_post_send(qp, &swr.wr, NULL);
2223 if (ret) { 2500 if (ret) {
2224 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); 2501 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2225 return; 2502 return;
@@ -2240,7 +2517,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
2240 struct ib_cq *cq = qp->recv_cq; 2517 struct ib_cq *cq = qp->recv_cq;
2241 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; 2518 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2242 struct ib_drain_cqe rdrain; 2519 struct ib_drain_cqe rdrain;
2243 struct ib_recv_wr rwr = {}, *bad_rwr; 2520 struct ib_recv_wr rwr = {};
2244 int ret; 2521 int ret;
2245 2522
2246 ret = ib_modify_qp(qp, &attr, IB_QP_STATE); 2523 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
@@ -2253,7 +2530,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
2253 rdrain.cqe.done = ib_drain_qp_done; 2530 rdrain.cqe.done = ib_drain_qp_done;
2254 init_completion(&rdrain.done); 2531 init_completion(&rdrain.done);
2255 2532
2256 ret = ib_post_recv(qp, &rwr, &bad_rwr); 2533 ret = ib_post_recv(qp, &rwr, NULL);
2257 if (ret) { 2534 if (ret) {
2258 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); 2535 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2259 return; 2536 return;
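
The drain helpers above already use the reworked posting interface from this series, in which a caller that does not need to know which WR failed may pass a NULL bad_wr. A caller-side sketch (illustrative only, not part of the patch; example_post_recv() is a made-up name and qp/rwr are assumed to be set up elsewhere):

static int example_post_recv(struct ib_qp *qp, const struct ib_recv_wr *rwr)
{
	/* No interest in which WR failed, so no bad_wr pointer is needed */
	return ib_post_recv(qp, rwr, NULL);
}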
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index a76e206704d4..bbfb86eb2d24 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
166 | IB_DEVICE_MEM_WINDOW 166 | IB_DEVICE_MEM_WINDOW
167 | IB_DEVICE_MEM_WINDOW_TYPE_2B 167 | IB_DEVICE_MEM_WINDOW_TYPE_2B
168 | IB_DEVICE_MEM_MGT_EXTENSIONS; 168 | IB_DEVICE_MEM_MGT_EXTENSIONS;
169 ib_attr->max_sge = dev_attr->max_qp_sges; 169 ib_attr->max_send_sge = dev_attr->max_qp_sges;
170 ib_attr->max_recv_sge = dev_attr->max_qp_sges;
170 ib_attr->max_sge_rd = dev_attr->max_qp_sges; 171 ib_attr->max_sge_rd = dev_attr->max_qp_sges;
171 ib_attr->max_cq = dev_attr->max_cq; 172 ib_attr->max_cq = dev_attr->max_cq;
172 ib_attr->max_cqe = dev_attr->max_cq_wqes; 173 ib_attr->max_cqe = dev_attr->max_cq_wqes;
@@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
243 port_attr->gid_tbl_len = dev_attr->max_sgid; 244 port_attr->gid_tbl_len = dev_attr->max_sgid;
244 port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | 245 port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
245 IB_PORT_DEVICE_MGMT_SUP | 246 IB_PORT_DEVICE_MGMT_SUP |
246 IB_PORT_VENDOR_CLASS_SUP | 247 IB_PORT_VENDOR_CLASS_SUP;
247 IB_PORT_IP_BASED_GIDS; 248 port_attr->ip_gids = true;
248 249
249 port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; 250 port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
250 port_attr->bad_pkey_cntr = 0; 251 port_attr->bad_pkey_cntr = 0;
@@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
364 return rc; 365 return rc;
365} 366}
366 367
367int bnxt_re_add_gid(const union ib_gid *gid, 368int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
368 const struct ib_gid_attr *attr, void **context)
369{ 369{
370 int rc; 370 int rc;
371 u32 tbl_idx = 0; 371 u32 tbl_idx = 0;
@@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid,
377 if ((attr->ndev) && is_vlan_dev(attr->ndev)) 377 if ((attr->ndev) && is_vlan_dev(attr->ndev))
378 vlan_id = vlan_dev_vlan_id(attr->ndev); 378 vlan_id = vlan_dev_vlan_id(attr->ndev);
379 379
380 rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid, 380 rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
381 rdev->qplib_res.netdev->dev_addr, 381 rdev->qplib_res.netdev->dev_addr,
382 vlan_id, true, &tbl_idx); 382 vlan_id, true, &tbl_idx);
383 if (rc == -EALREADY) { 383 if (rc == -EALREADY) {
@@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
673 int rc; 673 int rc;
674 u8 nw_type; 674 u8 nw_type;
675 675
676 struct ib_gid_attr sgid_attr;
677
678 if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) { 676 if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
679 dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set"); 677 dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
680 return ERR_PTR(-EINVAL); 678 return ERR_PTR(-EINVAL);
@@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
705 grh->dgid.raw) && 703 grh->dgid.raw) &&
706 !rdma_link_local_addr((struct in6_addr *) 704 !rdma_link_local_addr((struct in6_addr *)
707 grh->dgid.raw)) { 705 grh->dgid.raw)) {
708 union ib_gid sgid; 706 const struct ib_gid_attr *sgid_attr;
709 707
710 rc = ib_get_cached_gid(&rdev->ibdev, 1, 708 sgid_attr = grh->sgid_attr;
711 grh->sgid_index, &sgid,
712 &sgid_attr);
713 if (rc) {
714 dev_err(rdev_to_dev(rdev),
715 "Failed to query gid at index %d",
716 grh->sgid_index);
717 goto fail;
718 }
719 dev_put(sgid_attr.ndev);
720 /* Get network header type for this GID */ 709 /* Get network header type for this GID */
721 nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 710 nw_type = rdma_gid_attr_network_type(sgid_attr);
722 switch (nw_type) { 711 switch (nw_type) {
723 case RDMA_NETWORK_IPV4: 712 case RDMA_NETWORK_IPV4:
724 ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; 713 ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
@@ -1408,7 +1397,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
1408 } 1397 }
1409 1398
1410 if (srq_init_attr->srq_type != IB_SRQT_BASIC) { 1399 if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
1411 rc = -ENOTSUPP; 1400 rc = -EOPNOTSUPP;
1412 goto exit; 1401 goto exit;
1413 } 1402 }
1414 1403
@@ -1530,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
1530 return 0; 1519 return 0;
1531} 1520}
1532 1521
1533int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr, 1522int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr,
1534 struct ib_recv_wr **bad_wr) 1523 const struct ib_recv_wr **bad_wr)
1535{ 1524{
1536 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, 1525 struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
1537 ib_srq); 1526 ib_srq);
@@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1599 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; 1588 struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
1600 enum ib_qp_state curr_qp_state, new_qp_state; 1589 enum ib_qp_state curr_qp_state, new_qp_state;
1601 int rc, entries; 1590 int rc, entries;
1602 int status;
1603 union ib_gid sgid;
1604 struct ib_gid_attr sgid_attr;
1605 unsigned int flags; 1591 unsigned int flags;
1606 u8 nw_type; 1592 u8 nw_type;
1607 1593
@@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1668 if (qp_attr_mask & IB_QP_AV) { 1654 if (qp_attr_mask & IB_QP_AV) {
1669 const struct ib_global_route *grh = 1655 const struct ib_global_route *grh =
1670 rdma_ah_read_grh(&qp_attr->ah_attr); 1656 rdma_ah_read_grh(&qp_attr->ah_attr);
1657 const struct ib_gid_attr *sgid_attr;
1671 1658
1672 qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | 1659 qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
1673 CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | 1660 CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1691 ether_addr_copy(qp->qplib_qp.ah.dmac, 1678 ether_addr_copy(qp->qplib_qp.ah.dmac,
1692 qp_attr->ah_attr.roce.dmac); 1679 qp_attr->ah_attr.roce.dmac);
1693 1680
1694 status = ib_get_cached_gid(&rdev->ibdev, 1, 1681 sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
1695 grh->sgid_index, 1682 memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr,
1696 &sgid, &sgid_attr); 1683 ETH_ALEN);
1697 if (!status) { 1684 nw_type = rdma_gid_attr_network_type(sgid_attr);
1698 memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr, 1685 switch (nw_type) {
1699 ETH_ALEN); 1686 case RDMA_NETWORK_IPV4:
1700 dev_put(sgid_attr.ndev); 1687 qp->qplib_qp.nw_type =
1701 nw_type = ib_gid_to_network_type(sgid_attr.gid_type, 1688 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
1702 &sgid); 1689 break;
1703 switch (nw_type) { 1690 case RDMA_NETWORK_IPV6:
1704 case RDMA_NETWORK_IPV4: 1691 qp->qplib_qp.nw_type =
1705 qp->qplib_qp.nw_type = 1692 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
1706 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; 1693 break;
1707 break; 1694 default:
1708 case RDMA_NETWORK_IPV6: 1695 qp->qplib_qp.nw_type =
1709 qp->qplib_qp.nw_type = 1696 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
1710 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; 1697 break;
1711 break;
1712 default:
1713 qp->qplib_qp.nw_type =
1714 CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
1715 break;
1716 }
1717 } 1698 }
1718 } 1699 }
1719 1700
@@ -1895,19 +1876,17 @@ out:
 1895/* Routine for sending QP1 packets for RoCE V1 and V2 1876/* Routine for sending QP1 packets for RoCE V1 and V2
1896 */ 1877 */
1897static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, 1878static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
1898 struct ib_send_wr *wr, 1879 const struct ib_send_wr *wr,
1899 struct bnxt_qplib_swqe *wqe, 1880 struct bnxt_qplib_swqe *wqe,
1900 int payload_size) 1881 int payload_size)
1901{ 1882{
1902 struct ib_device *ibdev = &qp->rdev->ibdev;
1903 struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, 1883 struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
1904 ib_ah); 1884 ib_ah);
1905 struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; 1885 struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
1886 const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr;
1906 struct bnxt_qplib_sge sge; 1887 struct bnxt_qplib_sge sge;
1907 union ib_gid sgid;
1908 u8 nw_type; 1888 u8 nw_type;
1909 u16 ether_type; 1889 u16 ether_type;
1910 struct ib_gid_attr sgid_attr;
1911 union ib_gid dgid; 1890 union ib_gid dgid;
1912 bool is_eth = false; 1891 bool is_eth = false;
1913 bool is_vlan = false; 1892 bool is_vlan = false;
@@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
1920 1899
1921 memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); 1900 memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
1922 1901
1923 rc = ib_get_cached_gid(ibdev, 1, 1902 if (is_vlan_dev(sgid_attr->ndev))
1924 qplib_ah->host_sgid_index, &sgid, 1903 vlan_id = vlan_dev_vlan_id(sgid_attr->ndev);
1925 &sgid_attr);
1926 if (rc) {
1927 dev_err(rdev_to_dev(qp->rdev),
1928 "Failed to query gid at index %d",
1929 qplib_ah->host_sgid_index);
1930 return rc;
1931 }
1932 if (sgid_attr.ndev) {
1933 if (is_vlan_dev(sgid_attr.ndev))
1934 vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
1935 dev_put(sgid_attr.ndev);
1936 }
1937 /* Get network header type for this GID */ 1904 /* Get network header type for this GID */
1938 nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 1905 nw_type = rdma_gid_attr_network_type(sgid_attr);
1939 switch (nw_type) { 1906 switch (nw_type) {
1940 case RDMA_NETWORK_IPV4: 1907 case RDMA_NETWORK_IPV4:
1941 nw_type = BNXT_RE_ROCEV2_IPV4_PACKET; 1908 nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
@@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
1948 break; 1915 break;
1949 } 1916 }
1950 memcpy(&dgid.raw, &qplib_ah->dgid, 16); 1917 memcpy(&dgid.raw, &qplib_ah->dgid, 16);
1951 is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; 1918 is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
1952 if (is_udp) { 1919 if (is_udp) {
1953 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { 1920 if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
1954 ip_version = 4; 1921 ip_version = 4;
1955 ether_type = ETH_P_IP; 1922 ether_type = ETH_P_IP;
1956 } else { 1923 } else {
@@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
1983 } 1950 }
1984 1951
1985 if (is_grh || (ip_version == 6)) { 1952 if (is_grh || (ip_version == 6)) {
1986 memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); 1953 memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw,
1954 sizeof(sgid_attr->gid));
1987 memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, 1955 memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
1988 sizeof(sgid)); 1956 sizeof(sgid_attr->gid));
1989 qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit; 1957 qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit;
1990 } 1958 }
1991 1959
@@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
1995 qp->qp1_hdr.ip4.frag_off = htons(IP_DF); 1963 qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
1996 qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit; 1964 qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
1997 1965
1998 memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4); 1966 memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4);
1999 memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4); 1967 memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
2000 qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr); 1968 qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
2001 } 1969 }
@@ -2080,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
2080 * and the MAD datagram out to the provided SGE. 2048 * and the MAD datagram out to the provided SGE.
2081 */ 2049 */
2082static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, 2050static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
2083 struct ib_recv_wr *wr, 2051 const struct ib_recv_wr *wr,
2084 struct bnxt_qplib_swqe *wqe, 2052 struct bnxt_qplib_swqe *wqe,
2085 int payload_size) 2053 int payload_size)
2086{ 2054{
@@ -2125,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp)
2125} 2093}
2126 2094
2127static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, 2095static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
2128 struct ib_send_wr *wr, 2096 const struct ib_send_wr *wr,
2129 struct bnxt_qplib_swqe *wqe) 2097 struct bnxt_qplib_swqe *wqe)
2130{ 2098{
2131 struct bnxt_re_ah *ah = NULL; 2099 struct bnxt_re_ah *ah = NULL;
@@ -2163,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
2163 return 0; 2131 return 0;
2164} 2132}
2165 2133
2166static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr, 2134static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
2167 struct bnxt_qplib_swqe *wqe) 2135 struct bnxt_qplib_swqe *wqe)
2168{ 2136{
2169 switch (wr->opcode) { 2137 switch (wr->opcode) {
@@ -2195,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
2195 return 0; 2163 return 0;
2196} 2164}
2197 2165
2198static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, 2166static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr,
2199 struct bnxt_qplib_swqe *wqe) 2167 struct bnxt_qplib_swqe *wqe)
2200{ 2168{
2201 switch (wr->opcode) { 2169 switch (wr->opcode) {
@@ -2222,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
2222 return 0; 2190 return 0;
2223} 2191}
2224 2192
2225static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr, 2193static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
2226 struct bnxt_qplib_swqe *wqe) 2194 struct bnxt_qplib_swqe *wqe)
2227{ 2195{
2228 wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; 2196 wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
@@ -2241,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
2241 return 0; 2209 return 0;
2242} 2210}
2243 2211
2244static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr, 2212static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
2245 struct bnxt_qplib_swqe *wqe) 2213 struct bnxt_qplib_swqe *wqe)
2246{ 2214{
2247 struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr); 2215 struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
@@ -2283,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
2283} 2251}
2284 2252
2285static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev, 2253static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
2286 struct ib_send_wr *wr, 2254 const struct ib_send_wr *wr,
2287 struct bnxt_qplib_swqe *wqe) 2255 struct bnxt_qplib_swqe *wqe)
2288{ 2256{
2289 /* Copy the inline data to the data field */ 2257 /* Copy the inline data to the data field */
@@ -2313,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
2313} 2281}
2314 2282
2315static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev, 2283static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
2316 struct ib_send_wr *wr, 2284 const struct ib_send_wr *wr,
2317 struct bnxt_qplib_swqe *wqe) 2285 struct bnxt_qplib_swqe *wqe)
2318{ 2286{
2319 int payload_sz = 0; 2287 int payload_sz = 0;
@@ -2345,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
2345 2313
2346static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, 2314static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
2347 struct bnxt_re_qp *qp, 2315 struct bnxt_re_qp *qp,
2348 struct ib_send_wr *wr) 2316 const struct ib_send_wr *wr)
2349{ 2317{
2350 struct bnxt_qplib_swqe wqe; 2318 struct bnxt_qplib_swqe wqe;
2351 int rc = 0, payload_sz = 0; 2319 int rc = 0, payload_sz = 0;
@@ -2393,8 +2361,8 @@ bad:
2393 return rc; 2361 return rc;
2394} 2362}
2395 2363
2396int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr, 2364int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
2397 struct ib_send_wr **bad_wr) 2365 const struct ib_send_wr **bad_wr)
2398{ 2366{
2399 struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); 2367 struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
2400 struct bnxt_qplib_swqe wqe; 2368 struct bnxt_qplib_swqe wqe;
@@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
2441 default: 2409 default:
2442 break; 2410 break;
2443 } 2411 }
2444 /* Fall thru to build the wqe */ 2412 /* fall through */
2445 case IB_WR_SEND_WITH_INV: 2413 case IB_WR_SEND_WITH_INV:
2446 rc = bnxt_re_build_send_wqe(qp, wr, &wqe); 2414 rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
2447 break; 2415 break;
@@ -2493,7 +2461,7 @@ bad:
2493 2461
2494static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, 2462static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
2495 struct bnxt_re_qp *qp, 2463 struct bnxt_re_qp *qp,
2496 struct ib_recv_wr *wr) 2464 const struct ib_recv_wr *wr)
2497{ 2465{
2498 struct bnxt_qplib_swqe wqe; 2466 struct bnxt_qplib_swqe wqe;
2499 int rc = 0; 2467 int rc = 0;
@@ -2526,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
2526 return rc; 2494 return rc;
2527} 2495}
2528 2496
2529int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr, 2497int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
2530 struct ib_recv_wr **bad_wr) 2498 const struct ib_recv_wr **bad_wr)
2531{ 2499{
2532 struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); 2500 struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
2533 struct bnxt_qplib_swqe wqe; 2501 struct bnxt_qplib_swqe wqe;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c6414cad4af..aa33e7b82c84 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
158int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, 158int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
159 u16 index, u16 *pkey); 159 u16 index, u16 *pkey);
160int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context); 160int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
161int bnxt_re_add_gid(const union ib_gid *gid, 161int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
162 const struct ib_gid_attr *attr, void **context);
163int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, 162int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
164 int index, union ib_gid *gid); 163 int index, union ib_gid *gid);
165enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, 164enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
@@ -182,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
182 struct ib_udata *udata); 181 struct ib_udata *udata);
183int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); 182int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
184int bnxt_re_destroy_srq(struct ib_srq *srq); 183int bnxt_re_destroy_srq(struct ib_srq *srq);
185int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, 184int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
186 struct ib_recv_wr **bad_recv_wr); 185 const struct ib_recv_wr **bad_recv_wr);
187struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, 186struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
188 struct ib_qp_init_attr *qp_init_attr, 187 struct ib_qp_init_attr *qp_init_attr,
189 struct ib_udata *udata); 188 struct ib_udata *udata);
@@ -192,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
192int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 191int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
193 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); 192 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
194int bnxt_re_destroy_qp(struct ib_qp *qp); 193int bnxt_re_destroy_qp(struct ib_qp *qp);
195int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, 194int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
196 struct ib_send_wr **bad_send_wr); 195 const struct ib_send_wr **bad_send_wr);
197int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, 196int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
198 struct ib_recv_wr **bad_recv_wr); 197 const struct ib_recv_wr **bad_recv_wr);
199struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev, 198struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
200 const struct ib_cq_init_attr *attr, 199 const struct ib_cq_init_attr *attr,
201 struct ib_ucontext *context, 200 struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 50d8f1fc98d5..e426b990c1dd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2354 srq = qp->srq; 2354 srq = qp->srq;
2355 if (!srq) 2355 if (!srq)
2356 return -EINVAL; 2356 return -EINVAL;
2357 if (wr_id_idx > srq->hwq.max_elements) { 2357 if (wr_id_idx >= srq->hwq.max_elements) {
2358 dev_err(&cq->hwq.pdev->dev, 2358 dev_err(&cq->hwq.pdev->dev,
2359 "QPLIB: FP: CQ Process RC "); 2359 "QPLIB: FP: CQ Process RC ");
2360 dev_err(&cq->hwq.pdev->dev, 2360 dev_err(&cq->hwq.pdev->dev,
@@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2369 *pcqe = cqe; 2369 *pcqe = cqe;
2370 } else { 2370 } else {
2371 rq = &qp->rq; 2371 rq = &qp->rq;
2372 if (wr_id_idx > rq->hwq.max_elements) { 2372 if (wr_id_idx >= rq->hwq.max_elements) {
2373 dev_err(&cq->hwq.pdev->dev, 2373 dev_err(&cq->hwq.pdev->dev,
2374 "QPLIB: FP: CQ Process RC "); 2374 "QPLIB: FP: CQ Process RC ");
2375 dev_err(&cq->hwq.pdev->dev, 2375 dev_err(&cq->hwq.pdev->dev,
@@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2437 if (!srq) 2437 if (!srq)
2438 return -EINVAL; 2438 return -EINVAL;
2439 2439
2440 if (wr_id_idx > srq->hwq.max_elements) { 2440 if (wr_id_idx >= srq->hwq.max_elements) {
2441 dev_err(&cq->hwq.pdev->dev, 2441 dev_err(&cq->hwq.pdev->dev,
2442 "QPLIB: FP: CQ Process UD "); 2442 "QPLIB: FP: CQ Process UD ");
2443 dev_err(&cq->hwq.pdev->dev, 2443 dev_err(&cq->hwq.pdev->dev,
@@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2452 *pcqe = cqe; 2452 *pcqe = cqe;
2453 } else { 2453 } else {
2454 rq = &qp->rq; 2454 rq = &qp->rq;
2455 if (wr_id_idx > rq->hwq.max_elements) { 2455 if (wr_id_idx >= rq->hwq.max_elements) {
2456 dev_err(&cq->hwq.pdev->dev, 2456 dev_err(&cq->hwq.pdev->dev,
2457 "QPLIB: FP: CQ Process UD "); 2457 "QPLIB: FP: CQ Process UD ");
2458 dev_err(&cq->hwq.pdev->dev, 2458 dev_err(&cq->hwq.pdev->dev,
@@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2546 "QPLIB: FP: SRQ used but not defined??"); 2546 "QPLIB: FP: SRQ used but not defined??");
2547 return -EINVAL; 2547 return -EINVAL;
2548 } 2548 }
2549 if (wr_id_idx > srq->hwq.max_elements) { 2549 if (wr_id_idx >= srq->hwq.max_elements) {
2550 dev_err(&cq->hwq.pdev->dev, 2550 dev_err(&cq->hwq.pdev->dev,
2551 "QPLIB: FP: CQ Process Raw/QP1 "); 2551 "QPLIB: FP: CQ Process Raw/QP1 ");
2552 dev_err(&cq->hwq.pdev->dev, 2552 dev_err(&cq->hwq.pdev->dev,
@@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2561 *pcqe = cqe; 2561 *pcqe = cqe;
2562 } else { 2562 } else {
2563 rq = &qp->rq; 2563 rq = &qp->rq;
2564 if (wr_id_idx > rq->hwq.max_elements) { 2564 if (wr_id_idx >= rq->hwq.max_elements) {
2565 dev_err(&cq->hwq.pdev->dev, 2565 dev_err(&cq->hwq.pdev->dev,
2566 "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); 2566 "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
2567 dev_err(&cq->hwq.pdev->dev, 2567 dev_err(&cq->hwq.pdev->dev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2f3f32eaa1d5..4097f3fa25c5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
197 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index, 197 struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
198 struct bnxt_qplib_gid *gid) 198 struct bnxt_qplib_gid *gid)
199{ 199{
200 if (index > sgid_tbl->max) { 200 if (index >= sgid_tbl->max) {
201 dev_err(&res->pdev->dev, 201 dev_err(&res->pdev->dev,
202 "QPLIB: Index %d exceeded SGID table max (%d)", 202 "QPLIB: Index %d exceeded SGID table max (%d)",
203 index, sgid_tbl->max); 203 index, sgid_tbl->max);
@@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
402 *pkey = 0xFFFF; 402 *pkey = 0xFFFF;
403 return 0; 403 return 0;
404 } 404 }
405 if (index > pkey_tbl->max) { 405 if (index >= pkey_tbl->max) {
406 dev_err(&res->pdev->dev, 406 dev_err(&res->pdev->dev,
407 "QPLIB: Index %d exceeded PKEY table max (%d)", 407 "QPLIB: Index %d exceeded PKEY table max (%d)",
408 index, pkey_tbl->max); 408 index, pkey_tbl->max);
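The bnxt_re hunks above all make the same correction: an index into a table with max (or max_elements) entries is only valid for 0..max-1, so the rejection test must be ">=", not ">". A minimal standalone sketch of the corrected check; the table and names here are illustrative, not driver code:

#include <stdio.h>

/* Reject any index that is not strictly below the table size. */
static int get_entry(const int *tbl, unsigned int max, unsigned int index,
		     int *out)
{
	if (index >= max) {	/* 'index > max' would wrongly allow index == max */
		fprintf(stderr, "index %u exceeded table max (%u)\n", index, max);
		return -1;
	}
	*out = tbl[index];
	return 0;
}

int main(void)
{
	int tbl[4] = { 10, 20, 30, 40 };
	int v;

	/* index 4 must be rejected for a 4-entry table */
	if (get_entry(tbl, 4, 4, &v))
		fprintf(stderr, "rejected, as expected\n");
	return 0;
}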
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 0a8542c20804..a098c0140580 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -32,38 +32,16 @@
32#include "iwch_provider.h" 32#include "iwch_provider.h"
33#include "iwch.h" 33#include "iwch.h"
34 34
35/* 35static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
36 * Get one cq entry from cxio and map it to openib. 36 struct iwch_qp *qhp, struct ib_wc *wc)
37 *
38 * Returns:
39 * 0 EMPTY;
40 * 1 cqe returned
41 * -EAGAIN caller must try again
42 * any other -errno fatal error
43 */
44static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
45 struct ib_wc *wc)
46{ 37{
47 struct iwch_qp *qhp = NULL; 38 struct t3_wq *wq = qhp ? &qhp->wq : NULL;
48 struct t3_cqe cqe, *rd_cqe; 39 struct t3_cqe cqe;
49 struct t3_wq *wq;
50 u32 credit = 0; 40 u32 credit = 0;
51 u8 cqe_flushed; 41 u8 cqe_flushed;
52 u64 cookie; 42 u64 cookie;
53 int ret = 1; 43 int ret = 1;
54 44
55 rd_cqe = cxio_next_cqe(&chp->cq);
56
57 if (!rd_cqe)
58 return 0;
59
60 qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
61 if (!qhp)
62 wq = NULL;
63 else {
64 spin_lock(&qhp->lock);
65 wq = &(qhp->wq);
66 }
67 ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, 45 ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
68 &credit); 46 &credit);
69 if (t3a_device(chp->rhp) && credit) { 47 if (t3a_device(chp->rhp) && credit) {
@@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
79 ret = 1; 57 ret = 1;
80 58
81 wc->wr_id = cookie; 59 wc->wr_id = cookie;
82 wc->qp = &qhp->ibqp; 60 wc->qp = qhp ? &qhp->ibqp : NULL;
83 wc->vendor_err = CQE_STATUS(cqe); 61 wc->vendor_err = CQE_STATUS(cqe);
84 wc->wc_flags = 0; 62 wc->wc_flags = 0;
85 63
@@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
182 } 160 }
183 } 161 }
184out: 162out:
185 if (wq) 163 return ret;
164}
165
166/*
167 * Get one cq entry from cxio and map it to openib.
168 *
169 * Returns:
170 * 0 EMPTY;
171 * 1 cqe returned
172 * -EAGAIN caller must try again
173 * any other -errno fatal error
174 */
175static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
176 struct ib_wc *wc)
177{
178 struct iwch_qp *qhp;
179 struct t3_cqe *rd_cqe;
180 int ret;
181
182 rd_cqe = cxio_next_cqe(&chp->cq);
183
184 if (!rd_cqe)
185 return 0;
186
187 qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
188 if (qhp) {
189 spin_lock(&qhp->lock);
190 ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
186 spin_unlock(&qhp->lock); 191 spin_unlock(&qhp->lock);
192 } else {
193 ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
194 }
187 return ret; 195 return ret;
188} 196}
189 197
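The iwch_cq.c change above splits the old poll routine in two: a wrapper that looks up the QP and takes its lock when one exists, and a body (__iwch_poll_cq_one) that simply accepts a possibly NULL QP. The unlock then sits on one obvious path instead of a conditional at the end of a long function. A rough standalone sketch of that shape, with a pthread mutex standing in for the spinlock and invented types:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct qp {
	pthread_mutex_t lock;
	int id;
};

/* Body: caller holds qp->lock when qp is non-NULL; tolerates qp == NULL. */
static int poll_one_body(struct qp *qp)
{
	return qp ? qp->id : 0;	/* translate one completion (stub) */
}

/* Wrapper: owns the lookup and the locking, so the body stays lock-free. */
static int poll_one(struct qp *qp)
{
	int ret;

	if (qp) {
		pthread_mutex_lock(&qp->lock);
		ret = poll_one_body(qp);
		pthread_mutex_unlock(&qp->lock);
	} else {
		ret = poll_one_body(NULL);
	}
	return ret;
}

int main(void)	/* build with -pthread */
{
	struct qp q = { .lock = PTHREAD_MUTEX_INITIALIZER, .id = 7 };

	printf("%d %d\n", poll_one(&q), poll_one(NULL));
	return 0;
}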
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index be097c6723c0..1b9ff21aa1d5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -61,42 +61,6 @@
61#include <rdma/cxgb3-abi.h> 61#include <rdma/cxgb3-abi.h>
62#include "common.h" 62#include "common.h"
63 63
64static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
65 struct rdma_ah_attr *ah_attr,
66 struct ib_udata *udata)
67{
68 return ERR_PTR(-ENOSYS);
69}
70
71static int iwch_ah_destroy(struct ib_ah *ah)
72{
73 return -ENOSYS;
74}
75
76static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
77{
78 return -ENOSYS;
79}
80
81static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
82{
83 return -ENOSYS;
84}
85
86static int iwch_process_mad(struct ib_device *ibdev,
87 int mad_flags,
88 u8 port_num,
89 const struct ib_wc *in_wc,
90 const struct ib_grh *in_grh,
91 const struct ib_mad_hdr *in_mad,
92 size_t in_mad_size,
93 struct ib_mad_hdr *out_mad,
94 size_t *out_mad_size,
95 u16 *out_mad_pkey_index)
96{
97 return -ENOSYS;
98}
99
100static int iwch_dealloc_ucontext(struct ib_ucontext *context) 64static int iwch_dealloc_ucontext(struct ib_ucontext *context)
101{ 65{
102 struct iwch_dev *rhp = to_iwch_dev(context->device); 66 struct iwch_dev *rhp = to_iwch_dev(context->device);
@@ -1103,7 +1067,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
1103 props->max_mr_size = dev->attr.max_mr_size; 1067 props->max_mr_size = dev->attr.max_mr_size;
1104 props->max_qp = dev->attr.max_qps; 1068 props->max_qp = dev->attr.max_qps;
1105 props->max_qp_wr = dev->attr.max_wrs; 1069 props->max_qp_wr = dev->attr.max_wrs;
1106 props->max_sge = dev->attr.max_sge_per_wr; 1070 props->max_send_sge = dev->attr.max_sge_per_wr;
1071 props->max_recv_sge = dev->attr.max_sge_per_wr;
1107 props->max_sge_rd = 1; 1072 props->max_sge_rd = 1;
1108 props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; 1073 props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
1109 props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; 1074 props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
@@ -1398,8 +1363,6 @@ int iwch_register_device(struct iwch_dev *dev)
1398 dev->ibdev.mmap = iwch_mmap; 1363 dev->ibdev.mmap = iwch_mmap;
1399 dev->ibdev.alloc_pd = iwch_allocate_pd; 1364 dev->ibdev.alloc_pd = iwch_allocate_pd;
1400 dev->ibdev.dealloc_pd = iwch_deallocate_pd; 1365 dev->ibdev.dealloc_pd = iwch_deallocate_pd;
1401 dev->ibdev.create_ah = iwch_ah_create;
1402 dev->ibdev.destroy_ah = iwch_ah_destroy;
1403 dev->ibdev.create_qp = iwch_create_qp; 1366 dev->ibdev.create_qp = iwch_create_qp;
1404 dev->ibdev.modify_qp = iwch_ib_modify_qp; 1367 dev->ibdev.modify_qp = iwch_ib_modify_qp;
1405 dev->ibdev.destroy_qp = iwch_destroy_qp; 1368 dev->ibdev.destroy_qp = iwch_destroy_qp;
@@ -1414,9 +1377,6 @@ int iwch_register_device(struct iwch_dev *dev)
1414 dev->ibdev.dealloc_mw = iwch_dealloc_mw; 1377 dev->ibdev.dealloc_mw = iwch_dealloc_mw;
1415 dev->ibdev.alloc_mr = iwch_alloc_mr; 1378 dev->ibdev.alloc_mr = iwch_alloc_mr;
1416 dev->ibdev.map_mr_sg = iwch_map_mr_sg; 1379 dev->ibdev.map_mr_sg = iwch_map_mr_sg;
1417 dev->ibdev.attach_mcast = iwch_multicast_attach;
1418 dev->ibdev.detach_mcast = iwch_multicast_detach;
1419 dev->ibdev.process_mad = iwch_process_mad;
1420 dev->ibdev.req_notify_cq = iwch_arm_cq; 1380 dev->ibdev.req_notify_cq = iwch_arm_cq;
1421 dev->ibdev.post_send = iwch_post_send; 1381 dev->ibdev.post_send = iwch_post_send;
1422 dev->ibdev.post_recv = iwch_post_receive; 1382 dev->ibdev.post_recv = iwch_post_receive;
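Two things happen in iwch_provider.c: the -ENOSYS stub verbs (AH create/destroy, multicast attach/detach, process_mad) are deleted, leaving those callbacks unset, presumably relying on the core to report missing optional verbs as unsupported; and iwch_query_device() now fills the separate max_send_sge and max_recv_sge attributes instead of a single max_sge (cxgb3 has one underlying limit, so both get the same value). A consumer sizing its scatter/gather lists should clamp each direction independently now that the two limits may differ; a small sketch with invented numbers:

#include <stdio.h>

struct dev_attrs {
	unsigned int max_send_sge;
	unsigned int max_recv_sge;
};

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Invented device limits; real values come from the query verb. */
	struct dev_attrs attr = { .max_send_sge = 16, .max_recv_sge = 4 };
	unsigned int wanted = 8;

	printf("use %u SGEs on send, %u on receive\n",
	       min_u(wanted, attr.max_send_sge),
	       min_u(wanted, attr.max_recv_sge));
	return 0;
}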
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2e38ddefea8a..8adbe9658935 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -326,10 +326,10 @@ enum iwch_qp_query_flags {
326}; 326};
327 327
328u16 iwch_rqes_posted(struct iwch_qp *qhp); 328u16 iwch_rqes_posted(struct iwch_qp *qhp);
329int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 329int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
330 struct ib_send_wr **bad_wr); 330 const struct ib_send_wr **bad_wr);
331int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 331int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
332 struct ib_recv_wr **bad_wr); 332 const struct ib_recv_wr **bad_wr);
333int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 333int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
334int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); 334int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
335int iwch_post_zb_read(struct iwch_ep *ep); 335int iwch_post_zb_read(struct iwch_ep *ep);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3871e1fd8395..c649faad63f9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -39,8 +39,8 @@
39 39
40#define NO_SUPPORT -1 40#define NO_SUPPORT -1
41 41
42static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, 42static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
43 u8 * flit_cnt) 43 u8 *flit_cnt)
44{ 44{
45 int i; 45 int i;
46 u32 plen; 46 u32 plen;
@@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
84 return 0; 84 return 0;
85} 85}
86 86
87static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, 87static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
88 u8 *flit_cnt) 88 u8 *flit_cnt)
89{ 89{
90 int i; 90 int i;
91 u32 plen; 91 u32 plen;
@@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
125 return 0; 125 return 0;
126} 126}
127 127
128static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, 128static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
129 u8 *flit_cnt) 129 u8 *flit_cnt)
130{ 130{
131 if (wr->num_sge > 1) 131 if (wr->num_sge > 1)
132 return -EINVAL; 132 return -EINVAL;
@@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
146 return 0; 146 return 0;
147} 147}
148 148
149static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr, 149static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
150 u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) 150 u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
151{ 151{
152 struct iwch_mr *mhp = to_iwch_mr(wr->mr); 152 struct iwch_mr *mhp = to_iwch_mr(wr->mr);
153 int i; 153 int i;
@@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
189 return 0; 189 return 0;
190} 190}
191 191
192static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, 192static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
193 u8 *flit_cnt) 193 u8 *flit_cnt)
194{ 194{
195 wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); 195 wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
196 wqe->local_inv.reserved = 0; 196 wqe->local_inv.reserved = 0;
@@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
246} 246}
247 247
248static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, 248static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
249 struct ib_recv_wr *wr) 249 const struct ib_recv_wr *wr)
250{ 250{
251 int i, err = 0; 251 int i, err = 0;
252 u32 pbl_addr[T3_MAX_SGE]; 252 u32 pbl_addr[T3_MAX_SGE];
@@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
286} 286}
287 287
288static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, 288static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
289 struct ib_recv_wr *wr) 289 const struct ib_recv_wr *wr)
290{ 290{
291 int i; 291 int i;
292 u32 pbl_addr; 292 u32 pbl_addr;
@@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
348 return 0; 348 return 0;
349} 349}
350 350
351int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 351int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
352 struct ib_send_wr **bad_wr) 352 const struct ib_send_wr **bad_wr)
353{ 353{
354 int err = 0; 354 int err = 0;
355 u8 uninitialized_var(t3_wr_flit_cnt); 355 u8 uninitialized_var(t3_wr_flit_cnt);
@@ -463,8 +463,8 @@ out:
463 return err; 463 return err;
464} 464}
465 465
466int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 466int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
467 struct ib_recv_wr **bad_wr) 467 const struct ib_recv_wr **bad_wr)
468{ 468{
469 int err = 0; 469 int err = 0;
470 struct iwch_qp *qhp; 470 struct iwch_qp *qhp;
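The iwch_qp.c hunks constify the posted work-request chains: build_rdma_send() and friends only read the caller's WRs, and a failure is reported by pointing the now const-qualified bad_wr at the offending entry. A minimal sketch of walking a const chain and reporting the failing element; the types below are invented stand-ins, not the verbs structures:

#include <stddef.h>
#include <stdio.h>

struct send_wr {
	unsigned long long wr_id;
	const struct send_wr *next;
};

/* Post a chain without modifying it; on error point bad_wr at the culprit. */
static int post_send(const struct send_wr *wr, const struct send_wr **bad_wr)
{
	for (; wr; wr = wr->next) {
		if (wr->wr_id == 0) {	/* pretend a zero wr_id is invalid */
			*bad_wr = wr;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct send_wr b = { .wr_id = 0, .next = NULL };
	struct send_wr a = { .wr_id = 1, .next = &b };
	const struct send_wr *bad = NULL;

	if (post_send(&a, &bad))
		printf("failed at wr_id %llu\n", bad->wr_id);
	return 0;
}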
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0912fa026327..0f83cbec33f3 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep)
587{ 587{
588 struct fw_flowc_wr *flowc; 588 struct fw_flowc_wr *flowc;
589 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); 589 struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
590 int i;
591 u16 vlan = ep->l2t->vlan; 590 u16 vlan = ep->l2t->vlan;
592 int nparams; 591 int nparams;
592 int flowclen, flowclen16;
593 593
594 if (WARN_ON(!skb)) 594 if (WARN_ON(!skb))
595 return -ENOMEM; 595 return -ENOMEM;
596 596
597 if (vlan == CPL_L2T_VLAN_NONE) 597 if (vlan == CPL_L2T_VLAN_NONE)
598 nparams = 8;
599 else
600 nparams = 9; 598 nparams = 9;
599 else
600 nparams = 10;
601 601
602 flowc = __skb_put(skb, FLOWC_LEN); 602 flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
603 flowclen16 = DIV_ROUND_UP(flowclen, 16);
604 flowclen = flowclen16 * 16;
605
606 flowc = __skb_put(skb, flowclen);
607 memset(flowc, 0, flowclen);
603 608
604 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | 609 flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
605 FW_FLOWC_WR_NPARAMS_V(nparams)); 610 FW_FLOWC_WR_NPARAMS_V(nparams));
606 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, 611 flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
607 16)) | FW_WR_FLOWID_V(ep->hwtid)); 612 FW_WR_FLOWID_V(ep->hwtid));
608 613
609 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 614 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
610 flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V 615 flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
@@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep)
623 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); 628 flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
624 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 629 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
625 flowc->mnemval[7].val = cpu_to_be32(ep->emss); 630 flowc->mnemval[7].val = cpu_to_be32(ep->emss);
626 if (nparams == 9) { 631 flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
632 flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
633 if (nparams == 10) {
627 u16 pri; 634 u16 pri;
628
629 pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; 635 pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
630 flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 636 flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
631 flowc->mnemval[8].val = cpu_to_be32(pri); 637 flowc->mnemval[9].val = cpu_to_be32(pri);
632 } else {
633 /* Pad WR to 16 byte boundary */
634 flowc->mnemval[8].mnemonic = 0;
635 flowc->mnemval[8].val = 0;
636 }
637 for (i = 0; i < 9; i++) {
638 flowc->mnemval[i].r4[0] = 0;
639 flowc->mnemval[i].r4[1] = 0;
640 flowc->mnemval[i].r4[2] = 0;
641 } 638 }
642 639
643 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); 640 set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
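send_flowc() now sizes the work request from the number of mnemonics it actually emits instead of using a fixed FLOWC_LEN: the length runs to the end of mnemval[nparams] and is rounded up to the 16-byte units counted by flowid_len16, so the new RCV_SCALE entry (and the optional SCHEDCLASS entry when a VLAN priority is present) needs no hand-written padding. A standalone sketch of that arithmetic with an illustrative layout:

#include <stddef.h>
#include <stdio.h>

struct mnemval {
	unsigned char mnemonic;
	unsigned char r4[3];
	unsigned int val;
};

struct flowc_wr {
	unsigned int op_to_nparams;
	unsigned int flowid_len16;
	struct mnemval mnemval[];	/* variable number of parameters */
};

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	for (int nparams = 9; nparams <= 10; nparams++) {
		/* Same result as the driver's offsetof(..., mnemval[nparams]) idiom. */
		size_t flowclen = offsetof(struct flowc_wr, mnemval) +
				  nparams * sizeof(struct mnemval);
		size_t flowclen16 = DIV_ROUND_UP(flowclen, 16);

		printf("nparams=%d len=%zu len16=%zu padded=%zu\n",
		       nparams, flowclen, flowclen16, flowclen16 * 16);
	}
	return 0;
}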
@@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1176{ 1173{
1177 struct c4iw_ep *ep; 1174 struct c4iw_ep *ep;
1178 struct cpl_act_establish *req = cplhdr(skb); 1175 struct cpl_act_establish *req = cplhdr(skb);
1176 unsigned short tcp_opt = ntohs(req->tcp_opt);
1179 unsigned int tid = GET_TID(req); 1177 unsigned int tid = GET_TID(req);
1180 unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); 1178 unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1181 struct tid_info *t = dev->rdev.lldi.tids; 1179 struct tid_info *t = dev->rdev.lldi.tids;
@@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1196 1194
1197 ep->snd_seq = be32_to_cpu(req->snd_isn); 1195 ep->snd_seq = be32_to_cpu(req->snd_isn);
1198 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 1196 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1197 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1199 1198
1200 set_emss(ep, ntohs(req->tcp_opt)); 1199 set_emss(ep, tcp_opt);
1201 1200
1202 /* dealloc the atid */ 1201 /* dealloc the atid */
1203 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); 1202 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
@@ -1853,10 +1852,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1853 return 0; 1852 return 0;
1854} 1853}
1855 1854
1855static void complete_cached_srq_buffers(struct c4iw_ep *ep,
1856 __be32 srqidx_status)
1857{
1858 enum chip_type adapter_type;
1859 u32 srqidx;
1860
1861 adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1862 srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
1863
1864 /*
1865 * If this TCB had a srq buffer cached, then we must complete
1866 * it. For user mode, that means saving the srqidx in the
1867 * user/kernel status page for this qp. For kernel mode, just
1868 * synthesize the CQE now.
1869 */
1870 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1871 if (ep->com.qp->ibqp.uobject)
1872 t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1873 else
1874 c4iw_flush_srqidx(ep->com.qp, srqidx);
1875 }
1876}
1877
1856static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 1878static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1857{ 1879{
1858 struct c4iw_ep *ep; 1880 struct c4iw_ep *ep;
1859 struct cpl_abort_rpl_rss *rpl = cplhdr(skb); 1881 struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1860 int release = 0; 1882 int release = 0;
1861 unsigned int tid = GET_TID(rpl); 1883 unsigned int tid = GET_TID(rpl);
1862 1884
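complete_cached_srq_buffers() decodes the combined srqidx/status word carried by the abort CPL; on chips newer than T5 a non-zero index means the hardware still holds a cached SRQ buffer, which is either recorded in the status page for user-mode QPs or synthesized into a CQE for kernel QPs, as the comment in the hunk says. A generic sketch of unpacking two fields from one big-endian word; the shift and mask values below are invented for illustration and are not the real ABORT_RSS_* layout:

#include <arpa/inet.h>	/* ntohl(), htonl() */
#include <stdint.h>
#include <stdio.h>

/* Illustrative layout only: low 8 bits = status, next 24 bits = SRQ index. */
#define STATUS_MASK	0xffu
#define SRQIDX_SHIFT	8
#define SRQIDX_MASK	0xffffffu

int main(void)
{
	uint32_t wire = htonl((123u << SRQIDX_SHIFT) | 0x2a);	/* as on the wire */
	uint32_t host = ntohl(wire);				/* to host order */

	printf("status=%u srqidx=%u\n",
	       (unsigned int)(host & STATUS_MASK),
	       (unsigned int)((host >> SRQIDX_SHIFT) & SRQIDX_MASK));
	return 0;
}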
@@ -1865,6 +1887,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1865 pr_warn("Abort rpl to freed endpoint\n"); 1887 pr_warn("Abort rpl to freed endpoint\n");
1866 return 0; 1888 return 0;
1867 } 1889 }
1890
1891 complete_cached_srq_buffers(ep, rpl->srqidx_status);
1892
1868 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 1893 pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1869 mutex_lock(&ep->com.mutex); 1894 mutex_lock(&ep->com.mutex);
1870 switch (ep->com.state) { 1895 switch (ep->com.state) {
@@ -2603,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2603 struct cpl_pass_establish *req = cplhdr(skb); 2628 struct cpl_pass_establish *req = cplhdr(skb);
2604 unsigned int tid = GET_TID(req); 2629 unsigned int tid = GET_TID(req);
2605 int ret; 2630 int ret;
2631 u16 tcp_opt = ntohs(req->tcp_opt);
2606 2632
2607 ep = get_ep_from_tid(dev, tid); 2633 ep = get_ep_from_tid(dev, tid);
2608 pr_debug("ep %p tid %u\n", ep, ep->hwtid); 2634 pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2609 ep->snd_seq = be32_to_cpu(req->snd_isn); 2635 ep->snd_seq = be32_to_cpu(req->snd_isn);
2610 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 2636 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2637 ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2611 2638
2612 pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, 2639 pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2613 ntohs(req->tcp_opt));
2614 2640
2615 set_emss(ep, ntohs(req->tcp_opt)); 2641 set_emss(ep, tcp_opt);
2616 2642
2617 dst_confirm(ep->dst); 2643 dst_confirm(ep->dst);
2618 mutex_lock(&ep->com.mutex); 2644 mutex_lock(&ep->com.mutex);
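Both act_establish() and pass_establish() now remember the send window-scale shift extracted from the CPL's tcp_opt word, which send_flowc() forwards to the firmware as the RCV_SCALE parameter. As a reminder of what the shift means, a scaled TCP window is the 16-bit window field shifted left by the negotiated factor; a tiny sketch with invented values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t window_field = 0xffff;	/* raw 16-bit window from the header */
	unsigned int snd_wscale = 7;	/* negotiated shift, invented here */

	uint32_t effective = (uint32_t)window_field << snd_wscale;
	printf("effective window: %u bytes\n", effective);
	return 0;
}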
@@ -2719,28 +2745,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2719 2745
2720static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) 2746static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2721{ 2747{
2722 struct cpl_abort_req_rss *req = cplhdr(skb); 2748 struct cpl_abort_req_rss6 *req = cplhdr(skb);
2723 struct c4iw_ep *ep; 2749 struct c4iw_ep *ep;
2724 struct sk_buff *rpl_skb; 2750 struct sk_buff *rpl_skb;
2725 struct c4iw_qp_attributes attrs; 2751 struct c4iw_qp_attributes attrs;
2726 int ret; 2752 int ret;
2727 int release = 0; 2753 int release = 0;
2728 unsigned int tid = GET_TID(req); 2754 unsigned int tid = GET_TID(req);
2755 u8 status;
2756
2729 u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); 2757 u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2730 2758
2731 ep = get_ep_from_tid(dev, tid); 2759 ep = get_ep_from_tid(dev, tid);
2732 if (!ep) 2760 if (!ep)
2733 return 0; 2761 return 0;
2734 2762
2735 if (cxgb_is_neg_adv(req->status)) { 2763 status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2764
2765 if (cxgb_is_neg_adv(status)) {
2736 pr_debug("Negative advice on abort- tid %u status %d (%s)\n", 2766 pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
2737 ep->hwtid, req->status, neg_adv_str(req->status)); 2767 ep->hwtid, status, neg_adv_str(status));
2738 ep->stats.abort_neg_adv++; 2768 ep->stats.abort_neg_adv++;
2739 mutex_lock(&dev->rdev.stats.lock); 2769 mutex_lock(&dev->rdev.stats.lock);
2740 dev->rdev.stats.neg_adv++; 2770 dev->rdev.stats.neg_adv++;
2741 mutex_unlock(&dev->rdev.stats.lock); 2771 mutex_unlock(&dev->rdev.stats.lock);
2742 goto deref_ep; 2772 goto deref_ep;
2743 } 2773 }
2774
2775 complete_cached_srq_buffers(ep, req->srqidx_status);
2776
2744 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, 2777 pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2745 ep->com.state); 2778 ep->com.state);
2746 set_bit(PEER_ABORT, &ep->com.history); 2779 set_bit(PEER_ABORT, &ep->com.history);
@@ -3444,9 +3477,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3444 } 3477 }
3445 insert_handle(dev, &dev->stid_idr, ep, ep->stid); 3478 insert_handle(dev, &dev->stid_idr, ep, ep->stid);
3446 3479
3447 memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3448 sizeof(ep->com.local_addr));
3449
3450 state_set(&ep->com, LISTEN); 3480 state_set(&ep->com, LISTEN);
3451 if (ep->com.local_addr.ss_family == AF_INET) 3481 if (ep->com.local_addr.ss_family == AF_INET)
3452 err = create_server4(dev, ep); 3482 err = create_server4(dev, ep);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 2be2e1ac1b5f..6d3042794094 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
77 int user = (uctx != &rdev->uctx); 77 int user = (uctx != &rdev->uctx);
78 int ret; 78 int ret;
79 struct sk_buff *skb; 79 struct sk_buff *skb;
80 struct c4iw_ucontext *ucontext = NULL;
81
82 if (user)
83 ucontext = container_of(uctx, struct c4iw_ucontext, uctx);
80 84
81 cq->cqid = c4iw_get_cqid(rdev, uctx); 85 cq->cqid = c4iw_get_cqid(rdev, uctx);
82 if (!cq->cqid) { 86 if (!cq->cqid) {
@@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
100 dma_unmap_addr_set(cq, mapping, cq->dma_addr); 104 dma_unmap_addr_set(cq, mapping, cq->dma_addr);
101 memset(cq->queue, 0, cq->memsize); 105 memset(cq->queue, 0, cq->memsize);
102 106
107 if (user && ucontext->is_32b_cqe) {
108 cq->qp_errp = &((struct t4_status_page *)
109 ((u8 *)cq->queue + (cq->size - 1) *
110 (sizeof(*cq->queue) / 2)))->qp_err;
111 } else {
112 cq->qp_errp = &((struct t4_status_page *)
113 ((u8 *)cq->queue + (cq->size - 1) *
114 sizeof(*cq->queue)))->qp_err;
115 }
116
103 /* build fw_ri_res_wr */ 117 /* build fw_ri_res_wr */
104 wr_len = sizeof *res_wr + sizeof *res; 118 wr_len = sizeof *res_wr + sizeof *res;
105 119
@@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
132 FW_RI_RES_WR_IQPCIECH_V(2) | 146 FW_RI_RES_WR_IQPCIECH_V(2) |
133 FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | 147 FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
134 FW_RI_RES_WR_IQO_F | 148 FW_RI_RES_WR_IQO_F |
135 FW_RI_RES_WR_IQESIZE_V(1)); 149 ((user && ucontext->is_32b_cqe) ?
150 FW_RI_RES_WR_IQESIZE_V(1) :
151 FW_RI_RES_WR_IQESIZE_V(2)));
136 res->u.cq.iqsize = cpu_to_be16(cq->size); 152 res->u.cq.iqsize = cpu_to_be16(cq->size);
137 res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); 153 res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
138 154
@@ -166,7 +182,7 @@ err1:
166 return ret; 182 return ret;
167} 183}
168 184
169static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) 185static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
170{ 186{
171 struct t4_cqe cqe; 187 struct t4_cqe cqe;
172 188
@@ -179,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
179 CQE_SWCQE_V(1) | 195 CQE_SWCQE_V(1) |
180 CQE_QPID_V(wq->sq.qid)); 196 CQE_QPID_V(wq->sq.qid));
181 cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); 197 cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
198 if (srqidx)
199 cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
182 cq->sw_queue[cq->sw_pidx] = cqe; 200 cq->sw_queue[cq->sw_pidx] = cqe;
183 t4_swcq_produce(cq); 201 t4_swcq_produce(cq);
184} 202}
@@ -191,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
191 pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", 209 pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
192 wq, cq, wq->rq.in_use, count); 210 wq, cq, wq->rq.in_use, count);
193 while (in_use--) { 211 while (in_use--) {
194 insert_recv_cqe(wq, cq); 212 insert_recv_cqe(wq, cq, 0);
195 flushed++; 213 flushed++;
196 } 214 }
197 return flushed; 215 return flushed;
@@ -442,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
442 pr_debug("cq %p count %d\n", cq, *count); 460 pr_debug("cq %p count %d\n", cq, *count);
443} 461}
444 462
463static void post_pending_srq_wrs(struct t4_srq *srq)
464{
465 struct t4_srq_pending_wr *pwr;
466 u16 idx = 0;
467
468 while (srq->pending_in_use) {
469 pwr = &srq->pending_wrs[srq->pending_cidx];
470 srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
471 srq->sw_rq[srq->pidx].valid = 1;
472
473 pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
474 __func__,
475 srq->cidx, srq->pidx, srq->wq_pidx,
476 srq->in_use, srq->size,
477 (unsigned long long)pwr->wr_id);
478
479 c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
480 t4_srq_consume_pending_wr(srq);
481 t4_srq_produce(srq, pwr->len16);
482 idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
483 }
484
485 if (idx) {
486 t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
487 srq->queue[srq->size].status.host_wq_pidx =
488 srq->wq_pidx;
489 }
490}
491
492static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
493{
494 int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
495 u64 wr_id;
496
497 srq->sw_rq[rel_idx].valid = 0;
498 wr_id = srq->sw_rq[rel_idx].wr_id;
499
500 if (rel_idx == srq->cidx) {
501 pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
502 __func__, rel_idx, srq->cidx, srq->pidx,
503 srq->wq_pidx, srq->in_use, srq->size,
504 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
505 t4_srq_consume(srq);
506 while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
507 pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
508 __func__, srq->cidx, srq->pidx,
509 srq->wq_pidx, srq->in_use,
510 srq->size, srq->ooo_count,
511 (unsigned long long)
512 srq->sw_rq[srq->cidx].wr_id);
513 t4_srq_consume_ooo(srq);
514 }
515 if (srq->ooo_count == 0 && srq->pending_in_use)
516 post_pending_srq_wrs(srq);
517 } else {
518 pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
519 __func__, rel_idx, srq->cidx,
520 srq->pidx, srq->wq_pidx,
521 srq->in_use, srq->size,
522 srq->ooo_count,
523 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
524 t4_srq_produce_ooo(srq);
525 }
526 return wr_id;
527}
528
445/* 529/*
446 * poll_cq 530 * poll_cq
447 * 531 *
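reap_srq_cqe() copes with SRQ completions arriving out of order: the slot named by the CQE is marked done, and only when that slot is the current consumer index does the code retire it together with any already-finished slots queued behind it, then repost any pending WRs. A much-simplified standalone model of that mark-then-drain pattern; the ring bookkeeping here is invented and omits the driver's ooo_count and pending-WR handling:

#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 8

static bool valid[RING_SIZE];	/* slot still outstanding? */
static unsigned int cidx;	/* consumer index */
static unsigned int in_use = RING_SIZE;

static void complete_slot(unsigned int idx)
{
	valid[idx] = false;		/* this completion has been reaped */

	if (idx != cidx)
		return;			/* out of order: just remember it */

	/* In order: retire this slot and any finished slots behind it. */
	while (in_use && !valid[cidx]) {
		cidx = (cidx + 1) % RING_SIZE;
		in_use--;
	}
}

int main(void)
{
	for (unsigned int i = 0; i < RING_SIZE; i++)
		valid[i] = true;

	complete_slot(2);	/* out of order: only marked, nothing retired */
	complete_slot(0);	/* retires slot 0; slot 1 is still outstanding */
	complete_slot(1);	/* retires slot 1 and the already-done slot 2 */
	printf("cidx=%u in_use=%u\n", cidx, in_use);	/* cidx=3 in_use=5 */
	return 0;
}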
@@ -459,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
459 * -EOVERFLOW CQ overflow detected. 543 * -EOVERFLOW CQ overflow detected.
460 */ 544 */
461static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, 545static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
462 u8 *cqe_flushed, u64 *cookie, u32 *credit) 546 u8 *cqe_flushed, u64 *cookie, u32 *credit,
547 struct t4_srq *srq)
463{ 548{
464 int ret = 0; 549 int ret = 0;
465 struct t4_cqe *hw_cqe, read_cqe; 550 struct t4_cqe *hw_cqe, read_cqe;
@@ -524,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
524 */ 609 */
525 if (CQE_TYPE(hw_cqe) == 1) { 610 if (CQE_TYPE(hw_cqe) == 1) {
526 if (CQE_STATUS(hw_cqe)) 611 if (CQE_STATUS(hw_cqe))
527 t4_set_wq_in_error(wq); 612 t4_set_wq_in_error(wq, 0);
528 ret = -EAGAIN; 613 ret = -EAGAIN;
529 goto skip_cqe; 614 goto skip_cqe;
530 } 615 }
@@ -535,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
535 */ 620 */
536 if (CQE_WRID_STAG(hw_cqe) == 1) { 621 if (CQE_WRID_STAG(hw_cqe) == 1) {
537 if (CQE_STATUS(hw_cqe)) 622 if (CQE_STATUS(hw_cqe))
538 t4_set_wq_in_error(wq); 623 t4_set_wq_in_error(wq, 0);
539 ret = -EAGAIN; 624 ret = -EAGAIN;
540 goto skip_cqe; 625 goto skip_cqe;
541 } 626 }
@@ -560,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
560 645
561 if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { 646 if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
562 *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); 647 *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
563 t4_set_wq_in_error(wq); 648 t4_set_wq_in_error(wq, 0);
564 } 649 }
565 650
566 /* 651 /*
@@ -574,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
574 * then we complete this with T4_ERR_MSN and mark the wq in 659 * then we complete this with T4_ERR_MSN and mark the wq in
575 * error. 660 * error.
576 */ 661 */
577
578 if (t4_rq_empty(wq)) {
579 t4_set_wq_in_error(wq);
580 ret = -EAGAIN;
581 goto skip_cqe;
582 }
583 if (unlikely(!CQE_STATUS(hw_cqe) && 662 if (unlikely(!CQE_STATUS(hw_cqe) &&
584 CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { 663 CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
585 t4_set_wq_in_error(wq); 664 t4_set_wq_in_error(wq, 0);
586 hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); 665 hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
587 } 666 }
588 goto proc_cqe; 667 goto proc_cqe;
@@ -641,11 +720,16 @@ proc_cqe:
641 c4iw_log_wr_stats(wq, hw_cqe); 720 c4iw_log_wr_stats(wq, hw_cqe);
642 t4_sq_consume(wq); 721 t4_sq_consume(wq);
643 } else { 722 } else {
644 pr_debug("completing rq idx %u\n", wq->rq.cidx); 723 if (!srq) {
645 *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; 724 pr_debug("completing rq idx %u\n", wq->rq.cidx);
646 if (c4iw_wr_log) 725 *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
647 c4iw_log_wr_stats(wq, hw_cqe); 726 if (c4iw_wr_log)
648 t4_rq_consume(wq); 727 c4iw_log_wr_stats(wq, hw_cqe);
728 t4_rq_consume(wq);
729 } else {
730 *cookie = reap_srq_cqe(hw_cqe, srq);
731 }
732 wq->rq.msn++;
649 goto skip_cqe; 733 goto skip_cqe;
650 } 734 }
651 735
@@ -668,46 +752,33 @@ skip_cqe:
668 return ret; 752 return ret;
669} 753}
670 754
671/* 755static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
672 * Get one cq entry from c4iw and map it to openib. 756 struct ib_wc *wc, struct c4iw_srq *srq)
673 *
674 * Returns:
675 * 0 cqe returned
676 * -ENODATA EMPTY;
677 * -EAGAIN caller must try again
678 * any other -errno fatal error
679 */
680static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
681{ 757{
682 struct c4iw_qp *qhp = NULL; 758 struct t4_cqe uninitialized_var(cqe);
683 struct t4_cqe uninitialized_var(cqe), *rd_cqe; 759 struct t4_wq *wq = qhp ? &qhp->wq : NULL;
684 struct t4_wq *wq;
685 u32 credit = 0; 760 u32 credit = 0;
686 u8 cqe_flushed; 761 u8 cqe_flushed;
687 u64 cookie = 0; 762 u64 cookie = 0;
688 int ret; 763 int ret;
689 764
690 ret = t4_next_cqe(&chp->cq, &rd_cqe); 765 ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
691 766 srq ? &srq->wq : NULL);
692 if (ret)
693 return ret;
694
695 qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
696 if (!qhp)
697 wq = NULL;
698 else {
699 spin_lock(&qhp->lock);
700 wq = &(qhp->wq);
701 }
702 ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
703 if (ret) 767 if (ret)
704 goto out; 768 goto out;
705 769
706 wc->wr_id = cookie; 770 wc->wr_id = cookie;
707 wc->qp = &qhp->ibqp; 771 wc->qp = qhp ? &qhp->ibqp : NULL;
708 wc->vendor_err = CQE_STATUS(&cqe); 772 wc->vendor_err = CQE_STATUS(&cqe);
709 wc->wc_flags = 0; 773 wc->wc_flags = 0;
710 774
775 /*
776 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
777 */
778 if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
779 srq->wq.in_use < srq->srq_limit)
780 c4iw_dispatch_srq_limit_reached_event(srq);
781
711 pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", 782 pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
712 CQE_QPID(&cqe), 783 CQE_QPID(&cqe),
713 CQE_TYPE(&cqe), CQE_OPCODE(&cqe), 784 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -720,15 +791,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
720 wc->byte_len = CQE_LEN(&cqe); 791 wc->byte_len = CQE_LEN(&cqe);
721 else 792 else
722 wc->byte_len = 0; 793 wc->byte_len = 0;
723 wc->opcode = IB_WC_RECV; 794
724 if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || 795 switch (CQE_OPCODE(&cqe)) {
725 CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { 796 case FW_RI_SEND:
797 wc->opcode = IB_WC_RECV;
798 break;
799 case FW_RI_SEND_WITH_INV:
800 case FW_RI_SEND_WITH_SE_INV:
801 wc->opcode = IB_WC_RECV;
726 wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); 802 wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
727 wc->wc_flags |= IB_WC_WITH_INVALIDATE; 803 wc->wc_flags |= IB_WC_WITH_INVALIDATE;
728 c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); 804 c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
805 break;
806 case FW_RI_WRITE_IMMEDIATE:
807 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
808 wc->ex.imm_data = CQE_IMM_DATA(&cqe);
809 wc->wc_flags |= IB_WC_WITH_IMM;
810 break;
811 default:
812 pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
813 CQE_OPCODE(&cqe), CQE_QPID(&cqe));
814 ret = -EINVAL;
815 goto out;
729 } 816 }
730 } else { 817 } else {
731 switch (CQE_OPCODE(&cqe)) { 818 switch (CQE_OPCODE(&cqe)) {
819 case FW_RI_WRITE_IMMEDIATE:
732 case FW_RI_RDMA_WRITE: 820 case FW_RI_RDMA_WRITE:
733 wc->opcode = IB_WC_RDMA_WRITE; 821 wc->opcode = IB_WC_RDMA_WRITE;
734 break; 822 break;
@@ -819,8 +907,43 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
819 } 907 }
820 } 908 }
821out: 909out:
822 if (wq) 910 return ret;
911}
912
913/*
914 * Get one cq entry from c4iw and map it to openib.
915 *
916 * Returns:
917 * 0 cqe returned
918 * -ENODATA EMPTY;
919 * -EAGAIN caller must try again
920 * any other -errno fatal error
921 */
922static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
923{
924 struct c4iw_srq *srq = NULL;
925 struct c4iw_qp *qhp = NULL;
926 struct t4_cqe *rd_cqe;
927 int ret;
928
929 ret = t4_next_cqe(&chp->cq, &rd_cqe);
930
931 if (ret)
932 return ret;
933
934 qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
935 if (qhp) {
936 spin_lock(&qhp->lock);
937 srq = qhp->srq;
938 if (srq)
939 spin_lock(&srq->lock);
940 ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
823 spin_unlock(&qhp->lock); 941 spin_unlock(&qhp->lock);
942 if (srq)
943 spin_unlock(&srq->lock);
944 } else {
945 ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
946 }
824 return ret; 947 return ret;
825} 948}
826 949
@@ -876,6 +999,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
876 int vector = attr->comp_vector; 999 int vector = attr->comp_vector;
877 struct c4iw_dev *rhp; 1000 struct c4iw_dev *rhp;
878 struct c4iw_cq *chp; 1001 struct c4iw_cq *chp;
1002 struct c4iw_create_cq ucmd;
879 struct c4iw_create_cq_resp uresp; 1003 struct c4iw_create_cq_resp uresp;
880 struct c4iw_ucontext *ucontext = NULL; 1004 struct c4iw_ucontext *ucontext = NULL;
881 int ret, wr_len; 1005 int ret, wr_len;
@@ -891,9 +1015,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
891 if (vector >= rhp->rdev.lldi.nciq) 1015 if (vector >= rhp->rdev.lldi.nciq)
892 return ERR_PTR(-EINVAL); 1016 return ERR_PTR(-EINVAL);
893 1017
1018 if (ib_context) {
1019 ucontext = to_c4iw_ucontext(ib_context);
1020 if (udata->inlen < sizeof(ucmd))
1021 ucontext->is_32b_cqe = 1;
1022 }
1023
894 chp = kzalloc(sizeof(*chp), GFP_KERNEL); 1024 chp = kzalloc(sizeof(*chp), GFP_KERNEL);
895 if (!chp) 1025 if (!chp)
896 return ERR_PTR(-ENOMEM); 1026 return ERR_PTR(-ENOMEM);
1027
897 chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); 1028 chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
898 if (!chp->wr_waitp) { 1029 if (!chp->wr_waitp) {
899 ret = -ENOMEM; 1030 ret = -ENOMEM;
@@ -908,9 +1039,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
908 goto err_free_wr_wait; 1039 goto err_free_wr_wait;
909 } 1040 }
910 1041
911 if (ib_context)
912 ucontext = to_c4iw_ucontext(ib_context);
913
914 /* account for the status page. */ 1042 /* account for the status page. */
915 entries++; 1043 entries++;
916 1044
@@ -934,13 +1062,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
934 if (hwentries < 64) 1062 if (hwentries < 64)
935 hwentries = 64; 1063 hwentries = 64;
936 1064
937 memsize = hwentries * sizeof *chp->cq.queue; 1065 memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
1066 (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
938 1067
939 /* 1068 /*
940 * memsize must be a multiple of the page size if its a user cq. 1069 * memsize must be a multiple of the page size if its a user cq.
941 */ 1070 */
942 if (ucontext) 1071 if (ucontext)
943 memsize = roundup(memsize, PAGE_SIZE); 1072 memsize = roundup(memsize, PAGE_SIZE);
1073
944 chp->cq.size = hwentries; 1074 chp->cq.size = hwentries;
945 chp->cq.memsize = memsize; 1075 chp->cq.memsize = memsize;
946 chp->cq.vector = vector; 1076 chp->cq.vector = vector;
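create_cq() now sizes the queue from the CQE format negotiated with user space: a library that does not pass the new create_cq command (udata->inlen too small) is flagged is_32b_cqe and each entry takes half the 64-byte size, while the response flags advertise C4IW_64B_CQE so updated libraries can opt in; user-mapped queues are still rounded up to whole pages. A sketch of the sizing rule, assuming a 64-byte hardware CQE and 4 KiB pages purely for illustration:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE	4096u
#define CQE_64B		64u

static unsigned int roundup_u(unsigned int x, unsigned int to)
{
	return ((x + to - 1) / to) * to;
}

static unsigned int cq_memsize(unsigned int hwentries, bool is_32b_cqe,
			       bool user)
{
	unsigned int memsize = hwentries * (is_32b_cqe ? CQE_64B / 2 : CQE_64B);

	/* user-mapped queues must cover whole pages */
	return user ? roundup_u(memsize, PAGE_SIZE) : memsize;
}

int main(void)
{
	printf("64B CQEs: %u bytes\n", cq_memsize(128, false, true));
	printf("32B CQEs: %u bytes\n", cq_memsize(128, true, true));
	return 0;
}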
@@ -971,6 +1101,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
971 if (!mm2) 1101 if (!mm2)
972 goto err_free_mm; 1102 goto err_free_mm;
973 1103
1104 memset(&uresp, 0, sizeof(uresp));
974 uresp.qid_mask = rhp->rdev.cqmask; 1105 uresp.qid_mask = rhp->rdev.cqmask;
975 uresp.cqid = chp->cq.cqid; 1106 uresp.cqid = chp->cq.cqid;
976 uresp.size = chp->cq.size; 1107 uresp.size = chp->cq.size;
@@ -980,9 +1111,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
980 ucontext->key += PAGE_SIZE; 1111 ucontext->key += PAGE_SIZE;
981 uresp.gts_key = ucontext->key; 1112 uresp.gts_key = ucontext->key;
982 ucontext->key += PAGE_SIZE; 1113 ucontext->key += PAGE_SIZE;
1114 /* communicate to the userspace that
1115 * kernel driver supports 64B CQE
1116 */
1117 uresp.flags |= C4IW_64B_CQE;
1118
983 spin_unlock(&ucontext->mmap_lock); 1119 spin_unlock(&ucontext->mmap_lock);
984 ret = ib_copy_to_udata(udata, &uresp, 1120 ret = ib_copy_to_udata(udata, &uresp,
985 sizeof(uresp) - sizeof(uresp.reserved)); 1121 ucontext->is_32b_cqe ?
1122 sizeof(uresp) - sizeof(uresp.flags) :
1123 sizeof(uresp));
986 if (ret) 1124 if (ret)
987 goto err_free_mm2; 1125 goto err_free_mm2;
988 1126
@@ -1019,11 +1157,6 @@ err_free_chp:
1019 return ERR_PTR(ret); 1157 return ERR_PTR(ret);
1020} 1158}
1021 1159
1022int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
1023{
1024 return -ENOSYS;
1025}
1026
1027int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 1160int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
1028{ 1161{
1029 struct c4iw_cq *chp; 1162 struct c4iw_cq *chp;
@@ -1039,3 +1172,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
1039 spin_unlock_irqrestore(&chp->lock, flag); 1172 spin_unlock_irqrestore(&chp->lock, flag);
1040 return ret; 1173 return ret;
1041} 1174}
1175
1176void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
1177{
1178 struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1179 unsigned long flag;
1180
 1181 /* locking hierarchy: cq lock first, then qp lock. */
1182 spin_lock_irqsave(&rchp->lock, flag);
1183 spin_lock(&qhp->lock);
1184
1185 /* create a SRQ RECV CQE for srqidx */
1186 insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
1187
1188 spin_unlock(&qhp->lock);
1189 spin_unlock_irqrestore(&rchp->lock, flag);
1190}
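c4iw_flush_srqidx() synthesizes a receive CQE carrying the SRQ index and, per its comment, always takes the CQ lock before the QP lock, so every flush path acquires the pair in the same order and cannot deadlock against another. A minimal userspace sketch of that fixed-order discipline, with pthread mutexes standing in for the spinlocks:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t qp_lock = PTHREAD_MUTEX_INITIALIZER;

/* Every path that needs both locks takes them cq -> qp, never qp -> cq. */
static void flush_one(void)
{
	pthread_mutex_lock(&cq_lock);
	pthread_mutex_lock(&qp_lock);

	/* insert a software CQE here */

	pthread_mutex_unlock(&qp_lock);
	pthread_mutex_unlock(&cq_lock);
}

int main(void)	/* build with -pthread */
{
	flush_one();
	puts("flushed");
	return 0;
}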
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index a3c3418afd73..c13c0ba30f63 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data)
275 275
276 set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); 276 set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
277 cc = snprintf(qpd->buf + qpd->pos, space, 277 cc = snprintf(qpd->buf + qpd->pos, space,
278 "rc qp sq id %u rq id %u state %u " 278 "rc qp sq id %u %s id %u state %u "
279 "onchip %u ep tid %u state %u " 279 "onchip %u ep tid %u state %u "
280 "%pI4:%u/%u->%pI4:%u/%u\n", 280 "%pI4:%u/%u->%pI4:%u/%u\n",
281 qp->wq.sq.qid, qp->wq.rq.qid, 281 qp->wq.sq.qid, qp->srq ? "srq" : "rq",
282 qp->srq ? qp->srq->idx : qp->wq.rq.qid,
282 (int)qp->attr.state, 283 (int)qp->attr.state,
283 qp->wq.sq.flags & T4_SQ_ONCHIP, 284 qp->wq.sq.flags & T4_SQ_ONCHIP,
284 ep->hwtid, (int)ep->com.state, 285 ep->hwtid, (int)ep->com.state,
@@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v)
480 seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", 481 seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n",
481 dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, 482 dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
482 dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); 483 dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
484 seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n",
485 dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
486 dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
483 seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", 487 seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n",
484 dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, 488 dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
485 dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); 489 dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
530 dev->rdev.stats.pbl.fail = 0; 534 dev->rdev.stats.pbl.fail = 0;
531 dev->rdev.stats.rqt.max = 0; 535 dev->rdev.stats.rqt.max = 0;
532 dev->rdev.stats.rqt.fail = 0; 536 dev->rdev.stats.rqt.fail = 0;
 537 dev->rdev.stats.srqt.max = 0;
 538 dev->rdev.stats.srqt.fail = 0;
533 dev->rdev.stats.ocqp.max = 0; 539 dev->rdev.stats.ocqp.max = 0;
534 dev->rdev.stats.ocqp.fail = 0; 540 dev->rdev.stats.ocqp.fail = 0;
535 dev->rdev.stats.db_full = 0; 541 dev->rdev.stats.db_full = 0;
@@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
802 808
803 rdev->qpmask = rdev->lldi.udb_density - 1; 809 rdev->qpmask = rdev->lldi.udb_density - 1;
804 rdev->cqmask = rdev->lldi.ucq_density - 1; 810 rdev->cqmask = rdev->lldi.ucq_density - 1;
805 pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", 811 pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
806 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, 812 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
807 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), 813 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
808 rdev->lldi.vr->pbl.start, 814 rdev->lldi.vr->pbl.start,
@@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
811 rdev->lldi.vr->qp.start, 817 rdev->lldi.vr->qp.start,
812 rdev->lldi.vr->qp.size, 818 rdev->lldi.vr->qp.size,
813 rdev->lldi.vr->cq.start, 819 rdev->lldi.vr->cq.start,
814 rdev->lldi.vr->cq.size); 820 rdev->lldi.vr->cq.size,
821 rdev->lldi.vr->srq.size);
815 pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", 822 pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
816 &rdev->lldi.pdev->resource[2], 823 &rdev->lldi.pdev->resource[2],
817 rdev->lldi.db_reg, rdev->lldi.gts_reg, 824 rdev->lldi.db_reg, rdev->lldi.gts_reg,
@@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
824 rdev->stats.stag.total = rdev->lldi.vr->stag.size; 831 rdev->stats.stag.total = rdev->lldi.vr->stag.size;
825 rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; 832 rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
826 rdev->stats.rqt.total = rdev->lldi.vr->rq.size; 833 rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
834 rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
827 rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; 835 rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
828 rdev->stats.qid.total = rdev->lldi.vr->qp.size; 836 rdev->stats.qid.total = rdev->lldi.vr->qp.size;
829 837
830 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); 838 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
839 T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
831 if (err) { 840 if (err) {
832 pr_err("error %d initializing resources\n", err); 841 pr_err("error %d initializing resources\n", err);
833 return err; 842 return err;
@@ -857,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
857 rdev->status_page->qp_size = rdev->lldi.vr->qp.size; 866 rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
858 rdev->status_page->cq_start = rdev->lldi.vr->cq.start; 867 rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
859 rdev->status_page->cq_size = rdev->lldi.vr->cq.size; 868 rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
869 rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support;
860 870
861 if (c4iw_wr_log) { 871 if (c4iw_wr_log) {
862 rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, 872 rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order,
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 3e9d8b277ab9..8741d23168f3 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
70 CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), 70 CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
71 CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); 71 CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
72 72
73 pr_debug("%016llx %016llx %016llx %016llx\n", 73 pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n",
74 be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), 74 be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
75 be64_to_cpu(p[3])); 75 be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]),
76 be64_to_cpu(p[6]), be64_to_cpu(p[7]));
76 77
77 /* 78 /*
78 * Ingress WRITE and READ_RESP errors provide 79 * Ingress WRITE and READ_RESP errors provide
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 870649ff049c..f0fceadd0d12 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -97,6 +97,7 @@ struct c4iw_resource {
97 struct c4iw_id_table tpt_table; 97 struct c4iw_id_table tpt_table;
98 struct c4iw_id_table qid_table; 98 struct c4iw_id_table qid_table;
99 struct c4iw_id_table pdid_table; 99 struct c4iw_id_table pdid_table;
100 struct c4iw_id_table srq_table;
100}; 101};
101 102
102struct c4iw_qid_list { 103struct c4iw_qid_list {
@@ -130,6 +131,8 @@ struct c4iw_stats {
130 struct c4iw_stat stag; 131 struct c4iw_stat stag;
131 struct c4iw_stat pbl; 132 struct c4iw_stat pbl;
132 struct c4iw_stat rqt; 133 struct c4iw_stat rqt;
134 struct c4iw_stat srqt;
135 struct c4iw_stat srq;
133 struct c4iw_stat ocqp; 136 struct c4iw_stat ocqp;
134 u64 db_full; 137 u64 db_full;
135 u64 db_empty; 138 u64 db_empty;
@@ -549,6 +552,7 @@ struct c4iw_qp {
549 struct kref kref; 552 struct kref kref;
550 wait_queue_head_t wait; 553 wait_queue_head_t wait;
551 int sq_sig_all; 554 int sq_sig_all;
555 struct c4iw_srq *srq;
552 struct work_struct free_work; 556 struct work_struct free_work;
553 struct c4iw_ucontext *ucontext; 557 struct c4iw_ucontext *ucontext;
554 struct c4iw_wr_wait *wr_waitp; 558 struct c4iw_wr_wait *wr_waitp;
@@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
559 return container_of(ibqp, struct c4iw_qp, ibqp); 563 return container_of(ibqp, struct c4iw_qp, ibqp);
560} 564}
561 565
566struct c4iw_srq {
567 struct ib_srq ibsrq;
568 struct list_head db_fc_entry;
569 struct c4iw_dev *rhp;
570 struct t4_srq wq;
571 struct sk_buff *destroy_skb;
572 u32 srq_limit;
573 u32 pdid;
574 int idx;
575 u32 flags;
576 spinlock_t lock; /* protects srq */
577 struct c4iw_wr_wait *wr_waitp;
578 bool armed;
579};
580
581static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq)
582{
583 return container_of(ibsrq, struct c4iw_srq, ibsrq);
584}
585
562struct c4iw_ucontext { 586struct c4iw_ucontext {
563 struct ib_ucontext ibucontext; 587 struct ib_ucontext ibucontext;
564 struct c4iw_dev_ucontext uctx; 588 struct c4iw_dev_ucontext uctx;
@@ -566,6 +590,7 @@ struct c4iw_ucontext {
566 spinlock_t mmap_lock; 590 spinlock_t mmap_lock;
567 struct list_head mmaps; 591 struct list_head mmaps;
568 struct kref kref; 592 struct kref kref;
593 bool is_32b_cqe;
569}; 594};
570 595
571static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) 596static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
@@ -885,7 +910,10 @@ enum conn_pre_alloc_buffers {
885 CN_MAX_CON_BUF 910 CN_MAX_CON_BUF
886}; 911};
887 912
888#define FLOWC_LEN 80 913enum {
914 FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX])
915};
916
889union cpl_wr_size { 917union cpl_wr_size {
890 struct cpl_abort_req abrt_req; 918 struct cpl_abort_req abrt_req;
891 struct cpl_abort_rpl abrt_rpl; 919 struct cpl_abort_rpl abrt_rpl;
@@ -952,6 +980,7 @@ struct c4iw_ep {
952 unsigned int retry_count; 980 unsigned int retry_count;
953 int snd_win; 981 int snd_win;
954 int rcv_win; 982 int rcv_win;
983 u32 snd_wscale;
955 struct c4iw_ep_stats stats; 984 struct c4iw_ep_stats stats;
956}; 985};
957 986
@@ -988,7 +1017,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
988 struct c4iw_dev_ucontext *uctx); 1017 struct c4iw_dev_ucontext *uctx);
989u32 c4iw_get_resource(struct c4iw_id_table *id_table); 1018u32 c4iw_get_resource(struct c4iw_id_table *id_table);
990void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); 1019void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
991int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); 1020int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
1021 u32 nr_pdid, u32 nr_srqt);
992int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); 1022int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
993int c4iw_pblpool_create(struct c4iw_rdev *rdev); 1023int c4iw_pblpool_create(struct c4iw_rdev *rdev);
994int c4iw_rqtpool_create(struct c4iw_rdev *rdev); 1024int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
@@ -1007,10 +1037,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
1007void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, 1037void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
1008 struct c4iw_dev_ucontext *uctx); 1038 struct c4iw_dev_ucontext *uctx);
1009int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 1039int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
1010int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1040int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1011 struct ib_send_wr **bad_wr); 1041 const struct ib_send_wr **bad_wr);
1012int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 1042int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1013 struct ib_recv_wr **bad_wr); 1043 const struct ib_recv_wr **bad_wr);
1014int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); 1044int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
1015int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); 1045int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
1016int c4iw_destroy_listen(struct iw_cm_id *cm_id); 1046int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -1037,8 +1067,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
1037 const struct ib_cq_init_attr *attr, 1067 const struct ib_cq_init_attr *attr,
1038 struct ib_ucontext *ib_context, 1068 struct ib_ucontext *ib_context,
1039 struct ib_udata *udata); 1069 struct ib_udata *udata);
1040int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
1041int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); 1070int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
1071int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
1072 enum ib_srq_attr_mask srq_attr_mask,
1073 struct ib_udata *udata);
1074int c4iw_destroy_srq(struct ib_srq *ib_srq);
1075struct ib_srq *c4iw_create_srq(struct ib_pd *pd,
1076 struct ib_srq_init_attr *attrs,
1077 struct ib_udata *udata);
1042int c4iw_destroy_qp(struct ib_qp *ib_qp); 1078int c4iw_destroy_qp(struct ib_qp *ib_qp);
1043struct ib_qp *c4iw_create_qp(struct ib_pd *pd, 1079struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
1044 struct ib_qp_init_attr *attrs, 1080 struct ib_qp_init_attr *attrs,
@@ -1075,12 +1111,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
1075void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, 1111void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
1076 enum cxgb4_bar2_qtype qtype, 1112 enum cxgb4_bar2_qtype qtype,
1077 unsigned int *pbar2_qid, u64 *pbar2_pa); 1113 unsigned int *pbar2_qid, u64 *pbar2_pa);
1114int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev);
1115void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx);
1078extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); 1116extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe);
1079extern int c4iw_wr_log; 1117extern int c4iw_wr_log;
1080extern int db_fc_threshold; 1118extern int db_fc_threshold;
1081extern int db_coalescing_threshold; 1119extern int db_coalescing_threshold;
1082extern int use_dsgl; 1120extern int use_dsgl;
1083void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); 1121void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
1122void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq);
1123void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16);
1124void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx);
1125int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1126 const struct ib_recv_wr **bad_wr);
1084struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); 1127struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
1085 1128
1086typedef int c4iw_restrack_func(struct sk_buff *msg, 1129typedef int c4iw_restrack_func(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1feade8bb4b3..4eda6872e617 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -58,41 +58,6 @@ static int fastreg_support = 1;
58module_param(fastreg_support, int, 0644); 58module_param(fastreg_support, int, 0644);
59MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); 59MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
60 60
61static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
62 struct rdma_ah_attr *ah_attr,
63 struct ib_udata *udata)
64
65{
66 return ERR_PTR(-ENOSYS);
67}
68
69static int c4iw_ah_destroy(struct ib_ah *ah)
70{
71 return -ENOSYS;
72}
73
74static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
75{
76 return -ENOSYS;
77}
78
79static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
80{
81 return -ENOSYS;
82}
83
84static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags,
85 u8 port_num, const struct ib_wc *in_wc,
86 const struct ib_grh *in_grh,
87 const struct ib_mad_hdr *in_mad,
88 size_t in_mad_size,
89 struct ib_mad_hdr *out_mad,
90 size_t *out_mad_size,
91 u16 *out_mad_pkey_index)
92{
93 return -ENOSYS;
94}
95
96void _c4iw_free_ucontext(struct kref *kref) 61void _c4iw_free_ucontext(struct kref *kref)
97{ 62{
98 struct c4iw_ucontext *ucontext; 63 struct c4iw_ucontext *ucontext;
@@ -342,8 +307,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
342 props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; 307 props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
343 props->max_mr_size = T4_MAX_MR_SIZE; 308 props->max_mr_size = T4_MAX_MR_SIZE;
344 props->max_qp = dev->rdev.lldi.vr->qp.size / 2; 309 props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
310 props->max_srq = dev->rdev.lldi.vr->srq.size;
345 props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; 311 props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
346 props->max_sge = T4_MAX_RECV_SGE; 312 props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
313 props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
314 props->max_recv_sge = T4_MAX_RECV_SGE;
315 props->max_srq_sge = T4_MAX_RECV_SGE;
347 props->max_sge_rd = 1; 316 props->max_sge_rd = 1;
348 props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; 317 props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
349 props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, 318 props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
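The hunk above starts reporting SRQ capacity (max_srq, max_srq_wr, max_srq_sge) and splits the single SGE limit into separate send and receive limits. A minimal user-space sketch of reading these caps through libibverbs, assuming an already opened context 'ctx'; error handling is trimmed, and note that the asymmetric max_send_sge/max_recv_sge values are only visible to kernel ULPs via struct ib_device_attr, while ibv_device_attr still exposes a single max_sge.

/* Sketch: query the limits a consumer would use to size its queues. */
#include <stdio.h>
#include <infiniband/verbs.h>

static void print_limits(struct ibv_context *ctx)
{
        struct ibv_device_attr attr;

        if (ibv_query_device(ctx, &attr))
                return;

        /* SRQ sizing comes from the limits the driver now reports. */
        printf("max_srq=%d max_srq_wr=%d max_srq_sge=%d\n",
               attr.max_srq, attr.max_srq_wr, attr.max_srq_sge);
        /* User space still sees one max_sge; the split send/recv limits
         * live in the kernel's struct ib_device_attr. */
        printf("max_sge=%d max_sge_rd=%d\n", attr.max_sge, attr.max_sge_rd);
}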
@@ -592,7 +561,10 @@ void c4iw_register_device(struct work_struct *work)
592 (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 561 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
593 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 562 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
594 (1ull << IB_USER_VERBS_CMD_POST_SEND) | 563 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
595 (1ull << IB_USER_VERBS_CMD_POST_RECV); 564 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
565 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
566 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
567 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
596 dev->ibdev.node_type = RDMA_NODE_RNIC; 568 dev->ibdev.node_type = RDMA_NODE_RNIC;
597 BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); 569 BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
598 memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); 570 memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
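With the CREATE/MODIFY/DESTROY_SRQ bits added to uverbs_cmd_mask above, user space can now create SRQs on iw_cxgb4. A minimal libibverbs sketch, assuming 'pd' is a valid protection domain; the sizes are illustrative, not driver limits.

/* Sketch: create an SRQ once the driver advertises the SRQ verbs. */
#include <infiniband/verbs.h>

static struct ibv_srq *create_example_srq(struct ibv_pd *pd)
{
        struct ibv_srq_init_attr attr = {
                .attr = {
                        .max_wr  = 128,  /* receive WR capacity */
                        .max_sge = 1,    /* SGEs per receive WR */
                },
        };

        return ibv_create_srq(pd, &attr);   /* NULL on failure */
}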
@@ -608,15 +580,15 @@ void c4iw_register_device(struct work_struct *work)
608 dev->ibdev.mmap = c4iw_mmap; 580 dev->ibdev.mmap = c4iw_mmap;
609 dev->ibdev.alloc_pd = c4iw_allocate_pd; 581 dev->ibdev.alloc_pd = c4iw_allocate_pd;
610 dev->ibdev.dealloc_pd = c4iw_deallocate_pd; 582 dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
611 dev->ibdev.create_ah = c4iw_ah_create;
612 dev->ibdev.destroy_ah = c4iw_ah_destroy;
613 dev->ibdev.create_qp = c4iw_create_qp; 583 dev->ibdev.create_qp = c4iw_create_qp;
614 dev->ibdev.modify_qp = c4iw_ib_modify_qp; 584 dev->ibdev.modify_qp = c4iw_ib_modify_qp;
615 dev->ibdev.query_qp = c4iw_ib_query_qp; 585 dev->ibdev.query_qp = c4iw_ib_query_qp;
616 dev->ibdev.destroy_qp = c4iw_destroy_qp; 586 dev->ibdev.destroy_qp = c4iw_destroy_qp;
587 dev->ibdev.create_srq = c4iw_create_srq;
588 dev->ibdev.modify_srq = c4iw_modify_srq;
589 dev->ibdev.destroy_srq = c4iw_destroy_srq;
617 dev->ibdev.create_cq = c4iw_create_cq; 590 dev->ibdev.create_cq = c4iw_create_cq;
618 dev->ibdev.destroy_cq = c4iw_destroy_cq; 591 dev->ibdev.destroy_cq = c4iw_destroy_cq;
619 dev->ibdev.resize_cq = c4iw_resize_cq;
620 dev->ibdev.poll_cq = c4iw_poll_cq; 592 dev->ibdev.poll_cq = c4iw_poll_cq;
621 dev->ibdev.get_dma_mr = c4iw_get_dma_mr; 593 dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
622 dev->ibdev.reg_user_mr = c4iw_reg_user_mr; 594 dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
@@ -625,12 +597,10 @@ void c4iw_register_device(struct work_struct *work)
625 dev->ibdev.dealloc_mw = c4iw_dealloc_mw; 597 dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
626 dev->ibdev.alloc_mr = c4iw_alloc_mr; 598 dev->ibdev.alloc_mr = c4iw_alloc_mr;
627 dev->ibdev.map_mr_sg = c4iw_map_mr_sg; 599 dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
628 dev->ibdev.attach_mcast = c4iw_multicast_attach;
629 dev->ibdev.detach_mcast = c4iw_multicast_detach;
630 dev->ibdev.process_mad = c4iw_process_mad;
631 dev->ibdev.req_notify_cq = c4iw_arm_cq; 600 dev->ibdev.req_notify_cq = c4iw_arm_cq;
632 dev->ibdev.post_send = c4iw_post_send; 601 dev->ibdev.post_send = c4iw_post_send;
633 dev->ibdev.post_recv = c4iw_post_receive; 602 dev->ibdev.post_recv = c4iw_post_receive;
603 dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
634 dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; 604 dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
635 dev->ibdev.get_hw_stats = c4iw_get_mib; 605 dev->ibdev.get_hw_stats = c4iw_get_mib;
636 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; 606 dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aef53305f1c3..b3203afa3b1d 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
147} 147}
148 148
149static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, 149static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
150 struct c4iw_dev_ucontext *uctx) 150 struct c4iw_dev_ucontext *uctx, int has_rq)
151{ 151{
152 /* 152 /*
153 * uP clears EQ contexts when the connection exits rdma mode, 153 * uP clears EQ contexts when the connection exits rdma mode,
154 * so no need to post a RESET WR for these EQs. 154 * so no need to post a RESET WR for these EQs.
155 */ 155 */
156 dma_free_coherent(&(rdev->lldi.pdev->dev),
157 wq->rq.memsize, wq->rq.queue,
158 dma_unmap_addr(&wq->rq, mapping));
159 dealloc_sq(rdev, &wq->sq); 156 dealloc_sq(rdev, &wq->sq);
160 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
161 kfree(wq->rq.sw_rq);
162 kfree(wq->sq.sw_sq); 157 kfree(wq->sq.sw_sq);
163 c4iw_put_qpid(rdev, wq->rq.qid, uctx);
164 c4iw_put_qpid(rdev, wq->sq.qid, uctx); 158 c4iw_put_qpid(rdev, wq->sq.qid, uctx);
159
160 if (has_rq) {
161 dma_free_coherent(&rdev->lldi.pdev->dev,
162 wq->rq.memsize, wq->rq.queue,
163 dma_unmap_addr(&wq->rq, mapping));
164 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
165 kfree(wq->rq.sw_rq);
166 c4iw_put_qpid(rdev, wq->rq.qid, uctx);
167 }
165 return 0; 168 return 0;
166} 169}
167 170
@@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
195static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, 198static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
196 struct t4_cq *rcq, struct t4_cq *scq, 199 struct t4_cq *rcq, struct t4_cq *scq,
197 struct c4iw_dev_ucontext *uctx, 200 struct c4iw_dev_ucontext *uctx,
198 struct c4iw_wr_wait *wr_waitp) 201 struct c4iw_wr_wait *wr_waitp,
202 int need_rq)
199{ 203{
200 int user = (uctx != &rdev->uctx); 204 int user = (uctx != &rdev->uctx);
201 struct fw_ri_res_wr *res_wr; 205 struct fw_ri_res_wr *res_wr;
@@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
209 if (!wq->sq.qid) 213 if (!wq->sq.qid)
210 return -ENOMEM; 214 return -ENOMEM;
211 215
212 wq->rq.qid = c4iw_get_qpid(rdev, uctx); 216 if (need_rq) {
213 if (!wq->rq.qid) { 217 wq->rq.qid = c4iw_get_qpid(rdev, uctx);
214 ret = -ENOMEM; 218 if (!wq->rq.qid) {
215 goto free_sq_qid; 219 ret = -ENOMEM;
220 goto free_sq_qid;
221 }
216 } 222 }
217 223
218 if (!user) { 224 if (!user) {
@@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
220 GFP_KERNEL); 226 GFP_KERNEL);
221 if (!wq->sq.sw_sq) { 227 if (!wq->sq.sw_sq) {
222 ret = -ENOMEM; 228 ret = -ENOMEM;
 223 goto free_rq_qid; 229 goto free_rq_qid; /* FIXME */
224 } 230 }
225 231
226 wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq), 232 if (need_rq) {
227 GFP_KERNEL); 233 wq->rq.sw_rq = kcalloc(wq->rq.size,
228 if (!wq->rq.sw_rq) { 234 sizeof(*wq->rq.sw_rq),
229 ret = -ENOMEM; 235 GFP_KERNEL);
230 goto free_sw_sq; 236 if (!wq->rq.sw_rq) {
237 ret = -ENOMEM;
238 goto free_sw_sq;
239 }
231 } 240 }
232 } 241 }
233 242
234 /* 243 if (need_rq) {
235 * RQT must be a power of 2 and at least 16 deep. 244 /*
236 */ 245 * RQT must be a power of 2 and at least 16 deep.
237 wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); 246 */
238 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); 247 wq->rq.rqt_size =
239 if (!wq->rq.rqt_hwaddr) { 248 roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
240 ret = -ENOMEM; 249 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
241 goto free_sw_rq; 250 if (!wq->rq.rqt_hwaddr) {
251 ret = -ENOMEM;
252 goto free_sw_rq;
253 }
242 } 254 }
243 255
244 ret = alloc_sq(rdev, &wq->sq, user); 256 ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
247 memset(wq->sq.queue, 0, wq->sq.memsize); 259 memset(wq->sq.queue, 0, wq->sq.memsize);
248 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); 260 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
249 261
250 wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), 262 if (need_rq) {
251 wq->rq.memsize, &(wq->rq.dma_addr), 263 wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
252 GFP_KERNEL); 264 wq->rq.memsize,
253 if (!wq->rq.queue) { 265 &wq->rq.dma_addr,
254 ret = -ENOMEM; 266 GFP_KERNEL);
255 goto free_sq; 267 if (!wq->rq.queue) {
268 ret = -ENOMEM;
269 goto free_sq;
270 }
271 pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
272 wq->sq.queue,
273 (unsigned long long)virt_to_phys(wq->sq.queue),
274 wq->rq.queue,
275 (unsigned long long)virt_to_phys(wq->rq.queue));
276 memset(wq->rq.queue, 0, wq->rq.memsize);
277 dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
256 } 278 }
257 pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
258 wq->sq.queue,
259 (unsigned long long)virt_to_phys(wq->sq.queue),
260 wq->rq.queue,
261 (unsigned long long)virt_to_phys(wq->rq.queue));
262 memset(wq->rq.queue, 0, wq->rq.memsize);
263 dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
264 279
265 wq->db = rdev->lldi.db_reg; 280 wq->db = rdev->lldi.db_reg;
266 281
267 wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, 282 wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
268 &wq->sq.bar2_qid, 283 &wq->sq.bar2_qid,
269 user ? &wq->sq.bar2_pa : NULL); 284 user ? &wq->sq.bar2_pa : NULL);
270 wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, 285 if (need_rq)
271 &wq->rq.bar2_qid, 286 wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
272 user ? &wq->rq.bar2_pa : NULL); 287 T4_BAR2_QTYPE_EGRESS,
288 &wq->rq.bar2_qid,
289 user ? &wq->rq.bar2_pa : NULL);
273 290
274 /* 291 /*
275 * User mode must have bar2 access. 292 * User mode must have bar2 access.
276 */ 293 */
277 if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { 294 if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
278 pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", 295 pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
279 pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); 296 pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
280 goto free_dma; 297 goto free_dma;
@@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
285 302
286 /* build fw_ri_res_wr */ 303 /* build fw_ri_res_wr */
287 wr_len = sizeof *res_wr + 2 * sizeof *res; 304 wr_len = sizeof *res_wr + 2 * sizeof *res;
288 305 if (need_rq)
306 wr_len += sizeof(*res);
289 skb = alloc_skb(wr_len, GFP_KERNEL); 307 skb = alloc_skb(wr_len, GFP_KERNEL);
290 if (!skb) { 308 if (!skb) {
291 ret = -ENOMEM; 309 ret = -ENOMEM;
@@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
296 res_wr = __skb_put_zero(skb, wr_len); 314 res_wr = __skb_put_zero(skb, wr_len);
297 res_wr->op_nres = cpu_to_be32( 315 res_wr->op_nres = cpu_to_be32(
298 FW_WR_OP_V(FW_RI_RES_WR) | 316 FW_WR_OP_V(FW_RI_RES_WR) |
299 FW_RI_RES_WR_NRES_V(2) | 317 FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
300 FW_WR_COMPL_F); 318 FW_WR_COMPL_F);
301 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); 319 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
302 res_wr->cookie = (uintptr_t)wr_waitp; 320 res_wr->cookie = (uintptr_t)wr_waitp;
@@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
327 FW_RI_RES_WR_EQSIZE_V(eqsize)); 345 FW_RI_RES_WR_EQSIZE_V(eqsize));
328 res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); 346 res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
329 res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); 347 res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
330 res++;
331 res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
332 res->u.sqrq.op = FW_RI_RES_OP_WRITE;
333 348
334 /* 349 if (need_rq) {
335 * eqsize is the number of 64B entries plus the status page size. 350 res++;
336 */ 351 res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
337 eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + 352 res->u.sqrq.op = FW_RI_RES_OP_WRITE;
338 rdev->hw_queue.t4_eq_status_entries; 353
339 res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( 354 /*
340 FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ 355 * eqsize is the number of 64B entries plus the status page size
341 FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ 356 */
342 FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ 357 eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
343 FW_RI_RES_WR_IQID_V(rcq->cqid)); 358 rdev->hw_queue.t4_eq_status_entries;
344 res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( 359 res->u.sqrq.fetchszm_to_iqid =
345 FW_RI_RES_WR_DCAEN_V(0) | 360 /* no host cidx updates */
346 FW_RI_RES_WR_DCACPU_V(0) | 361 cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
347 FW_RI_RES_WR_FBMIN_V(2) | 362 /* don't keep in chip cache */
348 FW_RI_RES_WR_FBMAX_V(3) | 363 FW_RI_RES_WR_CPRIO_V(0) |
349 FW_RI_RES_WR_CIDXFTHRESHO_V(0) | 364 /* set by uP at ri_init time */
350 FW_RI_RES_WR_CIDXFTHRESH_V(0) | 365 FW_RI_RES_WR_PCIECHN_V(0) |
351 FW_RI_RES_WR_EQSIZE_V(eqsize)); 366 FW_RI_RES_WR_IQID_V(rcq->cqid));
352 res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); 367 res->u.sqrq.dcaen_to_eqsize =
353 res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); 368 cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
369 FW_RI_RES_WR_DCACPU_V(0) |
370 FW_RI_RES_WR_FBMIN_V(2) |
371 FW_RI_RES_WR_FBMAX_V(3) |
372 FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
373 FW_RI_RES_WR_CIDXFTHRESH_V(0) |
374 FW_RI_RES_WR_EQSIZE_V(eqsize));
375 res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
376 res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
377 }
354 378
355 c4iw_init_wr_wait(wr_waitp); 379 c4iw_init_wr_wait(wr_waitp);
356 ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); 380 ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
@@ -363,26 +387,30 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
363 387
364 return 0; 388 return 0;
365free_dma: 389free_dma:
366 dma_free_coherent(&(rdev->lldi.pdev->dev), 390 if (need_rq)
367 wq->rq.memsize, wq->rq.queue, 391 dma_free_coherent(&rdev->lldi.pdev->dev,
368 dma_unmap_addr(&wq->rq, mapping)); 392 wq->rq.memsize, wq->rq.queue,
393 dma_unmap_addr(&wq->rq, mapping));
369free_sq: 394free_sq:
370 dealloc_sq(rdev, &wq->sq); 395 dealloc_sq(rdev, &wq->sq);
371free_hwaddr: 396free_hwaddr:
372 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); 397 if (need_rq)
398 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
373free_sw_rq: 399free_sw_rq:
374 kfree(wq->rq.sw_rq); 400 if (need_rq)
401 kfree(wq->rq.sw_rq);
375free_sw_sq: 402free_sw_sq:
376 kfree(wq->sq.sw_sq); 403 kfree(wq->sq.sw_sq);
377free_rq_qid: 404free_rq_qid:
378 c4iw_put_qpid(rdev, wq->rq.qid, uctx); 405 if (need_rq)
406 c4iw_put_qpid(rdev, wq->rq.qid, uctx);
379free_sq_qid: 407free_sq_qid:
380 c4iw_put_qpid(rdev, wq->sq.qid, uctx); 408 c4iw_put_qpid(rdev, wq->sq.qid, uctx);
381 return ret; 409 return ret;
382} 410}
383 411
384static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, 412static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
385 struct ib_send_wr *wr, int max, u32 *plenp) 413 const struct ib_send_wr *wr, int max, u32 *plenp)
386{ 414{
387 u8 *dstp, *srcp; 415 u8 *dstp, *srcp;
388 u32 plen = 0; 416 u32 plen = 0;
@@ -427,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
427{ 455{
428 int i; 456 int i;
429 u32 plen = 0; 457 u32 plen = 0;
430 __be64 *flitp = (__be64 *)isglp->sge; 458 __be64 *flitp;
459
460 if ((__be64 *)isglp == queue_end)
461 isglp = (struct fw_ri_isgl *)queue_start;
462
463 flitp = (__be64 *)isglp->sge;
431 464
432 for (i = 0; i < num_sge; i++) { 465 for (i = 0; i < num_sge; i++) {
433 if ((plen + sg_list[i].length) < plen) 466 if ((plen + sg_list[i].length) < plen)
@@ -452,7 +485,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
452} 485}
453 486
454static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, 487static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
455 struct ib_send_wr *wr, u8 *len16) 488 const struct ib_send_wr *wr, u8 *len16)
456{ 489{
457 u32 plen; 490 u32 plen;
458 int size; 491 int size;
@@ -519,7 +552,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
519} 552}
520 553
521static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, 554static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
522 struct ib_send_wr *wr, u8 *len16) 555 const struct ib_send_wr *wr, u8 *len16)
523{ 556{
524 u32 plen; 557 u32 plen;
525 int size; 558 int size;
@@ -527,7 +560,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
527 560
528 if (wr->num_sge > T4_MAX_SEND_SGE) 561 if (wr->num_sge > T4_MAX_SEND_SGE)
529 return -EINVAL; 562 return -EINVAL;
530 wqe->write.r2 = 0; 563
564 /*
565 * iWARP protocol supports 64 bit immediate data but rdma api
566 * limits it to 32bit.
567 */
568 if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
569 wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
570 else
571 wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
531 wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); 572 wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
532 wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); 573 wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
533 if (wr->num_sge) { 574 if (wr->num_sge) {
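The new immediate-data handling above notes that iWARP could carry a 64-bit immediate but the core RDMA API caps it at 32 bits, so only imm_data32 is populated. A hedged user-space sketch of posting such a WR; 'qp', 'sge', 'raddr' and 'rkey' are placeholders, and the driver rejects this opcode unless the adapter reports write_w_imm_support.

/* Sketch: post an RDMA WRITE with 32-bit immediate data via libibverbs. */
#include <arpa/inet.h>
#include <infiniband/verbs.h>

static int post_write_imm(struct ibv_qp *qp, struct ibv_sge *sge,
                          uint64_t raddr, uint32_t rkey)
{
        struct ibv_send_wr wr = {
                .wr_id      = 1,
                .sg_list    = sge,
                .num_sge    = 1,
                .opcode     = IBV_WR_RDMA_WRITE_WITH_IMM,
                .send_flags = IBV_SEND_SIGNALED,
                .imm_data   = htonl(0x1234),  /* only 32 bits reach the peer */
        };
        struct ibv_send_wr *bad_wr;

        wr.wr.rdma.remote_addr = raddr;
        wr.wr.rdma.rkey        = rkey;
        return ibv_post_send(qp, &wr, &bad_wr);
}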
@@ -561,7 +602,58 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
561 return 0; 602 return 0;
562} 603}
563 604
564static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) 605static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
606 struct ib_send_wr *wr)
607{
608 memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
609 memset(immdp->r1, 0, 6);
610 immdp->op = FW_RI_DATA_IMMD;
611 immdp->immdlen = 16;
612}
613
614static void build_rdma_write_cmpl(struct t4_sq *sq,
615 struct fw_ri_rdma_write_cmpl_wr *wcwr,
616 const struct ib_send_wr *wr, u8 *len16)
617{
618 u32 plen;
619 int size;
620
621 /*
622 * This code assumes the struct fields preceding the write isgl
623 * fit in one 64B WR slot. This is because the WQE is built
624 * directly in the dma queue, and wrapping is only handled
 625 * by the code building sgls, i.e. the "fixed part" of the wr
626 * structs must all fit in 64B. The WQE build code should probably be
627 * redesigned to avoid this restriction, but for now just add
628 * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
629 */
630 BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
631
632 wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
633 wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
634 wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
635 wcwr->r2 = 0;
636 wcwr->r3 = 0;
637
638 /* SEND_INV SGL */
639 if (wr->next->send_flags & IB_SEND_INLINE)
640 build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
641 else
642 build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
643 &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
644
645 /* WRITE SGL */
646 build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
647 wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
648
649 size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
650 wr->num_sge * sizeof(struct fw_ri_sge);
651 wcwr->plen = cpu_to_be32(plen);
652 *len16 = DIV_ROUND_UP(size, 16);
653}
654
655static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
656 u8 *len16)
565{ 657{
566 if (wr->num_sge > 1) 658 if (wr->num_sge > 1)
567 return -EINVAL; 659 return -EINVAL;
@@ -590,8 +682,74 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
590 return 0; 682 return 0;
591} 683}
592 684
685static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
686{
687 bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
688 qhp->sq_sig_all;
689 bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
690 qhp->sq_sig_all;
691 struct t4_swsqe *swsqe;
692 union t4_wr *wqe;
693 u16 write_wrid;
694 u8 len16;
695 u16 idx;
696
697 /*
698 * The sw_sq entries still look like a WRITE and a SEND and consume
699 * 2 slots. The FW WR, however, will be a single uber-WR.
700 */
701 wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
702 qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
703 build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
704
705 /* WRITE swsqe */
706 swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
707 swsqe->opcode = FW_RI_RDMA_WRITE;
708 swsqe->idx = qhp->wq.sq.pidx;
709 swsqe->complete = 0;
710 swsqe->signaled = write_signaled;
711 swsqe->flushed = 0;
712 swsqe->wr_id = wr->wr_id;
713 if (c4iw_wr_log) {
714 swsqe->sge_ts =
715 cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
716 swsqe->host_time = ktime_get();
717 }
718
719 write_wrid = qhp->wq.sq.pidx;
720
721 /* just bump the sw_sq */
722 qhp->wq.sq.in_use++;
723 if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
724 qhp->wq.sq.pidx = 0;
725
726 /* SEND_WITH_INV swsqe */
727 swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
728 swsqe->opcode = FW_RI_SEND_WITH_INV;
729 swsqe->idx = qhp->wq.sq.pidx;
730 swsqe->complete = 0;
731 swsqe->signaled = send_signaled;
732 swsqe->flushed = 0;
733 swsqe->wr_id = wr->next->wr_id;
734 if (c4iw_wr_log) {
735 swsqe->sge_ts =
736 cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
737 swsqe->host_time = ktime_get();
738 }
739
740 wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
741 wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
742
743 init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
744 write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
745 t4_sq_produce(&qhp->wq, len16);
746 idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
747
748 t4_ring_sq_db(&qhp->wq, idx, wqe);
749}
750
593static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, 751static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
594 struct ib_recv_wr *wr, u8 *len16) 752 const struct ib_recv_wr *wr, u8 *len16)
595{ 753{
596 int ret; 754 int ret;
597 755
@@ -605,8 +763,22 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
605 return 0; 763 return 0;
606} 764}
607 765
766static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
767 u8 *len16)
768{
769 int ret;
770
771 ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
772 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
773 if (ret)
774 return ret;
775 *len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
776 wr->num_sge * sizeof(struct fw_ri_sge), 16);
777 return 0;
778}
779
608static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, 780static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
609 struct ib_reg_wr *wr, struct c4iw_mr *mhp, 781 const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
610 u8 *len16) 782 u8 *len16)
611{ 783{
612 __be64 *p = (__be64 *)fr->pbl; 784 __be64 *p = (__be64 *)fr->pbl;
@@ -638,8 +810,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
638} 810}
639 811
640static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, 812static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
641 struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, 813 const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
642 bool dsgl_supported) 814 u8 *len16, bool dsgl_supported)
643{ 815{
644 struct fw_ri_immd *imdp; 816 struct fw_ri_immd *imdp;
645 __be64 *p; 817 __be64 *p;
@@ -701,7 +873,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
701 return 0; 873 return 0;
702} 874}
703 875
704static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) 876static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
877 u8 *len16)
705{ 878{
706 wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); 879 wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
707 wqe->inv.r2 = 0; 880 wqe->inv.r2 = 0;
@@ -721,7 +894,7 @@ static void free_qp_work(struct work_struct *work)
721 894
722 pr_debug("qhp %p ucontext %p\n", qhp, ucontext); 895 pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
723 destroy_qp(&rhp->rdev, &qhp->wq, 896 destroy_qp(&rhp->rdev, &qhp->wq,
724 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 897 ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
725 898
726 if (ucontext) 899 if (ucontext)
727 c4iw_put_ucontext(ucontext); 900 c4iw_put_ucontext(ucontext);
@@ -804,6 +977,9 @@ static int ib_to_fw_opcode(int ib_opcode)
804 case IB_WR_RDMA_WRITE: 977 case IB_WR_RDMA_WRITE:
805 opcode = FW_RI_RDMA_WRITE; 978 opcode = FW_RI_RDMA_WRITE;
806 break; 979 break;
980 case IB_WR_RDMA_WRITE_WITH_IMM:
981 opcode = FW_RI_WRITE_IMMEDIATE;
982 break;
807 case IB_WR_RDMA_READ: 983 case IB_WR_RDMA_READ:
808 case IB_WR_RDMA_READ_WITH_INV: 984 case IB_WR_RDMA_READ_WITH_INV:
809 opcode = FW_RI_READ_REQ; 985 opcode = FW_RI_READ_REQ;
@@ -820,7 +996,8 @@ static int ib_to_fw_opcode(int ib_opcode)
820 return opcode; 996 return opcode;
821} 997}
822 998
823static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) 999static int complete_sq_drain_wr(struct c4iw_qp *qhp,
1000 const struct ib_send_wr *wr)
824{ 1001{
825 struct t4_cqe cqe = {}; 1002 struct t4_cqe cqe = {};
826 struct c4iw_cq *schp; 1003 struct c4iw_cq *schp;
@@ -858,8 +1035,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
858 return 0; 1035 return 0;
859} 1036}
860 1037
861static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, 1038static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
862 struct ib_send_wr **bad_wr) 1039 const struct ib_send_wr *wr,
1040 const struct ib_send_wr **bad_wr)
863{ 1041{
864 int ret = 0; 1042 int ret = 0;
865 1043
@@ -874,7 +1052,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
874 return ret; 1052 return ret;
875} 1053}
876 1054
877static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) 1055static void complete_rq_drain_wr(struct c4iw_qp *qhp,
1056 const struct ib_recv_wr *wr)
878{ 1057{
879 struct t4_cqe cqe = {}; 1058 struct t4_cqe cqe = {};
880 struct c4iw_cq *rchp; 1059 struct c4iw_cq *rchp;
@@ -906,7 +1085,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
906 } 1085 }
907} 1086}
908 1087
909static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) 1088static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
1089 const struct ib_recv_wr *wr)
910{ 1090{
911 while (wr) { 1091 while (wr) {
912 complete_rq_drain_wr(qhp, wr); 1092 complete_rq_drain_wr(qhp, wr);
@@ -914,14 +1094,15 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
914 } 1094 }
915} 1095}
916 1096
917int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1097int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
918 struct ib_send_wr **bad_wr) 1098 const struct ib_send_wr **bad_wr)
919{ 1099{
920 int err = 0; 1100 int err = 0;
921 u8 len16 = 0; 1101 u8 len16 = 0;
922 enum fw_wr_opcodes fw_opcode = 0; 1102 enum fw_wr_opcodes fw_opcode = 0;
923 enum fw_ri_wr_flags fw_flags; 1103 enum fw_ri_wr_flags fw_flags;
924 struct c4iw_qp *qhp; 1104 struct c4iw_qp *qhp;
1105 struct c4iw_dev *rhp;
925 union t4_wr *wqe = NULL; 1106 union t4_wr *wqe = NULL;
926 u32 num_wrs; 1107 u32 num_wrs;
927 struct t4_swsqe *swsqe; 1108 struct t4_swsqe *swsqe;
@@ -929,6 +1110,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
929 u16 idx = 0; 1110 u16 idx = 0;
930 1111
931 qhp = to_c4iw_qp(ibqp); 1112 qhp = to_c4iw_qp(ibqp);
1113 rhp = qhp->rhp;
932 spin_lock_irqsave(&qhp->lock, flag); 1114 spin_lock_irqsave(&qhp->lock, flag);
933 1115
934 /* 1116 /*
@@ -946,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
946 *bad_wr = wr; 1128 *bad_wr = wr;
947 return -ENOMEM; 1129 return -ENOMEM;
948 } 1130 }
1131
1132 /*
1133 * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
 1134 * the response for small NVMe-oF READ requests. If the chain is
1135 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
1136 * meet the requirements of the fw_ri_write_cmpl_wr work request,
1137 * then build and post the write_cmpl WR. If any of the tests
 1138 * below are not true, then we continue on with the traditional WRITE
1139 * and SEND WRs.
1140 */
1141 if (qhp->rhp->rdev.lldi.write_cmpl_support &&
1142 CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
1143 CHELSIO_T5 &&
1144 wr && wr->next && !wr->next->next &&
1145 wr->opcode == IB_WR_RDMA_WRITE &&
1146 wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
1147 wr->next->opcode == IB_WR_SEND_WITH_INV &&
1148 wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
1149 wr->next->num_sge == 1 && num_wrs >= 2) {
1150 post_write_cmpl(qhp, wr);
1151 spin_unlock_irqrestore(&qhp->lock, flag);
1152 return 0;
1153 }
1154
949 while (wr) { 1155 while (wr) {
950 if (num_wrs == 0) { 1156 if (num_wrs == 0) {
951 err = -ENOMEM; 1157 err = -ENOMEM;
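The fastpath above only fires for an exact two-WR chain, an RDMA WRITE followed by a SEND_WITH_INV whose single SGE is exactly T4_WRITE_CMPL_MAX_CQE bytes, posted in one call. A hedged sketch of the kind of chain an NVMe-oF-style target posts; all names are placeholders, and whether the fused write_cmpl WR is used stays invisible to the caller.

/* Sketch: the WRITE -> SEND_WITH_INV chain the write_cmpl fastpath targets
 * (assumes 'qp', 'data_sge', 'resp_sge' and the remote rkey/addr exist). */
#include <infiniband/verbs.h>

static int post_write_then_send_inv(struct ibv_qp *qp,
                                    struct ibv_sge *data_sge,
                                    struct ibv_sge *resp_sge,
                                    uint64_t raddr, uint32_t rkey)
{
        struct ibv_send_wr send_inv = {
                .wr_id           = 2,
                .sg_list         = resp_sge,      /* e.g. a 16-byte response */
                .num_sge         = 1,
                .opcode          = IBV_WR_SEND_WITH_INV,
                .send_flags      = IBV_SEND_SIGNALED,
                .invalidate_rkey = rkey,          /* remote key to invalidate */
        };
        struct ibv_send_wr write = {
                .wr_id   = 1,
                .next    = &send_inv,             /* two WRs, one post */
                .sg_list = data_sge,
                .num_sge = 1,
                .opcode  = IBV_WR_RDMA_WRITE,
        };
        struct ibv_send_wr *bad_wr;

        write.wr.rdma.remote_addr = raddr;
        write.wr.rdma.rkey        = rkey;
        return ibv_post_send(qp, &write, &bad_wr);
}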
@@ -973,6 +1179,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
973 swsqe->opcode = FW_RI_SEND_WITH_INV; 1179 swsqe->opcode = FW_RI_SEND_WITH_INV;
974 err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); 1180 err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
975 break; 1181 break;
1182 case IB_WR_RDMA_WRITE_WITH_IMM:
1183 if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
1184 err = -EINVAL;
1185 break;
1186 }
1187 fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
1188 /*FALLTHROUGH*/
976 case IB_WR_RDMA_WRITE: 1189 case IB_WR_RDMA_WRITE:
977 fw_opcode = FW_RI_RDMA_WRITE_WR; 1190 fw_opcode = FW_RI_RDMA_WRITE_WR;
978 swsqe->opcode = FW_RI_RDMA_WRITE; 1191 swsqe->opcode = FW_RI_RDMA_WRITE;
@@ -983,8 +1196,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
983 fw_opcode = FW_RI_RDMA_READ_WR; 1196 fw_opcode = FW_RI_RDMA_READ_WR;
984 swsqe->opcode = FW_RI_READ_REQ; 1197 swsqe->opcode = FW_RI_READ_REQ;
985 if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { 1198 if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
986 c4iw_invalidate_mr(qhp->rhp, 1199 c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
987 wr->sg_list[0].lkey);
988 fw_flags = FW_RI_RDMA_READ_INVALIDATE; 1200 fw_flags = FW_RI_RDMA_READ_INVALIDATE;
989 } else { 1201 } else {
990 fw_flags = 0; 1202 fw_flags = 0;
@@ -1000,7 +1212,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1000 struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); 1212 struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
1001 1213
1002 swsqe->opcode = FW_RI_FAST_REGISTER; 1214 swsqe->opcode = FW_RI_FAST_REGISTER;
1003 if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && 1215 if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
1004 !mhp->attr.state && mhp->mpl_len <= 2) { 1216 !mhp->attr.state && mhp->mpl_len <= 2) {
1005 fw_opcode = FW_RI_FR_NSMR_TPTE_WR; 1217 fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
1006 build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), 1218 build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
@@ -1009,7 +1221,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1009 fw_opcode = FW_RI_FR_NSMR_WR; 1221 fw_opcode = FW_RI_FR_NSMR_WR;
1010 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), 1222 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
1011 mhp, &len16, 1223 mhp, &len16,
1012 qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); 1224 rhp->rdev.lldi.ulptx_memwrite_dsgl);
1013 if (err) 1225 if (err)
1014 break; 1226 break;
1015 } 1227 }
@@ -1022,7 +1234,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1022 fw_opcode = FW_RI_INV_LSTAG_WR; 1234 fw_opcode = FW_RI_INV_LSTAG_WR;
1023 swsqe->opcode = FW_RI_LOCAL_INV; 1235 swsqe->opcode = FW_RI_LOCAL_INV;
1024 err = build_inv_stag(wqe, wr, &len16); 1236 err = build_inv_stag(wqe, wr, &len16);
1025 c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); 1237 c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
1026 break; 1238 break;
1027 default: 1239 default:
1028 pr_warn("%s post of type=%d TBD!\n", __func__, 1240 pr_warn("%s post of type=%d TBD!\n", __func__,
@@ -1041,7 +1253,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1041 swsqe->wr_id = wr->wr_id; 1253 swsqe->wr_id = wr->wr_id;
1042 if (c4iw_wr_log) { 1254 if (c4iw_wr_log) {
1043 swsqe->sge_ts = cxgb4_read_sge_timestamp( 1255 swsqe->sge_ts = cxgb4_read_sge_timestamp(
1044 qhp->rhp->rdev.lldi.ports[0]); 1256 rhp->rdev.lldi.ports[0]);
1045 swsqe->host_time = ktime_get(); 1257 swsqe->host_time = ktime_get();
1046 } 1258 }
1047 1259
@@ -1055,7 +1267,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1055 t4_sq_produce(&qhp->wq, len16); 1267 t4_sq_produce(&qhp->wq, len16);
1056 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); 1268 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1057 } 1269 }
1058 if (!qhp->rhp->rdev.status_page->db_off) { 1270 if (!rhp->rdev.status_page->db_off) {
1059 t4_ring_sq_db(&qhp->wq, idx, wqe); 1271 t4_ring_sq_db(&qhp->wq, idx, wqe);
1060 spin_unlock_irqrestore(&qhp->lock, flag); 1272 spin_unlock_irqrestore(&qhp->lock, flag);
1061 } else { 1273 } else {
@@ -1065,8 +1277,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1065 return err; 1277 return err;
1066} 1278}
1067 1279
1068int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 1280int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1069 struct ib_recv_wr **bad_wr) 1281 const struct ib_recv_wr **bad_wr)
1070{ 1282{
1071 int err = 0; 1283 int err = 0;
1072 struct c4iw_qp *qhp; 1284 struct c4iw_qp *qhp;
@@ -1145,6 +1357,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1145 return err; 1357 return err;
1146} 1358}
1147 1359
1360static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
1361 u64 wr_id, u8 len16)
1362{
1363 struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
1364
1365 pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
1366 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
1367 srq->in_use, srq->ooo_count,
1368 (unsigned long long)wr_id, srq->pending_cidx,
1369 srq->pending_pidx, srq->pending_in_use);
1370 pwr->wr_id = wr_id;
1371 pwr->len16 = len16;
1372 memcpy(&pwr->wqe, wqe, len16 * 16);
1373 t4_srq_produce_pending_wr(srq);
1374}
1375
1376int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1377 const struct ib_recv_wr **bad_wr)
1378{
1379 union t4_recv_wr *wqe, lwqe;
1380 struct c4iw_srq *srq;
1381 unsigned long flag;
1382 u8 len16 = 0;
1383 u16 idx = 0;
1384 int err = 0;
1385 u32 num_wrs;
1386
1387 srq = to_c4iw_srq(ibsrq);
1388 spin_lock_irqsave(&srq->lock, flag);
1389 num_wrs = t4_srq_avail(&srq->wq);
1390 if (num_wrs == 0) {
1391 spin_unlock_irqrestore(&srq->lock, flag);
1392 return -ENOMEM;
1393 }
1394 while (wr) {
1395 if (wr->num_sge > T4_MAX_RECV_SGE) {
1396 err = -EINVAL;
1397 *bad_wr = wr;
1398 break;
1399 }
1400 wqe = &lwqe;
1401 if (num_wrs)
1402 err = build_srq_recv(wqe, wr, &len16);
1403 else
1404 err = -ENOMEM;
1405 if (err) {
1406 *bad_wr = wr;
1407 break;
1408 }
1409
1410 wqe->recv.opcode = FW_RI_RECV_WR;
1411 wqe->recv.r1 = 0;
1412 wqe->recv.wrid = srq->wq.pidx;
1413 wqe->recv.r2[0] = 0;
1414 wqe->recv.r2[1] = 0;
1415 wqe->recv.r2[2] = 0;
1416 wqe->recv.len16 = len16;
1417
1418 if (srq->wq.ooo_count ||
1419 srq->wq.pending_in_use ||
1420 srq->wq.sw_rq[srq->wq.pidx].valid) {
1421 defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
1422 } else {
1423 srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
1424 srq->wq.sw_rq[srq->wq.pidx].valid = 1;
1425 c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
1426 pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
1427 __func__, srq->wq.cidx,
1428 srq->wq.pidx, srq->wq.wq_pidx,
1429 srq->wq.in_use,
1430 (unsigned long long)wr->wr_id);
1431 t4_srq_produce(&srq->wq, len16);
1432 idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
1433 }
1434 wr = wr->next;
1435 num_wrs--;
1436 }
1437 if (idx)
1438 t4_ring_srq_db(&srq->wq, idx, len16, wqe);
1439 spin_unlock_irqrestore(&srq->lock, flag);
1440 return err;
1441}
1442
1148static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, 1443static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
1149 u8 *ecode) 1444 u8 *ecode)
1150{ 1445{
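c4iw_post_srq_recv above either copies the WR straight into the SRQ queue or defers it while out-of-order completions are pending; from user space it is the ordinary SRQ post. A minimal sketch, assuming 'srq' was created as in the earlier example and 'buf' lies inside the registered region 'mr'.

/* Sketch: post one receive buffer to the shared receive queue. */
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_one_srq_recv(struct ibv_srq *srq, struct ibv_mr *mr,
                             void *buf, uint32_t len, uint64_t wr_id)
{
        struct ibv_sge sge = {
                .addr   = (uintptr_t)buf,
                .length = len,
                .lkey   = mr->lkey,
        };
        struct ibv_recv_wr wr = {
                .wr_id   = wr_id,
                .sg_list = &sge,
                .num_sge = 1,
        };
        struct ibv_recv_wr *bad_wr;

        return ibv_post_srq_recv(srq, &wr, &bad_wr);
}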
@@ -1321,7 +1616,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1321 struct c4iw_cq *schp) 1616 struct c4iw_cq *schp)
1322{ 1617{
1323 int count; 1618 int count;
1324 int rq_flushed, sq_flushed; 1619 int rq_flushed = 0, sq_flushed;
1325 unsigned long flag; 1620 unsigned long flag;
1326 1621
1327 pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); 1622 pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1340,11 +1635,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1340 return; 1635 return;
1341 } 1636 }
1342 qhp->wq.flushed = 1; 1637 qhp->wq.flushed = 1;
1343 t4_set_wq_in_error(&qhp->wq); 1638 t4_set_wq_in_error(&qhp->wq, 0);
1344 1639
1345 c4iw_flush_hw_cq(rchp, qhp); 1640 c4iw_flush_hw_cq(rchp, qhp);
1346 c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); 1641 if (!qhp->srq) {
1347 rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); 1642 c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
1643 rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
1644 }
1348 1645
1349 if (schp != rchp) 1646 if (schp != rchp)
1350 c4iw_flush_hw_cq(schp, qhp); 1647 c4iw_flush_hw_cq(schp, qhp);
@@ -1388,7 +1685,7 @@ static void flush_qp(struct c4iw_qp *qhp)
1388 schp = to_c4iw_cq(qhp->ibqp.send_cq); 1685 schp = to_c4iw_cq(qhp->ibqp.send_cq);
1389 1686
1390 if (qhp->ibqp.uobject) { 1687 if (qhp->ibqp.uobject) {
1391 t4_set_wq_in_error(&qhp->wq); 1688 t4_set_wq_in_error(&qhp->wq, 0);
1392 t4_set_cq_in_error(&rchp->cq); 1689 t4_set_cq_in_error(&rchp->cq);
1393 spin_lock_irqsave(&rchp->comp_handler_lock, flag); 1690 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1394 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); 1691 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1517,16 +1814,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
1517 wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); 1814 wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
1518 wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); 1815 wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
1519 wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); 1816 wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
1520 wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); 1817 if (qhp->srq) {
1818 wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
1819 qhp->srq->idx);
1820 } else {
1821 wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
1822 wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
1823 wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
1824 rhp->rdev.lldi.vr->rq.start);
1825 }
1521 wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); 1826 wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
1522 wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); 1827 wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
1523 wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); 1828 wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
1524 wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); 1829 wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
1525 wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); 1830 wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
1526 wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); 1831 wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
1527 wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
1528 wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
1529 rhp->rdev.lldi.vr->rq.start);
1530 if (qhp->attr.mpa_attr.initiator) 1832 if (qhp->attr.mpa_attr.initiator)
1531 build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); 1833 build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
1532 1834
@@ -1643,7 +1945,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1643 case C4IW_QP_STATE_RTS: 1945 case C4IW_QP_STATE_RTS:
1644 switch (attrs->next_state) { 1946 switch (attrs->next_state) {
1645 case C4IW_QP_STATE_CLOSING: 1947 case C4IW_QP_STATE_CLOSING:
1646 t4_set_wq_in_error(&qhp->wq); 1948 t4_set_wq_in_error(&qhp->wq, 0);
1647 set_state(qhp, C4IW_QP_STATE_CLOSING); 1949 set_state(qhp, C4IW_QP_STATE_CLOSING);
1648 ep = qhp->ep; 1950 ep = qhp->ep;
1649 if (!internal) { 1951 if (!internal) {
@@ -1656,7 +1958,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1656 goto err; 1958 goto err;
1657 break; 1959 break;
1658 case C4IW_QP_STATE_TERMINATE: 1960 case C4IW_QP_STATE_TERMINATE:
1659 t4_set_wq_in_error(&qhp->wq); 1961 t4_set_wq_in_error(&qhp->wq, 0);
1660 set_state(qhp, C4IW_QP_STATE_TERMINATE); 1962 set_state(qhp, C4IW_QP_STATE_TERMINATE);
1661 qhp->attr.layer_etype = attrs->layer_etype; 1963 qhp->attr.layer_etype = attrs->layer_etype;
1662 qhp->attr.ecode = attrs->ecode; 1964 qhp->attr.ecode = attrs->ecode;
@@ -1673,7 +1975,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1673 } 1975 }
1674 break; 1976 break;
1675 case C4IW_QP_STATE_ERROR: 1977 case C4IW_QP_STATE_ERROR:
1676 t4_set_wq_in_error(&qhp->wq); 1978 t4_set_wq_in_error(&qhp->wq, 0);
1677 set_state(qhp, C4IW_QP_STATE_ERROR); 1979 set_state(qhp, C4IW_QP_STATE_ERROR);
1678 if (!internal) { 1980 if (!internal) {
1679 abort = 1; 1981 abort = 1;
@@ -1819,7 +2121,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1819 struct c4iw_cq *schp; 2121 struct c4iw_cq *schp;
1820 struct c4iw_cq *rchp; 2122 struct c4iw_cq *rchp;
1821 struct c4iw_create_qp_resp uresp; 2123 struct c4iw_create_qp_resp uresp;
1822 unsigned int sqsize, rqsize; 2124 unsigned int sqsize, rqsize = 0;
1823 struct c4iw_ucontext *ucontext; 2125 struct c4iw_ucontext *ucontext;
1824 int ret; 2126 int ret;
1825 struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; 2127 struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
@@ -1840,11 +2142,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1840 if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) 2142 if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
1841 return ERR_PTR(-EINVAL); 2143 return ERR_PTR(-EINVAL);
1842 2144
1843 if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) 2145 if (!attrs->srq) {
1844 return ERR_PTR(-E2BIG); 2146 if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
1845 rqsize = attrs->cap.max_recv_wr + 1; 2147 return ERR_PTR(-E2BIG);
1846 if (rqsize < 8) 2148 rqsize = attrs->cap.max_recv_wr + 1;
1847 rqsize = 8; 2149 if (rqsize < 8)
2150 rqsize = 8;
2151 }
1848 2152
1849 if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) 2153 if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
1850 return ERR_PTR(-E2BIG); 2154 return ERR_PTR(-E2BIG);
@@ -1869,19 +2173,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1869 (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * 2173 (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
1870 sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); 2174 sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
1871 qhp->wq.sq.flush_cidx = -1; 2175 qhp->wq.sq.flush_cidx = -1;
1872 qhp->wq.rq.size = rqsize; 2176 if (!attrs->srq) {
1873 qhp->wq.rq.memsize = 2177 qhp->wq.rq.size = rqsize;
1874 (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * 2178 qhp->wq.rq.memsize =
1875 sizeof(*qhp->wq.rq.queue); 2179 (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2180 sizeof(*qhp->wq.rq.queue);
2181 }
1876 2182
1877 if (ucontext) { 2183 if (ucontext) {
1878 qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); 2184 qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
1879 qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); 2185 if (!attrs->srq)
2186 qhp->wq.rq.memsize =
2187 roundup(qhp->wq.rq.memsize, PAGE_SIZE);
1880 } 2188 }
1881 2189
1882 ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, 2190 ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
1883 ucontext ? &ucontext->uctx : &rhp->rdev.uctx, 2191 ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
1884 qhp->wr_waitp); 2192 qhp->wr_waitp, !attrs->srq);
1885 if (ret) 2193 if (ret)
1886 goto err_free_wr_wait; 2194 goto err_free_wr_wait;
1887 2195
@@ -1894,10 +2202,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1894 qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; 2202 qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
1895 qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; 2203 qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
1896 qhp->attr.sq_num_entries = attrs->cap.max_send_wr; 2204 qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
1897 qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
1898 qhp->attr.sq_max_sges = attrs->cap.max_send_sge; 2205 qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
1899 qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; 2206 qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
1900 qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; 2207 if (!attrs->srq) {
2208 qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
2209 qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
2210 }
1901 qhp->attr.state = C4IW_QP_STATE_IDLE; 2211 qhp->attr.state = C4IW_QP_STATE_IDLE;
1902 qhp->attr.next_state = C4IW_QP_STATE_IDLE; 2212 qhp->attr.next_state = C4IW_QP_STATE_IDLE;
1903 qhp->attr.enable_rdma_read = 1; 2213 qhp->attr.enable_rdma_read = 1;
@@ -1922,21 +2232,27 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1922 ret = -ENOMEM; 2232 ret = -ENOMEM;
1923 goto err_remove_handle; 2233 goto err_remove_handle;
1924 } 2234 }
1925 rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); 2235 if (!attrs->srq) {
1926 if (!rq_key_mm) { 2236 rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
1927 ret = -ENOMEM; 2237 if (!rq_key_mm) {
1928 goto err_free_sq_key; 2238 ret = -ENOMEM;
2239 goto err_free_sq_key;
2240 }
1929 } 2241 }
1930 sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); 2242 sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
1931 if (!sq_db_key_mm) { 2243 if (!sq_db_key_mm) {
1932 ret = -ENOMEM; 2244 ret = -ENOMEM;
1933 goto err_free_rq_key; 2245 goto err_free_rq_key;
1934 } 2246 }
1935 rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); 2247 if (!attrs->srq) {
1936 if (!rq_db_key_mm) { 2248 rq_db_key_mm =
1937 ret = -ENOMEM; 2249 kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
1938 goto err_free_sq_db_key; 2250 if (!rq_db_key_mm) {
2251 ret = -ENOMEM;
2252 goto err_free_sq_db_key;
2253 }
1939 } 2254 }
2255 memset(&uresp, 0, sizeof(uresp));
1940 if (t4_sq_onchip(&qhp->wq.sq)) { 2256 if (t4_sq_onchip(&qhp->wq.sq)) {
1941 ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), 2257 ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
1942 GFP_KERNEL); 2258 GFP_KERNEL);
@@ -1945,30 +2261,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1945 goto err_free_rq_db_key; 2261 goto err_free_rq_db_key;
1946 } 2262 }
1947 uresp.flags = C4IW_QPF_ONCHIP; 2263 uresp.flags = C4IW_QPF_ONCHIP;
1948 } else 2264 }
1949 uresp.flags = 0; 2265 if (rhp->rdev.lldi.write_w_imm_support)
2266 uresp.flags |= C4IW_QPF_WRITE_W_IMM;
1950 uresp.qid_mask = rhp->rdev.qpmask; 2267 uresp.qid_mask = rhp->rdev.qpmask;
1951 uresp.sqid = qhp->wq.sq.qid; 2268 uresp.sqid = qhp->wq.sq.qid;
1952 uresp.sq_size = qhp->wq.sq.size; 2269 uresp.sq_size = qhp->wq.sq.size;
1953 uresp.sq_memsize = qhp->wq.sq.memsize; 2270 uresp.sq_memsize = qhp->wq.sq.memsize;
1954 uresp.rqid = qhp->wq.rq.qid; 2271 if (!attrs->srq) {
1955 uresp.rq_size = qhp->wq.rq.size; 2272 uresp.rqid = qhp->wq.rq.qid;
1956 uresp.rq_memsize = qhp->wq.rq.memsize; 2273 uresp.rq_size = qhp->wq.rq.size;
2274 uresp.rq_memsize = qhp->wq.rq.memsize;
2275 }
1957 spin_lock(&ucontext->mmap_lock); 2276 spin_lock(&ucontext->mmap_lock);
1958 if (ma_sync_key_mm) { 2277 if (ma_sync_key_mm) {
1959 uresp.ma_sync_key = ucontext->key; 2278 uresp.ma_sync_key = ucontext->key;
1960 ucontext->key += PAGE_SIZE; 2279 ucontext->key += PAGE_SIZE;
1961 } else {
1962 uresp.ma_sync_key = 0;
1963 } 2280 }
1964 uresp.sq_key = ucontext->key; 2281 uresp.sq_key = ucontext->key;
1965 ucontext->key += PAGE_SIZE; 2282 ucontext->key += PAGE_SIZE;
1966 uresp.rq_key = ucontext->key; 2283 if (!attrs->srq) {
1967 ucontext->key += PAGE_SIZE; 2284 uresp.rq_key = ucontext->key;
2285 ucontext->key += PAGE_SIZE;
2286 }
1968 uresp.sq_db_gts_key = ucontext->key; 2287 uresp.sq_db_gts_key = ucontext->key;
1969 ucontext->key += PAGE_SIZE; 2288 ucontext->key += PAGE_SIZE;
1970 uresp.rq_db_gts_key = ucontext->key; 2289 if (!attrs->srq) {
1971 ucontext->key += PAGE_SIZE; 2290 uresp.rq_db_gts_key = ucontext->key;
2291 ucontext->key += PAGE_SIZE;
2292 }
1972 spin_unlock(&ucontext->mmap_lock); 2293 spin_unlock(&ucontext->mmap_lock);
1973 ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); 2294 ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
1974 if (ret) 2295 if (ret)
@@ -1977,18 +2298,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1977 sq_key_mm->addr = qhp->wq.sq.phys_addr; 2298 sq_key_mm->addr = qhp->wq.sq.phys_addr;
1978 sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); 2299 sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
1979 insert_mmap(ucontext, sq_key_mm); 2300 insert_mmap(ucontext, sq_key_mm);
1980 rq_key_mm->key = uresp.rq_key; 2301 if (!attrs->srq) {
1981 rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); 2302 rq_key_mm->key = uresp.rq_key;
1982 rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); 2303 rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
1983 insert_mmap(ucontext, rq_key_mm); 2304 rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
2305 insert_mmap(ucontext, rq_key_mm);
2306 }
1984 sq_db_key_mm->key = uresp.sq_db_gts_key; 2307 sq_db_key_mm->key = uresp.sq_db_gts_key;
1985 sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; 2308 sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
1986 sq_db_key_mm->len = PAGE_SIZE; 2309 sq_db_key_mm->len = PAGE_SIZE;
1987 insert_mmap(ucontext, sq_db_key_mm); 2310 insert_mmap(ucontext, sq_db_key_mm);
1988 rq_db_key_mm->key = uresp.rq_db_gts_key; 2311 if (!attrs->srq) {
1989 rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; 2312 rq_db_key_mm->key = uresp.rq_db_gts_key;
1990 rq_db_key_mm->len = PAGE_SIZE; 2313 rq_db_key_mm->addr =
1991 insert_mmap(ucontext, rq_db_key_mm); 2314 (u64)(unsigned long)qhp->wq.rq.bar2_pa;
2315 rq_db_key_mm->len = PAGE_SIZE;
2316 insert_mmap(ucontext, rq_db_key_mm);
2317 }
1992 if (ma_sync_key_mm) { 2318 if (ma_sync_key_mm) {
1993 ma_sync_key_mm->key = uresp.ma_sync_key; 2319 ma_sync_key_mm->key = uresp.ma_sync_key;
1994 ma_sync_key_mm->addr = 2320 ma_sync_key_mm->addr =
@@ -2001,7 +2327,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
2001 c4iw_get_ucontext(ucontext); 2327 c4iw_get_ucontext(ucontext);
2002 qhp->ucontext = ucontext; 2328 qhp->ucontext = ucontext;
2003 } 2329 }
2330 if (!attrs->srq) {
2331 qhp->wq.qp_errp =
2332 &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
2333 } else {
2334 qhp->wq.qp_errp =
2335 &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
2336 qhp->wq.srqidxp =
2337 &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
2338 }
2339
2004 qhp->ibqp.qp_num = qhp->wq.sq.qid; 2340 qhp->ibqp.qp_num = qhp->wq.sq.qid;
2341 if (attrs->srq)
2342 qhp->srq = to_c4iw_srq(attrs->srq);
2005 INIT_LIST_HEAD(&qhp->db_fc_entry); 2343 INIT_LIST_HEAD(&qhp->db_fc_entry);
2006 pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", 2344 pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
2007 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, 2345 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
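When attrs->srq is set, c4iw_create_qp above skips the private RQ entirely and records the SRQ index for firmware init. From user space this corresponds to passing the SRQ in the QP init attributes; a hedged sketch follows, assuming 'pd', 'cq' and 'srq' already exist (the RC type is illustrative, and an iWARP consumer would normally obtain its QP through the rdma_cm instead).

/* Sketch: create a QP that receives through the shared SRQ. */
#include <infiniband/verbs.h>

static struct ibv_qp *create_srq_qp(struct ibv_pd *pd, struct ibv_cq *cq,
                                    struct ibv_srq *srq)
{
        struct ibv_qp_init_attr attr = {
                .send_cq = cq,
                .recv_cq = cq,
                .srq     = srq,              /* no per-QP receive queue */
                .qp_type = IBV_QPT_RC,
                .cap     = {
                        .max_send_wr  = 64,
                        .max_send_sge = 2,
                        /* max_recv_wr/max_recv_sge are ignored with an SRQ */
                },
        };

        return ibv_create_qp(pd, &attr);     /* NULL on failure */
}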
@@ -2011,18 +2349,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
2011err_free_ma_sync_key: 2349err_free_ma_sync_key:
2012 kfree(ma_sync_key_mm); 2350 kfree(ma_sync_key_mm);
2013err_free_rq_db_key: 2351err_free_rq_db_key:
2014 kfree(rq_db_key_mm); 2352 if (!attrs->srq)
2353 kfree(rq_db_key_mm);
2015err_free_sq_db_key: 2354err_free_sq_db_key:
2016 kfree(sq_db_key_mm); 2355 kfree(sq_db_key_mm);
2017err_free_rq_key: 2356err_free_rq_key:
2018 kfree(rq_key_mm); 2357 if (!attrs->srq)
2358 kfree(rq_key_mm);
2019err_free_sq_key: 2359err_free_sq_key:
2020 kfree(sq_key_mm); 2360 kfree(sq_key_mm);
2021err_remove_handle: 2361err_remove_handle:
2022 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); 2362 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
2023err_destroy_qp: 2363err_destroy_qp:
2024 destroy_qp(&rhp->rdev, &qhp->wq, 2364 destroy_qp(&rhp->rdev, &qhp->wq,
2025 ucontext ? &ucontext->uctx : &rhp->rdev.uctx); 2365 ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
2026err_free_wr_wait: 2366err_free_wr_wait:
2027 c4iw_put_wr_wait(qhp->wr_waitp); 2367 c4iw_put_wr_wait(qhp->wr_waitp);
2028err_free_qhp: 2368err_free_qhp:
@@ -2088,6 +2428,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
2088 return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); 2428 return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
2089} 2429}
2090 2430
2431void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
2432{
2433 struct ib_event event = {};
2434
2435 event.device = &srq->rhp->ibdev;
2436 event.element.srq = &srq->ibsrq;
2437 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2438 ib_dispatch_event(&event);
2439}
2440
2441int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
2442 enum ib_srq_attr_mask srq_attr_mask,
2443 struct ib_udata *udata)
2444{
2445 struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
2446 int ret = 0;
2447
2448 /*
2449 * XXX 0 mask == a SW interrupt for srq_limit reached...
2450 */
2451 if (udata && !srq_attr_mask) {
2452 c4iw_dispatch_srq_limit_reached_event(srq);
2453 goto out;
2454 }
2455
2456 /* no support for this yet */
2457 if (srq_attr_mask & IB_SRQ_MAX_WR) {
2458 ret = -EINVAL;
2459 goto out;
2460 }
2461
2462 if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
2463 srq->armed = true;
2464 srq->srq_limit = attr->srq_limit;
2465 }
2466out:
2467 return ret;
2468}
2469
2091int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2470int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2092 int attr_mask, struct ib_qp_init_attr *init_attr) 2471 int attr_mask, struct ib_qp_init_attr *init_attr)
2093{ 2472{
@@ -2104,3 +2483,359 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2104 init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; 2483 init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
2105 return 0; 2484 return 0;
2106} 2485}
2486
2487static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2488 struct c4iw_wr_wait *wr_waitp)
2489{
2490 struct c4iw_rdev *rdev = &srq->rhp->rdev;
2491 struct sk_buff *skb = srq->destroy_skb;
2492 struct t4_srq *wq = &srq->wq;
2493 struct fw_ri_res_wr *res_wr;
2494 struct fw_ri_res *res;
2495 int wr_len;
2496
2497 wr_len = sizeof(*res_wr) + sizeof(*res);
2498 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2499
2500 res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2501 memset(res_wr, 0, wr_len);
2502 res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2503 FW_RI_RES_WR_NRES_V(1) |
2504 FW_WR_COMPL_F);
2505 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2506 res_wr->cookie = (uintptr_t)wr_waitp;
2507 res = res_wr->res;
2508 res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2509 res->u.srq.op = FW_RI_RES_OP_RESET;
2510 res->u.srq.srqid = cpu_to_be32(srq->idx);
2511 res->u.srq.eqid = cpu_to_be32(wq->qid);
2512
2513 c4iw_init_wr_wait(wr_waitp);
2514 c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
2515
2516 dma_free_coherent(&rdev->lldi.pdev->dev,
2517 wq->memsize, wq->queue,
2518 pci_unmap_addr(wq, mapping));
2519 c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2520 kfree(wq->sw_rq);
2521 c4iw_put_qpid(rdev, wq->qid, uctx);
2522}
2523
2524static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2525 struct c4iw_wr_wait *wr_waitp)
2526{
2527 struct c4iw_rdev *rdev = &srq->rhp->rdev;
2528 int user = (uctx != &rdev->uctx);
2529 struct t4_srq *wq = &srq->wq;
2530 struct fw_ri_res_wr *res_wr;
2531 struct fw_ri_res *res;
2532 struct sk_buff *skb;
2533 int wr_len;
2534 int eqsize;
2535 int ret = -ENOMEM;
2536
2537 wq->qid = c4iw_get_qpid(rdev, uctx);
2538 if (!wq->qid)
2539 goto err;
2540
2541 if (!user) {
2542 wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
2543 GFP_KERNEL);
2544 if (!wq->sw_rq)
2545 goto err_put_qpid;
2546 wq->pending_wrs = kcalloc(srq->wq.size,
2547 sizeof(*srq->wq.pending_wrs),
2548 GFP_KERNEL);
2549 if (!wq->pending_wrs)
2550 goto err_free_sw_rq;
2551 }
2552
2553 wq->rqt_size = wq->size;
2554 wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
2555 if (!wq->rqt_hwaddr)
2556 goto err_free_pending_wrs;
2557 wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
2558 T4_RQT_ENTRY_SHIFT;
2559
2560 wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
2561 wq->memsize, &wq->dma_addr,
2562 GFP_KERNEL);
2563 if (!wq->queue)
2564 goto err_free_rqtpool;
2565
2566 memset(wq->queue, 0, wq->memsize);
2567 pci_unmap_addr_set(wq, mapping, wq->dma_addr);
2568
2569 wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
2570 &wq->bar2_qid,
2571 user ? &wq->bar2_pa : NULL);
2572
2573 /*
2574 * User mode must have bar2 access.
2575 */
2576
2577 if (user && !wq->bar2_va) {
2578 pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
2579 pci_name(rdev->lldi.pdev), wq->qid);
2580 ret = -EINVAL;
2581 goto err_free_queue;
2582 }
2583
2584 /* build fw_ri_res_wr */
2585 wr_len = sizeof(*res_wr) + sizeof(*res);
2586
2587 skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
2588 if (!skb)
2589 goto err_free_queue;
2590 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2591
2592 res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2593 memset(res_wr, 0, wr_len);
2594 res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2595 FW_RI_RES_WR_NRES_V(1) |
2596 FW_WR_COMPL_F);
2597 res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2598 res_wr->cookie = (uintptr_t)wr_waitp;
2599 res = res_wr->res;
2600 res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2601 res->u.srq.op = FW_RI_RES_OP_WRITE;
2602
2603 /*
2604 * eqsize is the number of 64B entries plus the status page size.
2605 */
2606 eqsize = wq->size * T4_RQ_NUM_SLOTS +
2607 rdev->hw_queue.t4_eq_status_entries;
2608 res->u.srq.eqid = cpu_to_be32(wq->qid);
2609 res->u.srq.fetchszm_to_iqid =
2610 /* no host cidx updates */
2611 cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
2612 FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */
2613 FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
2614 FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */
2615 res->u.srq.dcaen_to_eqsize =
2616 cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
2617 FW_RI_RES_WR_DCACPU_V(0) |
2618 FW_RI_RES_WR_FBMIN_V(2) |
2619 FW_RI_RES_WR_FBMAX_V(3) |
2620 FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
2621 FW_RI_RES_WR_CIDXFTHRESH_V(0) |
2622 FW_RI_RES_WR_EQSIZE_V(eqsize));
2623 res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
2624 res->u.srq.srqid = cpu_to_be32(srq->idx);
2625 res->u.srq.pdid = cpu_to_be32(srq->pdid);
2626 res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
2627 res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
2628 rdev->lldi.vr->rq.start);
2629
2630 c4iw_init_wr_wait(wr_waitp);
2631
2632 ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
2633 if (ret)
2634 goto err_free_queue;
2635
2636 pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
2637 " bar2_addr %p rqt addr 0x%x size %d\n",
2638 __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
2639 (u64)virt_to_phys(wq->queue), wq->bar2_va,
2640 wq->rqt_hwaddr, wq->rqt_size);
2641
2642 return 0;
2643err_free_queue:
2644 dma_free_coherent(&rdev->lldi.pdev->dev,
2645 wq->memsize, wq->queue,
2646 pci_unmap_addr(wq, mapping));
2647err_free_rqtpool:
2648 c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2649err_free_pending_wrs:
2650 if (!user)
2651 kfree(wq->pending_wrs);
2652err_free_sw_rq:
2653 if (!user)
2654 kfree(wq->sw_rq);
2655err_put_qpid:
2656 c4iw_put_qpid(rdev, wq->qid, uctx);
2657err:
2658 return ret;
2659}
2660
2661void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
2662{
2663 u64 *src, *dst;
2664
2665 src = (u64 *)wqe;
2666 dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
2667 while (len16) {
2668 *dst++ = *src++;
2669 if (dst >= (u64 *)&srq->queue[srq->size])
2670 dst = (u64 *)srq->queue;
2671 *dst++ = *src++;
2672 if (dst >= (u64 *)&srq->queue[srq->size])
2673 dst = (u64 *)srq->queue;
2674 len16--;
2675 }
2676}
2677
2678struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
2679 struct ib_udata *udata)
2680{
2681 struct c4iw_dev *rhp;
2682 struct c4iw_srq *srq;
2683 struct c4iw_pd *php;
2684 struct c4iw_create_srq_resp uresp;
2685 struct c4iw_ucontext *ucontext;
2686 struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
2687 int rqsize;
2688 int ret;
2689 int wr_len;
2690
2691 pr_debug("%s ib_pd %p\n", __func__, pd);
2692
2693 php = to_c4iw_pd(pd);
2694 rhp = php->rhp;
2695
2696 if (!rhp->rdev.lldi.vr->srq.size)
2697 return ERR_PTR(-EINVAL);
2698 if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2699 return ERR_PTR(-E2BIG);
2700 if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
2701 return ERR_PTR(-E2BIG);
2702
2703 /*
2704 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
2705 */
2706 rqsize = attrs->attr.max_wr + 1;
2707 rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
2708
2709 ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
2710
2711 srq = kzalloc(sizeof(*srq), GFP_KERNEL);
2712 if (!srq)
2713 return ERR_PTR(-ENOMEM);
2714
2715 srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2716 if (!srq->wr_waitp) {
2717 ret = -ENOMEM;
2718 goto err_free_srq;
2719 }
2720
2721 srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
2722 if (srq->idx < 0) {
2723 ret = -ENOMEM;
2724 goto err_free_wr_wait;
2725 }
2726
2727 wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
2728 srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
2729 if (!srq->destroy_skb) {
2730 ret = -ENOMEM;
2731 goto err_free_srq_idx;
2732 }
2733
2734 srq->rhp = rhp;
2735 srq->pdid = php->pdid;
2736
2737 srq->wq.size = rqsize;
2738 srq->wq.memsize =
2739 (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2740 sizeof(*srq->wq.queue);
2741 if (ucontext)
2742 srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
2743
2744 ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
2745 &rhp->rdev.uctx, srq->wr_waitp);
2746 if (ret)
2747 goto err_free_skb;
2748 attrs->attr.max_wr = rqsize - 1;
2749
2750 if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
2751 srq->flags = T4_SRQ_LIMIT_SUPPORT;
2752
2753 ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
2754 if (ret)
2755 goto err_free_queue;
2756
2757 if (udata) {
2758 srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
2759 if (!srq_key_mm) {
2760 ret = -ENOMEM;
2761 goto err_remove_handle;
2762 }
2763 srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
2764 if (!srq_db_key_mm) {
2765 ret = -ENOMEM;
2766 goto err_free_srq_key_mm;
2767 }
2768 memset(&uresp, 0, sizeof(uresp));
2769 uresp.flags = srq->flags;
2770 uresp.qid_mask = rhp->rdev.qpmask;
2771 uresp.srqid = srq->wq.qid;
2772 uresp.srq_size = srq->wq.size;
2773 uresp.srq_memsize = srq->wq.memsize;
2774 uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
2775 spin_lock(&ucontext->mmap_lock);
2776 uresp.srq_key = ucontext->key;
2777 ucontext->key += PAGE_SIZE;
2778 uresp.srq_db_gts_key = ucontext->key;
2779 ucontext->key += PAGE_SIZE;
2780 spin_unlock(&ucontext->mmap_lock);
2781 ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2782 if (ret)
2783 goto err_free_srq_db_key_mm;
2784 srq_key_mm->key = uresp.srq_key;
2785 srq_key_mm->addr = virt_to_phys(srq->wq.queue);
2786 srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
2787 insert_mmap(ucontext, srq_key_mm);
2788 srq_db_key_mm->key = uresp.srq_db_gts_key;
2789 srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
2790 srq_db_key_mm->len = PAGE_SIZE;
2791 insert_mmap(ucontext, srq_db_key_mm);
2792 }
2793
2794 pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
2795 __func__, srq->wq.qid, srq->idx, srq->wq.size,
2796 (unsigned long)srq->wq.memsize, attrs->attr.max_wr);
2797
2798 spin_lock_init(&srq->lock);
2799 return &srq->ibsrq;
2800err_free_srq_db_key_mm:
2801 kfree(srq_db_key_mm);
2802err_free_srq_key_mm:
2803 kfree(srq_key_mm);
2804err_remove_handle:
2805 remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2806err_free_queue:
2807 free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2808 srq->wr_waitp);
2809err_free_skb:
2810 if (srq->destroy_skb)
2811 kfree_skb(srq->destroy_skb);
2812err_free_srq_idx:
2813 c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2814err_free_wr_wait:
2815 c4iw_put_wr_wait(srq->wr_waitp);
2816err_free_srq:
2817 kfree(srq);
2818 return ERR_PTR(ret);
2819}
2820
2821int c4iw_destroy_srq(struct ib_srq *ibsrq)
2822{
2823 struct c4iw_dev *rhp;
2824 struct c4iw_srq *srq;
2825 struct c4iw_ucontext *ucontext;
2826
2827 srq = to_c4iw_srq(ibsrq);
2828 rhp = srq->rhp;
2829
2830 pr_debug("%s id %d\n", __func__, srq->wq.qid);
2831
2832 remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2833 ucontext = ibsrq->uobject ?
2834 to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
2835 free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2836 srq->wr_waitp);
2837 c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2838 c4iw_put_wr_wait(srq->wr_waitp);
2839 kfree(srq);
2840 return 0;
2841}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 0ef25ae05e6f..57ed26b3cc21 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
53} 53}
54 54
55/* nr_* must be power of 2 */ 55/* nr_* must be power of 2 */
56int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) 56int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
57 u32 nr_pdid, u32 nr_srqt)
57{ 58{
58 int err = 0; 59 int err = 0;
59 err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, 60 err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
67 nr_pdid, 1, 0); 68 nr_pdid, 1, 0);
68 if (err) 69 if (err)
69 goto pdid_err; 70 goto pdid_err;
71 if (!nr_srqt)
72 err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
73 1, 1, 0);
74 else
75 err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
76 nr_srqt, 0, 0);
77 if (err)
78 goto srq_err;
70 return 0; 79 return 0;
80 srq_err:
81 c4iw_id_table_free(&rdev->resource.pdid_table);
71 pdid_err: 82 pdid_err:
72 c4iw_id_table_free(&rdev->resource.qid_table); 83 c4iw_id_table_free(&rdev->resource.qid_table);
73 qid_err: 84 qid_err:
@@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
371int c4iw_rqtpool_create(struct c4iw_rdev *rdev) 382int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
372{ 383{
373 unsigned rqt_start, rqt_chunk, rqt_top; 384 unsigned rqt_start, rqt_chunk, rqt_top;
385 int skip = 0;
374 386
375 rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); 387 rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
376 if (!rdev->rqt_pool) 388 if (!rdev->rqt_pool)
377 return -ENOMEM; 389 return -ENOMEM;
378 390
379 rqt_start = rdev->lldi.vr->rq.start; 391 /*
380 rqt_chunk = rdev->lldi.vr->rq.size; 392 * If SRQs are supported, then never use the first RQE from
393 * the RQT region. This is because HW uses RQT index 0 as NULL.
394 */
395 if (rdev->lldi.vr->srq.size)
396 skip = T4_RQT_ENTRY_SIZE;
397
398 rqt_start = rdev->lldi.vr->rq.start + skip;
399 rqt_chunk = rdev->lldi.vr->rq.size - skip;
381 rqt_top = rqt_start + rqt_chunk; 400 rqt_top = rqt_start + rqt_chunk;
382 401
383 while (rqt_start < rqt_top) { 402 while (rqt_start < rqt_top) {
@@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
405 kref_put(&rdev->rqt_kref, destroy_rqtpool); 424 kref_put(&rdev->rqt_kref, destroy_rqtpool);
406} 425}
407 426
427int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
428{
429 int idx;
430
431 idx = c4iw_id_alloc(&rdev->resource.srq_table);
432 mutex_lock(&rdev->stats.lock);
433 if (idx == -1) {
434 rdev->stats.srqt.fail++;
435 mutex_unlock(&rdev->stats.lock);
436 return -ENOMEM;
437 }
438 rdev->stats.srqt.cur++;
439 if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
440 rdev->stats.srqt.max = rdev->stats.srqt.cur;
441 mutex_unlock(&rdev->stats.lock);
442 return idx;
443}
444
445void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
446{
447 c4iw_id_free(&rdev->resource.srq_table, idx);
448 mutex_lock(&rdev->stats.lock);
449 rdev->stats.srqt.cur--;
450 mutex_unlock(&rdev->stats.lock);
451}
452
408/* 453/*
409 * On-Chip QP Memory. 454 * On-Chip QP Memory.
410 */ 455 */
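
The rqtpool change above reserves the first RQT entry whenever the adapter advertises SRQ support, because the hardware interprets RQT index 0 as NULL. A minimal standalone sketch of that base/size adjustment, with invented values standing in for rdev->lldi.vr->rq:

#include <stdio.h>

#define T4_RQT_ENTRY_SHIFT 6
#define T4_RQT_ENTRY_SIZE  (1u << T4_RQT_ENTRY_SHIFT)

int main(void)
{
        unsigned int rq_start = 0x10000;  /* pretend vr->rq.start */
        unsigned int rq_size  = 0x8000;   /* pretend vr->rq.size  */
        unsigned int srq_size = 1;        /* non-zero => SRQs supported */
        unsigned int skip = srq_size ? T4_RQT_ENTRY_SIZE : 0;
        unsigned int rqt_start = rq_start + skip;
        unsigned int rqt_chunk = rq_size - skip;

        printf("rqt_start=0x%x rqt_chunk=0x%x (reserved %u bytes for RQT[0])\n",
               rqt_start, rqt_chunk, skip);
        return 0;
}
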
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 8369c7c8de83..e42021fd6fd6 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -52,12 +52,16 @@ struct t4_status_page {
52 __be16 pidx; 52 __be16 pidx;
53 u8 qp_err; /* flit 1 - sw owns */ 53 u8 qp_err; /* flit 1 - sw owns */
54 u8 db_off; 54 u8 db_off;
55 u8 pad; 55 u8 pad[2];
56 u16 host_wq_pidx; 56 u16 host_wq_pidx;
57 u16 host_cidx; 57 u16 host_cidx;
58 u16 host_pidx; 58 u16 host_pidx;
59 u16 pad2;
60 u32 srqidx;
59}; 61};
60 62
63#define T4_RQT_ENTRY_SHIFT 6
64#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT)
61#define T4_EQ_ENTRY_SIZE 64 65#define T4_EQ_ENTRY_SIZE 64
62 66
63#define T4_SQ_NUM_SLOTS 5 67#define T4_SQ_NUM_SLOTS 5
@@ -87,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl)
87#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) 91#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
88#define T4_MAX_RECV_SGE 4 92#define T4_MAX_RECV_SGE 4
89 93
94#define T4_WRITE_CMPL_MAX_SGL 4
95#define T4_WRITE_CMPL_MAX_CQE 16
96
90union t4_wr { 97union t4_wr {
91 struct fw_ri_res_wr res; 98 struct fw_ri_res_wr res;
92 struct fw_ri_wr ri; 99 struct fw_ri_wr ri;
@@ -97,6 +104,7 @@ union t4_wr {
97 struct fw_ri_fr_nsmr_wr fr; 104 struct fw_ri_fr_nsmr_wr fr;
98 struct fw_ri_fr_nsmr_tpte_wr fr_tpte; 105 struct fw_ri_fr_nsmr_tpte_wr fr_tpte;
99 struct fw_ri_inv_lstag_wr inv; 106 struct fw_ri_inv_lstag_wr inv;
107 struct fw_ri_rdma_write_cmpl_wr write_cmpl;
100 struct t4_status_page status; 108 struct t4_status_page status;
101 __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; 109 __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
102}; 110};
@@ -179,9 +187,32 @@ struct t4_cqe {
179 __be32 wrid_hi; 187 __be32 wrid_hi;
180 __be32 wrid_low; 188 __be32 wrid_low;
181 } gen; 189 } gen;
190 struct {
191 __be32 stag;
192 __be32 msn;
193 __be32 reserved;
194 __be32 abs_rqe_idx;
195 } srcqe;
196 struct {
197 __be32 mo;
198 __be32 msn;
199 /*
200 * Use union for immediate data to be consistent with
201 * stack's 32 bit data and iWARP spec's 64 bit data.
202 */
203 union {
204 struct {
205 __be32 imm_data32;
206 u32 reserved;
207 } ib_imm_data;
208 __be64 imm_data64;
209 } iw_imm_data;
210 } imm_data_rcqe;
211
182 u64 drain_cookie; 212 u64 drain_cookie;
213 __be64 flits[3];
183 } u; 214 } u;
184 __be64 reserved; 215 __be64 reserved[3];
185 __be64 bits_type_ts; 216 __be64 bits_type_ts;
186}; 217};
187 218
@@ -237,6 +268,9 @@ struct t4_cqe {
237/* used for RQ completion processing */ 268/* used for RQ completion processing */
238#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) 269#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag))
239#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) 270#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn))
271#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx))
272#define CQE_IMM_DATA(x) ( \
273 (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32)
240 274
241/* used for SQ completion processing */ 275/* used for SQ completion processing */
242#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) 276#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx)
@@ -320,6 +354,7 @@ struct t4_swrqe {
320 u64 wr_id; 354 u64 wr_id;
321 ktime_t host_time; 355 ktime_t host_time;
322 u64 sge_ts; 356 u64 sge_ts;
357 int valid;
323}; 358};
324 359
325struct t4_rq { 360struct t4_rq {
@@ -349,8 +384,98 @@ struct t4_wq {
349 void __iomem *db; 384 void __iomem *db;
350 struct c4iw_rdev *rdev; 385 struct c4iw_rdev *rdev;
351 int flushed; 386 int flushed;
387 u8 *qp_errp;
388 u32 *srqidxp;
389};
390
391struct t4_srq_pending_wr {
392 u64 wr_id;
393 union t4_recv_wr wqe;
394 u8 len16;
395};
396
397struct t4_srq {
398 union t4_recv_wr *queue;
399 dma_addr_t dma_addr;
400 DECLARE_PCI_UNMAP_ADDR(mapping);
401 struct t4_swrqe *sw_rq;
402 void __iomem *bar2_va;
403 u64 bar2_pa;
404 size_t memsize;
405 u32 bar2_qid;
406 u32 qid;
407 u32 msn;
408 u32 rqt_hwaddr;
409 u32 rqt_abs_idx;
410 u16 rqt_size;
411 u16 size;
412 u16 cidx;
413 u16 pidx;
414 u16 wq_pidx;
415 u16 wq_pidx_inc;
416 u16 in_use;
417 struct t4_srq_pending_wr *pending_wrs;
418 u16 pending_cidx;
419 u16 pending_pidx;
420 u16 pending_in_use;
421 u16 ooo_count;
352}; 422};
353 423
424static inline u32 t4_srq_avail(struct t4_srq *srq)
425{
426 return srq->size - 1 - srq->in_use;
427}
428
429static inline void t4_srq_produce(struct t4_srq *srq, u8 len16)
430{
431 srq->in_use++;
432 if (++srq->pidx == srq->size)
433 srq->pidx = 0;
434 srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
435 if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS)
436 srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS;
437 srq->queue[srq->size].status.host_pidx = srq->pidx;
438}
439
440static inline void t4_srq_produce_pending_wr(struct t4_srq *srq)
441{
442 srq->pending_in_use++;
443 srq->in_use++;
444 if (++srq->pending_pidx == srq->size)
445 srq->pending_pidx = 0;
446}
447
448static inline void t4_srq_consume_pending_wr(struct t4_srq *srq)
449{
450 srq->pending_in_use--;
451 srq->in_use--;
452 if (++srq->pending_cidx == srq->size)
453 srq->pending_cidx = 0;
454}
455
456static inline void t4_srq_produce_ooo(struct t4_srq *srq)
457{
458 srq->in_use--;
459 srq->ooo_count++;
460}
461
462static inline void t4_srq_consume_ooo(struct t4_srq *srq)
463{
464 srq->cidx++;
465 if (srq->cidx == srq->size)
466 srq->cidx = 0;
467 srq->queue[srq->size].status.host_cidx = srq->cidx;
468 srq->ooo_count--;
469}
470
471static inline void t4_srq_consume(struct t4_srq *srq)
472{
473 srq->in_use--;
474 if (++srq->cidx == srq->size)
475 srq->cidx = 0;
476 srq->queue[srq->size].status.host_cidx = srq->cidx;
477}
478
354static inline int t4_rqes_posted(struct t4_wq *wq) 479static inline int t4_rqes_posted(struct t4_wq *wq)
355{ 480{
356 return wq->rq.in_use; 481 return wq->rq.in_use;
@@ -384,7 +509,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
384static inline void t4_rq_consume(struct t4_wq *wq) 509static inline void t4_rq_consume(struct t4_wq *wq)
385{ 510{
386 wq->rq.in_use--; 511 wq->rq.in_use--;
387 wq->rq.msn++;
388 if (++wq->rq.cidx == wq->rq.size) 512 if (++wq->rq.cidx == wq->rq.size)
389 wq->rq.cidx = 0; 513 wq->rq.cidx = 0;
390} 514}
@@ -464,6 +588,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
464 } 588 }
465} 589}
466 590
591static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16,
592 union t4_recv_wr *wqe)
593{
594 /* Flush host queue memory writes. */
595 wmb();
596 if (inc == 1 && srq->bar2_qid == 0 && wqe) {
597 pr_debug("%s : WC srq->pidx = %d; len16=%d\n",
598 __func__, srq->pidx, len16);
599 pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe);
600 } else {
601 pr_debug("%s: DB srq->pidx = %d; len16=%d\n",
602 __func__, srq->pidx, len16);
603 writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid),
604 srq->bar2_va + SGE_UDB_KDOORBELL);
605 }
606 /* Flush user doorbell area writes. */
607 wmb();
608}
609
467static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) 610static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
468{ 611{
469 612
@@ -515,12 +658,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
515 658
516static inline int t4_wq_in_error(struct t4_wq *wq) 659static inline int t4_wq_in_error(struct t4_wq *wq)
517{ 660{
518 return wq->rq.queue[wq->rq.size].status.qp_err; 661 return *wq->qp_errp;
519} 662}
520 663
521static inline void t4_set_wq_in_error(struct t4_wq *wq) 664static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
522{ 665{
523 wq->rq.queue[wq->rq.size].status.qp_err = 1; 666 if (srqidx)
667 *wq->srqidxp = srqidx;
668 *wq->qp_errp = 1;
524} 669}
525 670
526static inline void t4_disable_wq_db(struct t4_wq *wq) 671static inline void t4_disable_wq_db(struct t4_wq *wq)
@@ -565,6 +710,7 @@ struct t4_cq {
565 u16 cidx_inc; 710 u16 cidx_inc;
566 u8 gen; 711 u8 gen;
567 u8 error; 712 u8 error;
713 u8 *qp_errp;
568 unsigned long flags; 714 unsigned long flags;
569}; 715};
570 716
@@ -698,18 +844,18 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
698 844
699static inline int t4_cq_in_error(struct t4_cq *cq) 845static inline int t4_cq_in_error(struct t4_cq *cq)
700{ 846{
701 return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; 847 return *cq->qp_errp;
702} 848}
703 849
704static inline void t4_set_cq_in_error(struct t4_cq *cq) 850static inline void t4_set_cq_in_error(struct t4_cq *cq)
705{ 851{
706 ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; 852 *cq->qp_errp = 1;
707} 853}
708#endif 854#endif
709 855
710struct t4_dev_status_page { 856struct t4_dev_status_page {
711 u8 db_off; 857 u8 db_off;
712 u8 pad1; 858 u8 write_cmpl_supported;
713 u16 pad2; 859 u16 pad2;
714 u32 pad3; 860 u32 pad3;
715 u64 qp_start; 861 u64 qp_start;
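
The t4_srq inline helpers added here keep a software shadow of the SRQ ring: t4_srq_avail() always leaves one slot unused, and t4_srq_produce()/t4_srq_consume() wrap pidx/cidx at srq->size. A standalone user-space model of just this index arithmetic (the status page, pending-WR list and ooo counters are left out) behaves the same way:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct model_srq {
        uint16_t size;
        uint16_t cidx, pidx;
        uint16_t in_use;
};

static uint32_t srq_avail(const struct model_srq *s) { return s->size - 1 - s->in_use; }

static void srq_produce(struct model_srq *s)
{
        s->in_use++;
        if (++s->pidx == s->size)
                s->pidx = 0;
}

static void srq_consume(struct model_srq *s)
{
        s->in_use--;
        if (++s->cidx == s->size)
                s->cidx = 0;
}

int main(void)
{
        struct model_srq s = { .size = 16 };
        unsigned int i;

        /* Fill to the limit: one entry is always kept free. */
        while (srq_avail(&s))
                srq_produce(&s);
        assert(s.in_use == 15);

        /* Drain; cidx catches up to where pidx stopped. */
        for (i = 0; i < 15; i++)
                srq_consume(&s);
        assert(s.in_use == 0 && s.cidx == s.pidx);

        printf("pidx=%u cidx=%u in_use=%u avail=%u\n",
               s.pidx, s.cidx, s.in_use, srq_avail(&s));
        return 0;
}
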
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 58c531db4f4a..cbdb300a4794 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -50,7 +50,8 @@ enum fw_ri_wr_opcode {
50 FW_RI_BYPASS = 0xd, 50 FW_RI_BYPASS = 0xd,
51 FW_RI_RECEIVE = 0xe, 51 FW_RI_RECEIVE = 0xe,
52 52
53 FW_RI_SGE_EC_CR_RETURN = 0xf 53 FW_RI_SGE_EC_CR_RETURN = 0xf,
54 FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT
54}; 55};
55 56
56enum fw_ri_wr_flags { 57enum fw_ri_wr_flags {
@@ -59,7 +60,8 @@ enum fw_ri_wr_flags {
59 FW_RI_SOLICITED_EVENT_FLAG = 0x04, 60 FW_RI_SOLICITED_EVENT_FLAG = 0x04,
60 FW_RI_READ_FENCE_FLAG = 0x08, 61 FW_RI_READ_FENCE_FLAG = 0x08,
61 FW_RI_LOCAL_FENCE_FLAG = 0x10, 62 FW_RI_LOCAL_FENCE_FLAG = 0x10,
62 FW_RI_RDMA_READ_INVALIDATE = 0x20 63 FW_RI_RDMA_READ_INVALIDATE = 0x20,
64 FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40
63}; 65};
64 66
65enum fw_ri_mpa_attrs { 67enum fw_ri_mpa_attrs {
@@ -263,6 +265,7 @@ enum fw_ri_res_type {
263 FW_RI_RES_TYPE_SQ, 265 FW_RI_RES_TYPE_SQ,
264 FW_RI_RES_TYPE_RQ, 266 FW_RI_RES_TYPE_RQ,
265 FW_RI_RES_TYPE_CQ, 267 FW_RI_RES_TYPE_CQ,
268 FW_RI_RES_TYPE_SRQ,
266}; 269};
267 270
268enum fw_ri_res_op { 271enum fw_ri_res_op {
@@ -296,6 +299,20 @@ struct fw_ri_res {
296 __be32 r6_lo; 299 __be32 r6_lo;
297 __be64 r7; 300 __be64 r7;
298 } cq; 301 } cq;
302 struct fw_ri_res_srq {
303 __u8 restype;
304 __u8 op;
305 __be16 r3;
306 __be32 eqid;
307 __be32 r4[2];
308 __be32 fetchszm_to_iqid;
309 __be32 dcaen_to_eqsize;
310 __be64 eqaddr;
311 __be32 srqid;
312 __be32 pdid;
313 __be32 hwsrqsize;
314 __be32 hwsrqaddr;
315 } srq;
299 } u; 316 } u;
300}; 317};
301 318
@@ -531,7 +548,17 @@ struct fw_ri_rdma_write_wr {
531 __u16 wrid; 548 __u16 wrid;
532 __u8 r1[3]; 549 __u8 r1[3];
533 __u8 len16; 550 __u8 len16;
534 __be64 r2; 551 /*
552 * Use union for immediate data to be consistent with stack's 32 bit
553 * data and iWARP spec's 64 bit data.
554 */
555 union {
556 struct {
557 __be32 imm_data32;
558 u32 reserved;
559 } ib_imm_data;
560 __be64 imm_data64;
561 } iw_imm_data;
535 __be32 plen; 562 __be32 plen;
536 __be32 stag_sink; 563 __be32 stag_sink;
537 __be64 to_sink; 564 __be64 to_sink;
@@ -568,6 +595,37 @@ struct fw_ri_send_wr {
568#define FW_RI_SEND_WR_SENDOP_G(x) \ 595#define FW_RI_SEND_WR_SENDOP_G(x) \
569 (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) 596 (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M)
570 597
598struct fw_ri_rdma_write_cmpl_wr {
599 __u8 opcode;
600 __u8 flags;
601 __u16 wrid;
602 __u8 r1[3];
603 __u8 len16;
604 __u8 r2;
605 __u8 flags_send;
606 __u16 wrid_send;
607 __be32 stag_inv;
608 __be32 plen;
609 __be32 stag_sink;
610 __be64 to_sink;
611 union fw_ri_cmpl {
612 struct fw_ri_immd_cmpl {
613 __u8 op;
614 __u8 r1[6];
615 __u8 immdlen;
616 __u8 data[16];
617 } immd_src;
618 struct fw_ri_isgl isgl_src;
619 } u_cmpl;
620 __be64 r3;
621#ifndef C99_NOT_SUPPORTED
622 union fw_ri_write {
623 struct fw_ri_immd immd_src[0];
624 struct fw_ri_isgl isgl_src[0];
625 } u;
626#endif
627};
628
571struct fw_ri_rdma_read_wr { 629struct fw_ri_rdma_read_wr {
572 __u8 opcode; 630 __u8 opcode;
573 __u8 flags; 631 __u8 flags;
@@ -707,6 +765,10 @@ enum fw_ri_init_p2ptype {
707 FW_RI_INIT_P2PTYPE_DISABLED = 0xf, 765 FW_RI_INIT_P2PTYPE_DISABLED = 0xf,
708}; 766};
709 767
768enum fw_ri_init_rqeqid_srq {
769 FW_RI_INIT_RQEQID_SRQ = 1 << 31,
770};
771
710struct fw_ri_wr { 772struct fw_ri_wr {
711 __be32 op_compl; 773 __be32 op_compl;
712 __be32 flowid_len16; 774 __be32 flowid_len16;
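
fw_ri_rdma_write_wr above trades a reserved field for an immediate-data union, so the 32-bit IB immediate and the 64-bit iWARP immediate share the same 8 bytes; since both are stored big-endian on the wire, imm_data32 lands in the upper half of imm_data64. A host-side sketch of the overlay, using plain stdint types in place of the kernel's __be32/__be64 annotations:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Userspace model of the iw_imm_data union added to fw_ri_rdma_write_wr. */
union iw_imm_data {
        struct {
                uint32_t imm_data32;    /* IB verbs 32-bit immediate */
                uint32_t reserved;
        } ib_imm_data;
        uint64_t imm_data64;            /* iWARP 64-bit immediate */
};

int main(void)
{
        printf("union size          : %zu bytes\n", sizeof(union iw_imm_data));
        printf("imm_data32 at offset: %zu\n",
               offsetof(union iw_imm_data, ib_imm_data.imm_data32));
        printf("imm_data64 at offset: %zu\n",
               offsetof(union iw_imm_data, imm_data64));
        return 0;
}
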
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 6deb101cdd43..2c19bf772451 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
8143 } 8143 }
8144} 8144}
8145 8145
8146/* 8146/**
8147 * is_rcv_avail_int() - User receive context available IRQ handler
8148 * @dd: valid dd
8149 * @source: logical IRQ source (offset from IS_RCVAVAIL_START)
8150 *
8147 * RX block receive available interrupt. Source is < 160. 8151 * RX block receive available interrupt. Source is < 160.
8152 *
8153 * This is the general interrupt handler for user (PSM) receive contexts,
8154 * and can only be used for non-threaded IRQs.
8148 */ 8155 */
8149static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) 8156static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
8150{ 8157{
@@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
8154 if (likely(source < dd->num_rcv_contexts)) { 8161 if (likely(source < dd->num_rcv_contexts)) {
8155 rcd = hfi1_rcd_get_by_index(dd, source); 8162 rcd = hfi1_rcd_get_by_index(dd, source);
8156 if (rcd) { 8163 if (rcd) {
8157 /* Check for non-user contexts, including vnic */ 8164 handle_user_interrupt(rcd);
8158 if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
8159 rcd->do_interrupt(rcd, 0);
8160 else
8161 handle_user_interrupt(rcd);
8162
8163 hfi1_rcd_put(rcd); 8165 hfi1_rcd_put(rcd);
8164 return; /* OK */ 8166 return; /* OK */
8165 } 8167 }
@@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
8173 err_detail, source); 8175 err_detail, source);
8174} 8176}
8175 8177
8176/* 8178/**
8179 * is_rcv_urgent_int() - User receive context urgent IRQ handler
8180 * @dd: valid dd
8181 * @source: logical IRQ source (offset from IS_RCVURGENT_START)
8182 *
8177 * RX block receive urgent interrupt. Source is < 160. 8183 * RX block receive urgent interrupt. Source is < 160.
8184 *
8185 * NOTE: kernel receive contexts specifically do NOT enable this IRQ.
8178 */ 8186 */
8179static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) 8187static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
8180{ 8188{
@@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
8184 if (likely(source < dd->num_rcv_contexts)) { 8192 if (likely(source < dd->num_rcv_contexts)) {
8185 rcd = hfi1_rcd_get_by_index(dd, source); 8193 rcd = hfi1_rcd_get_by_index(dd, source);
8186 if (rcd) { 8194 if (rcd) {
8187 /* only pay attention to user urgent interrupts */ 8195 handle_user_interrupt(rcd);
8188 if (source >= dd->first_dyn_alloc_ctxt &&
8189 !rcd->is_vnic)
8190 handle_user_interrupt(rcd);
8191
8192 hfi1_rcd_put(rcd); 8196 hfi1_rcd_put(rcd);
8193 return; /* OK */ 8197 return; /* OK */
8194 } 8198 }
@@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
8260 dd_dev_err(dd, "invalid interrupt source %u\n", source); 8264 dd_dev_err(dd, "invalid interrupt source %u\n", source);
8261} 8265}
8262 8266
8263/* 8267/**
8264 * General interrupt handler. This is able to correctly handle 8268 * general_interrupt() - General interrupt handler
8265 * all interrupts in case INTx is used. 8269 * @irq: MSIx IRQ vector
8270 * @data: hfi1 devdata
8271 *
8272 * This is able to correctly handle all non-threaded interrupts. Receive
8273 * context DATA IRQs are threaded and are not supported by this handler.
8274 *
8266 */ 8275 */
8267static irqreturn_t general_interrupt(int irq, void *data) 8276static irqreturn_t general_interrupt(int irq, void *data)
8268{ 8277{
@@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
10130 (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) << 10139 (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
10131 SEND_CTXT_CHECK_SLID_VALUE_SHIFT); 10140 SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
10132 10141
10133 for (i = 0; i < dd->chip_send_contexts; i++) { 10142 for (i = 0; i < chip_send_contexts(dd); i++) {
10134 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x", 10143 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
10135 i, (u32)sreg); 10144 i, (u32)sreg);
10136 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg); 10145 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
@@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
11857 * sequence numbers could land exactly on the same spot. 11866 * sequence numbers could land exactly on the same spot.
11858 * E.g. a rcd restart before the receive header wrapped. 11867 * E.g. a rcd restart before the receive header wrapped.
11859 */ 11868 */
11860 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); 11869 memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd));
11861 11870
11862 /* starting timeout */ 11871 /* starting timeout */
11863 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr; 11872 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
@@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
11952 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11961 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
11953 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) 11962 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
11954 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11963 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
11955 rcd->rcvctrl = rcvctrl;
11956 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); 11964 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
11957 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl); 11965 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
11958 11966
11959 /* work around sticky RcvCtxtStatus.BlockedRHQFull */ 11967 /* work around sticky RcvCtxtStatus.BlockedRHQFull */
11960 if (did_enable && 11968 if (did_enable &&
@@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
12042 } else if (entry->flags & CNTR_SDMA) { 12050 } else if (entry->flags & CNTR_SDMA) {
12043 hfi1_cdbg(CNTR, 12051 hfi1_cdbg(CNTR,
12044 "\t Per SDMA Engine\n"); 12052 "\t Per SDMA Engine\n");
12045 for (j = 0; j < dd->chip_sdma_engines; 12053 for (j = 0; j < chip_sdma_engines(dd);
12046 j++) { 12054 j++) {
12047 val = 12055 val =
12048 entry->rw_cntr(entry, dd, j, 12056 entry->rw_cntr(entry, dd, j,
@@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
12418 struct hfi1_pportdata *ppd; 12426 struct hfi1_pportdata *ppd;
12419 const char *bit_type_32 = ",32"; 12427 const char *bit_type_32 = ",32";
12420 const int bit_type_32_sz = strlen(bit_type_32); 12428 const int bit_type_32_sz = strlen(bit_type_32);
12429 u32 sdma_engines = chip_sdma_engines(dd);
12421 12430
12422 /* set up the stats timer; the add_timer is done at the end */ 12431 /* set up the stats timer; the add_timer is done at the end */
12423 timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); 12432 timer_setup(&dd->synth_stats_timer, update_synth_timer, 0);
@@ -12450,7 +12459,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
12450 } 12459 }
12451 } else if (dev_cntrs[i].flags & CNTR_SDMA) { 12460 } else if (dev_cntrs[i].flags & CNTR_SDMA) {
12452 dev_cntrs[i].offset = dd->ndevcntrs; 12461 dev_cntrs[i].offset = dd->ndevcntrs;
12453 for (j = 0; j < dd->chip_sdma_engines; j++) { 12462 for (j = 0; j < sdma_engines; j++) {
12454 snprintf(name, C_MAX_NAME, "%s%d", 12463 snprintf(name, C_MAX_NAME, "%s%d",
12455 dev_cntrs[i].name, j); 12464 dev_cntrs[i].name, j);
12456 sz += strlen(name); 12465 sz += strlen(name);
@@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
12507 *p++ = '\n'; 12516 *p++ = '\n';
12508 } 12517 }
12509 } else if (dev_cntrs[i].flags & CNTR_SDMA) { 12518 } else if (dev_cntrs[i].flags & CNTR_SDMA) {
12510 for (j = 0; j < dd->chip_sdma_engines; j++) { 12519 for (j = 0; j < sdma_engines; j++) {
12511 snprintf(name, C_MAX_NAME, "%s%d", 12520 snprintf(name, C_MAX_NAME, "%s%d",
12512 dev_cntrs[i].name, j); 12521 dev_cntrs[i].name, j);
12513 memcpy(p, name, strlen(name)); 12522 memcpy(p, name, strlen(name));
@@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
13020 write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0); 13029 write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
13021 write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0); 13030 write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
13022 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0); 13031 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
13023 for (i = 0; i < dd->chip_send_contexts; i++) 13032 for (i = 0; i < chip_send_contexts(dd); i++)
13024 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0); 13033 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
13025 for (i = 0; i < dd->chip_sdma_engines; i++) 13034 for (i = 0; i < chip_sdma_engines(dd); i++)
13026 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0); 13035 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
13027 13036
13028 write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0); 13037 write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
@@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
13030 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); 13039 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
13031} 13040}
13032 13041
13033/* Move to pcie.c? */
13034static void disable_intx(struct pci_dev *pdev)
13035{
13036 pci_intx(pdev, 0);
13037}
13038
13039/** 13042/**
13040 * hfi1_clean_up_interrupts() - Free all IRQ resources 13043 * hfi1_clean_up_interrupts() - Free all IRQ resources
13041 * @dd: valid device data structure 13044 * @dd: valid device data structure
13042 * 13045 *
13043 * Free the MSI or INTx IRQs and associated PCI resources, 13046 * Free the MSIx and associated PCI resources, if they have been allocated.
13044 * if they have been allocated.
13045 */ 13047 */
13046void hfi1_clean_up_interrupts(struct hfi1_devdata *dd) 13048void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
13047{ 13049{
13048 int i; 13050 int i;
13051 struct hfi1_msix_entry *me = dd->msix_entries;
13049 13052
13050 /* remove irqs - must happen before disabling/turning off */ 13053 /* remove irqs - must happen before disabling/turning off */
13051 if (dd->num_msix_entries) { 13054 for (i = 0; i < dd->num_msix_entries; i++, me++) {
13052 /* MSI-X */ 13055 if (!me->arg) /* => no irq, no affinity */
13053 struct hfi1_msix_entry *me = dd->msix_entries; 13056 continue;
13054 13057 hfi1_put_irq_affinity(dd, me);
13055 for (i = 0; i < dd->num_msix_entries; i++, me++) { 13058 pci_free_irq(dd->pcidev, i, me->arg);
13056 if (!me->arg) /* => no irq, no affinity */
13057 continue;
13058 hfi1_put_irq_affinity(dd, me);
13059 pci_free_irq(dd->pcidev, i, me->arg);
13060 }
13061
13062 /* clean structures */
13063 kfree(dd->msix_entries);
13064 dd->msix_entries = NULL;
13065 dd->num_msix_entries = 0;
13066 } else {
13067 /* INTx */
13068 if (dd->requested_intx_irq) {
13069 pci_free_irq(dd->pcidev, 0, dd);
13070 dd->requested_intx_irq = 0;
13071 }
13072 disable_intx(dd->pcidev);
13073 } 13059 }
13074 13060
13061 /* clean structures */
13062 kfree(dd->msix_entries);
13063 dd->msix_entries = NULL;
13064 dd->num_msix_entries = 0;
13065
13075 pci_free_irq_vectors(dd->pcidev); 13066 pci_free_irq_vectors(dd->pcidev);
13076} 13067}
13077 13068
@@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
13121 msix_intr); 13112 msix_intr);
13122} 13113}
13123 13114
13124static int request_intx_irq(struct hfi1_devdata *dd)
13125{
13126 int ret;
13127
13128 ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd,
13129 DRIVER_NAME "_%d", dd->unit);
13130 if (ret)
13131 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
13132 ret);
13133 else
13134 dd->requested_intx_irq = 1;
13135 return ret;
13136}
13137
13138static int request_msix_irqs(struct hfi1_devdata *dd) 13115static int request_msix_irqs(struct hfi1_devdata *dd)
13139{ 13116{
13140 int first_general, last_general; 13117 int first_general, last_general;
@@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
13253{ 13230{
13254 int i; 13231 int i;
13255 13232
13256 if (!dd->num_msix_entries) {
13257 synchronize_irq(pci_irq_vector(dd->pcidev, 0));
13258 return;
13259 }
13260
13261 for (i = 0; i < dd->vnic.num_ctxt; i++) { 13233 for (i = 0; i < dd->vnic.num_ctxt; i++) {
13262 struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i]; 13234 struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
13263 struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr]; 13235 struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
@@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
13346{ 13318{
13347 u32 total; 13319 u32 total;
13348 int ret, request; 13320 int ret, request;
13349 int single_interrupt = 0; /* we expect to have all the interrupts */
13350 13321
13351 /* 13322 /*
13352 * Interrupt count: 13323 * Interrupt count:
@@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
13363 if (request < 0) { 13334 if (request < 0) {
13364 ret = request; 13335 ret = request;
13365 goto fail; 13336 goto fail;
13366 } else if (request == 0) {
13367 /* using INTx */
13368 /* dd->num_msix_entries already zero */
13369 single_interrupt = 1;
13370 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
13371 } else if (request < total) {
13372 /* using MSI-X, with reduced interrupts */
13373 dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
13374 total, request);
13375 ret = -EINVAL;
13376 goto fail;
13377 } else { 13337 } else {
13378 dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries), 13338 dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
13379 GFP_KERNEL); 13339 GFP_KERNEL);
@@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
13394 /* reset general handler mask, chip MSI-X mappings */ 13354 /* reset general handler mask, chip MSI-X mappings */
13395 reset_interrupts(dd); 13355 reset_interrupts(dd);
13396 13356
13397 if (single_interrupt) 13357 ret = request_msix_irqs(dd);
13398 ret = request_intx_irq(dd);
13399 else
13400 ret = request_msix_irqs(dd);
13401 if (ret) 13358 if (ret)
13402 goto fail; 13359 goto fail;
13403 13360
@@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13429 int qos_rmt_count; 13386 int qos_rmt_count;
13430 int user_rmt_reduced; 13387 int user_rmt_reduced;
13431 u32 n_usr_ctxts; 13388 u32 n_usr_ctxts;
13389 u32 send_contexts = chip_send_contexts(dd);
13390 u32 rcv_contexts = chip_rcv_contexts(dd);
13432 13391
13433 /* 13392 /*
13434 * Kernel receive contexts: 13393 * Kernel receive contexts:
@@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13450 * Every kernel receive context needs an ACK send context. 13409 * Every kernel receive context needs an ACK send context.
13451 * one send context is allocated for each VL{0-7} and VL15 13410 * one send context is allocated for each VL{0-7} and VL15
13452 */ 13411 */
13453 if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { 13412 if (num_kernel_contexts > (send_contexts - num_vls - 1)) {
13454 dd_dev_err(dd, 13413 dd_dev_err(dd,
13455 "Reducing # kernel rcv contexts to: %d, from %lu\n", 13414 "Reducing # kernel rcv contexts to: %d, from %lu\n",
13456 (int)(dd->chip_send_contexts - num_vls - 1), 13415 send_contexts - num_vls - 1,
13457 num_kernel_contexts); 13416 num_kernel_contexts);
13458 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; 13417 num_kernel_contexts = send_contexts - num_vls - 1;
13459 } 13418 }
13460 13419
13461 /* Accommodate VNIC contexts if possible */ 13420 /* Accommodate VNIC contexts if possible */
13462 if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { 13421 if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
13463 dd_dev_err(dd, "No receive contexts available for VNIC\n"); 13422 dd_dev_err(dd, "No receive contexts available for VNIC\n");
13464 num_vnic_contexts = 0; 13423 num_vnic_contexts = 0;
13465 } 13424 }
@@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13477 /* 13436 /*
13478 * Adjust the counts given a global max. 13437 * Adjust the counts given a global max.
13479 */ 13438 */
13480 if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { 13439 if (total_contexts + n_usr_ctxts > rcv_contexts) {
13481 dd_dev_err(dd, 13440 dd_dev_err(dd,
13482 "Reducing # user receive contexts to: %d, from %u\n", 13441 "Reducing # user receive contexts to: %d, from %u\n",
13483 (int)(dd->chip_rcv_contexts - total_contexts), 13442 rcv_contexts - total_contexts,
13484 n_usr_ctxts); 13443 n_usr_ctxts);
13485 /* recalculate */ 13444 /* recalculate */
13486 n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; 13445 n_usr_ctxts = rcv_contexts - total_contexts;
13487 } 13446 }
13488 13447
13489 /* each user context requires an entry in the RMT */ 13448 /* each user context requires an entry in the RMT */
@@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13509 dd->freectxts = n_usr_ctxts; 13468 dd->freectxts = n_usr_ctxts;
13510 dd_dev_info(dd, 13469 dd_dev_info(dd,
13511 "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", 13470 "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
13512 (int)dd->chip_rcv_contexts, 13471 rcv_contexts,
13513 (int)dd->num_rcv_contexts, 13472 (int)dd->num_rcv_contexts,
13514 (int)dd->n_krcv_queues, 13473 (int)dd->n_krcv_queues,
13515 dd->num_vnic_contexts, 13474 dd->num_vnic_contexts,
@@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13527 * contexts. 13486 * contexts.
13528 */ 13487 */
13529 dd->rcv_entries.group_size = RCV_INCREMENT; 13488 dd->rcv_entries.group_size = RCV_INCREMENT;
13530 ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size; 13489 ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size;
13531 dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts; 13490 dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
13532 dd->rcv_entries.nctxt_extra = ngroups - 13491 dd->rcv_entries.nctxt_extra = ngroups -
13533 (dd->num_rcv_contexts * dd->rcv_entries.ngroups); 13492 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
@@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
13552 dd_dev_info( 13511 dd_dev_info(
13553 dd, 13512 dd,
13554 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n", 13513 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
13555 dd->chip_send_contexts, 13514 send_contexts,
13556 dd->num_send_contexts, 13515 dd->num_send_contexts,
13557 dd->sc_sizes[SC_KERNEL].count, 13516 dd->sc_sizes[SC_KERNEL].count,
13558 dd->sc_sizes[SC_ACK].count, 13517 dd->sc_sizes[SC_ACK].count,
@@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
13610 write_csr(dd, CCE_INT_MAP + (8 * i), 0); 13569 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
13611 13570
13612 /* SendCtxtCreditReturnAddr */ 13571 /* SendCtxtCreditReturnAddr */
13613 for (i = 0; i < dd->chip_send_contexts; i++) 13572 for (i = 0; i < chip_send_contexts(dd); i++)
13614 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); 13573 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
13615 13574
13616 /* PIO Send buffers */ 13575 /* PIO Send buffers */
@@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
13623 /* RcvHdrAddr */ 13582 /* RcvHdrAddr */
13624 /* RcvHdrTailAddr */ 13583 /* RcvHdrTailAddr */
13625 /* RcvTidFlowTable */ 13584 /* RcvTidFlowTable */
13626 for (i = 0; i < dd->chip_rcv_contexts; i++) { 13585 for (i = 0; i < chip_rcv_contexts(dd); i++) {
13627 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0); 13586 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
13628 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0); 13587 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
13629 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) 13588 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
@@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
13631 } 13590 }
13632 13591
13633 /* RcvArray */ 13592 /* RcvArray */
13634 for (i = 0; i < dd->chip_rcv_array_count; i++) 13593 for (i = 0; i < chip_rcv_array_count(dd); i++)
13635 hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0); 13594 hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
13636 13595
13637 /* RcvQPMapTable */ 13596 /* RcvQPMapTable */
@@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
13789 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0); 13748 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
13790 for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++) 13749 for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
13791 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0); 13750 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
13792 for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++) 13751 for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++)
13793 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0); 13752 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
13794 for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++) 13753 for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
13795 write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0); 13754 write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
@@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
13817 /* 13776 /*
13818 * TXE Per-Context CSRs 13777 * TXE Per-Context CSRs
13819 */ 13778 */
13820 for (i = 0; i < dd->chip_send_contexts; i++) { 13779 for (i = 0; i < chip_send_contexts(dd); i++) {
13821 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); 13780 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
13822 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0); 13781 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
13823 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0); 13782 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
@@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
13835 /* 13794 /*
13836 * TXE Per-SDMA CSRs 13795 * TXE Per-SDMA CSRs
13837 */ 13796 */
13838 for (i = 0; i < dd->chip_sdma_engines; i++) { 13797 for (i = 0; i < chip_sdma_engines(dd); i++) {
13839 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); 13798 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
13840 /* SEND_DMA_STATUS read-only */ 13799 /* SEND_DMA_STATUS read-only */
13841 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0); 13800 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
@@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
13968 /* 13927 /*
13969 * RXE Kernel and User Per-Context CSRs 13928 * RXE Kernel and User Per-Context CSRs
13970 */ 13929 */
13971 for (i = 0; i < dd->chip_rcv_contexts; i++) { 13930 for (i = 0; i < chip_rcv_contexts(dd); i++) {
13972 /* kernel */ 13931 /* kernel */
13973 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0); 13932 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
13974 /* RCV_CTXT_STATUS read-only */ 13933 /* RCV_CTXT_STATUS read-only */
@@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd)
14084 14043
14085 /* disable send contexts and SDMA engines */ 14044 /* disable send contexts and SDMA engines */
14086 write_csr(dd, SEND_CTRL, 0); 14045 write_csr(dd, SEND_CTRL, 0);
14087 for (i = 0; i < dd->chip_send_contexts; i++) 14046 for (i = 0; i < chip_send_contexts(dd); i++)
14088 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0); 14047 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
14089 for (i = 0; i < dd->chip_sdma_engines; i++) 14048 for (i = 0; i < chip_sdma_engines(dd); i++)
14090 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0); 14049 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
14091 /* disable port (turn off RXE inbound traffic) and contexts */ 14050 /* disable port (turn off RXE inbound traffic) and contexts */
14092 write_csr(dd, RCV_CTRL, 0); 14051 write_csr(dd, RCV_CTRL, 0);
14093 for (i = 0; i < dd->chip_rcv_contexts; i++) 14052 for (i = 0; i < chip_rcv_contexts(dd); i++)
14094 write_csr(dd, RCV_CTXT_CTRL, 0); 14053 write_csr(dd, RCV_CTXT_CTRL, 0);
14095 /* mask all interrupt sources */ 14054 /* mask all interrupt sources */
14096 for (i = 0; i < CCE_NUM_INT_CSRS; i++) 14055 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
@@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd)
14709 write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull); 14668 write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
14710 14669
14711 /* enable all per-context and per-SDMA engine errors */ 14670 /* enable all per-context and per-SDMA engine errors */
14712 for (i = 0; i < dd->chip_send_contexts; i++) 14671 for (i = 0; i < chip_send_contexts(dd); i++)
14713 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull); 14672 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
14714 for (i = 0; i < dd->chip_sdma_engines; i++) 14673 for (i = 0; i < chip_sdma_engines(dd); i++)
14715 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull); 14674 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
14716 14675
14717 /* set the local CU to AU mapping */ 14676 /* set the local CU to AU mapping */
@@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
14979 "Functional simulator" 14938 "Functional simulator"
14980 }; 14939 };
14981 struct pci_dev *parent = pdev->bus->self; 14940 struct pci_dev *parent = pdev->bus->self;
14941 u32 sdma_engines;
14982 14942
14983 dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS * 14943 dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
14984 sizeof(struct hfi1_pportdata)); 14944 sizeof(struct hfi1_pportdata));
14985 if (IS_ERR(dd)) 14945 if (IS_ERR(dd))
14986 goto bail; 14946 goto bail;
14947 sdma_engines = chip_sdma_engines(dd);
14987 ppd = dd->pport; 14948 ppd = dd->pport;
14988 for (i = 0; i < dd->num_pports; i++, ppd++) { 14949 for (i = 0; i < dd->num_pports; i++, ppd++) {
14989 int vl; 14950 int vl;
@@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
15081 /* give a reasonable active value, will be set on link up */ 15042 /* give a reasonable active value, will be set on link up */
15082 dd->pport->link_speed_active = OPA_LINK_SPEED_25G; 15043 dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
15083 15044
15084 dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
15085 dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
15086 dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
15087 dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
15088 dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
15089 /* fix up link widths for emulation _p */ 15045 /* fix up link widths for emulation _p */
15090 ppd = dd->pport; 15046 ppd = dd->pport;
15091 if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) { 15047 if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
@@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
15096 OPA_LINK_WIDTH_1X; 15052 OPA_LINK_WIDTH_1X;
15097 } 15053 }
15098 /* ensure num_vls isn't larger than number of sdma engines */ 15054 /* ensure num_vls isn't larger than number of sdma engines */
15099 if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) { 15055 if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) {
15100 dd_dev_err(dd, "num_vls %u too large, using %u VLs\n", 15056 dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
15101 num_vls, dd->chip_sdma_engines); 15057 num_vls, sdma_engines);
15102 num_vls = dd->chip_sdma_engines; 15058 num_vls = sdma_engines;
15103 ppd->vls_supported = dd->chip_sdma_engines; 15059 ppd->vls_supported = sdma_engines;
15104 ppd->vls_operational = ppd->vls_supported; 15060 ppd->vls_operational = ppd->vls_supported;
15105 } 15061 }
15106 15062
@@ -15216,13 +15172,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
15216 */ 15172 */
15217 aspm_init(dd); 15173 aspm_init(dd);
15218 15174
15219 dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
15220 /*
15221 * rcd[0] is guaranteed to be valid by this point. Also, all
15222 * context are using the same value, as per the module parameter.
15223 */
15224 dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
15225
15226 ret = init_pervl_scs(dd); 15175 ret = init_pervl_scs(dd);
15227 if (ret) 15176 if (ret)
15228 goto bail_cleanup; 15177 goto bail_cleanup;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index fdf389e46e19..36b04d6300e5 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
656 write_csr(dd, offset0 + (0x1000 * ctxt), value); 656 write_csr(dd, offset0 + (0x1000 * ctxt), value);
657} 657}
658 658
659static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd)
660{
661 return read_csr(dd, RCV_CONTEXTS);
662}
663
664static inline u32 chip_send_contexts(struct hfi1_devdata *dd)
665{
666 return read_csr(dd, SEND_CONTEXTS);
667}
668
669static inline u32 chip_sdma_engines(struct hfi1_devdata *dd)
670{
671 return read_csr(dd, SEND_DMA_ENGINES);
672}
673
674static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd)
675{
676 return read_csr(dd, SEND_PIO_MEM_SIZE);
677}
678
679static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd)
680{
681 return read_csr(dd, SEND_DMA_MEM_SIZE);
682}
683
684static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
685{
686 return read_csr(dd, RCV_ARRAY_CNT);
687}
688
659u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl, 689u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
660 u32 dw_len); 690 u32 dw_len);
661 691
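
The new chip_*() helpers above replace the chip_* fields that used to be cached in struct hfi1_devdata with on-demand CSR reads. A minimal caller sketch, assuming dd is a valid hfi1_devdata pointer (the example_* name is illustrative, not from the patch):

static void example_log_chip_limits(struct hfi1_devdata *dd)
{
	/* each accessor is a thin read_csr() wrapper defined above */
	dd_dev_info(dd, "chip: %u rcv ctxts, %u send ctxts, %u SDMA engines\n",
		    chip_rcv_contexts(dd), chip_send_contexts(dd),
		    chip_sdma_engines(dd));
}
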
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 94dca95db04f..a41f85558312 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
208 (offset * RCV_BUF_BLOCK_SIZE)); 208 (offset * RCV_BUF_BLOCK_SIZE));
209} 209}
210 210
211static inline void *hfi1_get_header(struct hfi1_devdata *dd, 211static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd,
212 __le32 *rhf_addr) 212 __le32 *rhf_addr)
213{ 213{
214 u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); 214 u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
215 215
216 return (void *)(rhf_addr - dd->rhf_offset + offset); 216 return (void *)(rhf_addr - rcd->rhf_offset + offset);
217} 217}
218 218
219static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd, 219static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd,
220 __le32 *rhf_addr) 220 __le32 *rhf_addr)
221{ 221{
222 return (struct ib_header *)hfi1_get_header(dd, rhf_addr); 222 return (struct ib_header *)hfi1_get_header(rcd, rhf_addr);
223} 223}
224 224
225static inline struct hfi1_16b_header 225static inline struct hfi1_16b_header
226 *hfi1_get_16B_header(struct hfi1_devdata *dd, 226 *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd,
227 __le32 *rhf_addr) 227 __le32 *rhf_addr)
228{ 228{
229 return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr); 229 return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr);
230} 230}
231 231
232/* 232/*
@@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet)
591 init_ps_mdata(&mdata, packet); 591 init_ps_mdata(&mdata, packet);
592 592
593 while (1) { 593 while (1) {
594 struct hfi1_devdata *dd = rcd->dd;
595 struct hfi1_ibport *ibp = rcd_to_iport(rcd); 594 struct hfi1_ibport *ibp = rcd_to_iport(rcd);
596 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + 595 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
597 dd->rhf_offset; 596 packet->rcd->rhf_offset;
598 struct rvt_qp *qp; 597 struct rvt_qp *qp;
599 struct ib_header *hdr; 598 struct ib_header *hdr;
600 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; 599 struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
601 u64 rhf = rhf_to_cpu(rhf_addr); 600 u64 rhf = rhf_to_cpu(rhf_addr);
602 u32 etype = rhf_rcv_type(rhf), qpn, bth1; 601 u32 etype = rhf_rcv_type(rhf), qpn, bth1;
603 int is_ecn = 0; 602 int is_ecn = 0;
@@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
612 if (etype != RHF_RCV_TYPE_IB) 611 if (etype != RHF_RCV_TYPE_IB)
613 goto next; 612 goto next;
614 613
615 packet->hdr = hfi1_get_msgheader(dd, rhf_addr); 614 packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr);
616 hdr = packet->hdr; 615 hdr = packet->hdr;
617 lnh = ib_get_lnh(hdr); 616 lnh = ib_get_lnh(hdr);
618 617
@@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
718 ret = check_max_packet(packet, thread); 717 ret = check_max_packet(packet, thread);
719 718
720 packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + 719 packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
721 packet->rcd->dd->rhf_offset; 720 packet->rcd->rhf_offset;
722 packet->rhf = rhf_to_cpu(packet->rhf_addr); 721 packet->rhf = rhf_to_cpu(packet->rhf_addr);
723 722
724 return ret; 723 return ret;
@@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
757 * crashing down. There is no need to eat another 756 * crashing down. There is no need to eat another
758 * comparison in this performance critical code. 757 * comparison in this performance critical code.
759 */ 758 */
760 packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet); 759 packet->rcd->rhf_rcv_function_map[packet->etype](packet);
761 packet->numpkt++; 760 packet->numpkt++;
762 761
763 /* Set up for the next packet */ 762 /* Set up for the next packet */
@@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
768 ret = check_max_packet(packet, thread); 767 ret = check_max_packet(packet, thread);
769 768
770 packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff + 769 packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
771 packet->rcd->dd->rhf_offset; 770 packet->rcd->rhf_offset;
772 packet->rhf = rhf_to_cpu(packet->rhf_addr); 771 packet->rhf = rhf_to_cpu(packet->rhf_addr);
773 772
774 return ret; 773 return ret;
@@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
949 u8 sc = SC15_PACKET; 948 u8 sc = SC15_PACKET;
950 949
951 if (etype == RHF_RCV_TYPE_IB) { 950 if (etype == RHF_RCV_TYPE_IB) {
952 struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, 951 struct ib_header *hdr = hfi1_get_msgheader(packet->rcd,
953 packet->rhf_addr); 952 packet->rhf_addr);
954 sc = hfi1_9B_get_sc5(hdr, packet->rhf); 953 sc = hfi1_9B_get_sc5(hdr, packet->rhf);
955 } else if (etype == RHF_RCV_TYPE_BYPASS) { 954 } else if (etype == RHF_RCV_TYPE_BYPASS) {
956 struct hfi1_16b_header *hdr = hfi1_get_16B_header( 955 struct hfi1_16b_header *hdr = hfi1_get_16B_header(
957 packet->rcd->dd, 956 packet->rcd,
958 packet->rhf_addr); 957 packet->rhf_addr);
959 sc = hfi1_16B_get_sc(hdr); 958 sc = hfi1_16B_get_sc(hdr);
960 } 959 }
@@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
1034 packet.rhqoff += packet.rsize; 1033 packet.rhqoff += packet.rsize;
1035 packet.rhf_addr = (__le32 *)rcd->rcvhdrq + 1034 packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
1036 packet.rhqoff + 1035 packet.rhqoff +
1037 dd->rhf_offset; 1036 rcd->rhf_offset;
1038 packet.rhf = rhf_to_cpu(packet.rhf_addr); 1037 packet.rhf = rhf_to_cpu(packet.rhf_addr);
1039 1038
1040 } else if (skip_pkt) { 1039 } else if (skip_pkt) {
@@ -1384,7 +1383,7 @@ bail:
1384static inline void hfi1_setup_ib_header(struct hfi1_packet *packet) 1383static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
1385{ 1384{
1386 packet->hdr = (struct hfi1_ib_message_header *) 1385 packet->hdr = (struct hfi1_ib_message_header *)
1387 hfi1_get_msgheader(packet->rcd->dd, 1386 hfi1_get_msgheader(packet->rcd,
1388 packet->rhf_addr); 1387 packet->rhf_addr);
1389 packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; 1388 packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
1390} 1389}
@@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
1485 u8 l4; 1484 u8 l4;
1486 1485
1487 packet->hdr = (struct hfi1_16b_header *) 1486 packet->hdr = (struct hfi1_16b_header *)
1488 hfi1_get_16B_header(packet->rcd->dd, 1487 hfi1_get_16B_header(packet->rcd,
1489 packet->rhf_addr); 1488 packet->rhf_addr);
1490 l4 = hfi1_16B_get_l4(packet->hdr); 1489 l4 = hfi1_16B_get_l4(packet->hdr);
1491 if (l4 == OPA_16B_L4_IB_LOCAL) { 1490 if (l4 == OPA_16B_L4_IB_LOCAL) {
@@ -1575,7 +1574,7 @@ void handle_eflags(struct hfi1_packet *packet)
1575 * The following functions are called by the interrupt handler. They are type 1574 * The following functions are called by the interrupt handler. They are type
1576 * specific handlers for each packet type. 1575 * specific handlers for each packet type.
1577 */ 1576 */
1578int process_receive_ib(struct hfi1_packet *packet) 1577static int process_receive_ib(struct hfi1_packet *packet)
1579{ 1578{
1580 if (hfi1_setup_9B_packet(packet)) 1579 if (hfi1_setup_9B_packet(packet))
1581 return RHF_RCV_CONTINUE; 1580 return RHF_RCV_CONTINUE;
@@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
1607 return false; 1606 return false;
1608} 1607}
1609 1608
1610int process_receive_bypass(struct hfi1_packet *packet) 1609static int process_receive_bypass(struct hfi1_packet *packet)
1611{ 1610{
1612 struct hfi1_devdata *dd = packet->rcd->dd; 1611 struct hfi1_devdata *dd = packet->rcd->dd;
1613 1612
@@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
1649 return RHF_RCV_CONTINUE; 1648 return RHF_RCV_CONTINUE;
1650} 1649}
1651 1650
1652int process_receive_error(struct hfi1_packet *packet) 1651static int process_receive_error(struct hfi1_packet *packet)
1653{ 1652{
1654 /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ 1653 /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
1655 if (unlikely( 1654 if (unlikely(
@@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet)
1668 return RHF_RCV_CONTINUE; 1667 return RHF_RCV_CONTINUE;
1669} 1668}
1670 1669
1671int kdeth_process_expected(struct hfi1_packet *packet) 1670static int kdeth_process_expected(struct hfi1_packet *packet)
1672{ 1671{
1673 hfi1_setup_9B_packet(packet); 1672 hfi1_setup_9B_packet(packet);
1674 if (unlikely(hfi1_dbg_should_fault_rx(packet))) 1673 if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
1682 return RHF_RCV_CONTINUE; 1681 return RHF_RCV_CONTINUE;
1683} 1682}
1684 1683
1685int kdeth_process_eager(struct hfi1_packet *packet) 1684static int kdeth_process_eager(struct hfi1_packet *packet)
1686{ 1685{
1687 hfi1_setup_9B_packet(packet); 1686 hfi1_setup_9B_packet(packet);
1688 if (unlikely(hfi1_dbg_should_fault_rx(packet))) 1687 if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet)
1695 return RHF_RCV_CONTINUE; 1694 return RHF_RCV_CONTINUE;
1696} 1695}
1697 1696
1698int process_receive_invalid(struct hfi1_packet *packet) 1697static int process_receive_invalid(struct hfi1_packet *packet)
1699{ 1698{
1700 dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n", 1699 dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
1701 rhf_rcv_type(packet->rhf)); 1700 rhf_rcv_type(packet->rhf));
@@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
1719 init_ps_mdata(&mdata, &packet); 1718 init_ps_mdata(&mdata, &packet);
1720 1719
1721 while (1) { 1720 while (1) {
1722 struct hfi1_devdata *dd = rcd->dd;
1723 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + 1721 __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
1724 dd->rhf_offset; 1722 rcd->rhf_offset;
1725 struct ib_header *hdr; 1723 struct ib_header *hdr;
1726 u64 rhf = rhf_to_cpu(rhf_addr); 1724 u64 rhf = rhf_to_cpu(rhf_addr);
1727 u32 etype = rhf_rcv_type(rhf), qpn; 1725 u32 etype = rhf_rcv_type(rhf), qpn;
@@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
1738 if (etype > RHF_RCV_TYPE_IB) 1736 if (etype > RHF_RCV_TYPE_IB)
1739 goto next; 1737 goto next;
1740 1738
1741 packet.hdr = hfi1_get_msgheader(dd, rhf_addr); 1739 packet.hdr = hfi1_get_msgheader(rcd, rhf_addr);
1742 hdr = packet.hdr; 1740 hdr = packet.hdr;
1743 1741
1744 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 1742 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -1760,3 +1758,14 @@ next:
1760 update_ps_mdata(&mdata, rcd); 1758 update_ps_mdata(&mdata, rcd);
1761 } 1759 }
1762} 1760}
1761
1762const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
1763 [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected,
1764 [RHF_RCV_TYPE_EAGER] = kdeth_process_eager,
1765 [RHF_RCV_TYPE_IB] = process_receive_ib,
1766 [RHF_RCV_TYPE_ERROR] = process_receive_error,
1767 [RHF_RCV_TYPE_BYPASS] = process_receive_bypass,
1768 [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
1769 [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
1770 [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
1771};
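
With the handler table exported here and attached to the receive context (rcd->rhf_rcv_function_map is pointed at normal_rhf_rcv_functions in hfi1_create_ctxtdata()), the hot path dispatches by RHF receive type. A minimal sketch mirroring process_rcv_packet() above; the example_* name is illustrative:

static inline void example_dispatch(struct hfi1_packet *packet)
{
	/* etype was decoded from the RHF; the INVALID5-7 slots all
	 * fall through to process_receive_invalid */
	packet->rcd->rhf_rcv_function_map[packet->etype](packet);
}
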
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 0fc4aa9455c3..1fc75647e47b 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
411 mapio = 1; 411 mapio = 1;
412 break; 412 break;
413 case RCV_HDRQ: 413 case RCV_HDRQ:
414 memlen = uctxt->rcvhdrq_size; 414 memlen = rcvhdrq_size(uctxt);
415 memvirt = uctxt->rcvhdrq; 415 memvirt = uctxt->rcvhdrq;
416 break; 416 break;
417 case RCV_EGRBUF: { 417 case RCV_EGRBUF: {
@@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
521 break; 521 break;
522 case SUBCTXT_RCV_HDRQ: 522 case SUBCTXT_RCV_HDRQ:
523 memaddr = (u64)uctxt->subctxt_rcvhdr_base; 523 memaddr = (u64)uctxt->subctxt_rcvhdr_base;
524 memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt; 524 memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
525 flags |= VM_IO | VM_DONTEXPAND; 525 flags |= VM_IO | VM_DONTEXPAND;
526 vmf = 1; 526 vmf = 1;
527 break; 527 break;
@@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
985 * sub contexts. 985 * sub contexts.
986 * This has to be done here so the rest of the sub-contexts find the 986 * This has to be done here so the rest of the sub-contexts find the
987 * proper base context. 987 * proper base context.
988 * NOTE: _set_bit() can be used here because the context creation is
989 * protected by the mutex (rather than the spin_lock), and will be the
990 * very first instance of this context.
988 */ 991 */
992 __set_bit(0, uctxt->in_use_ctxts);
989 if (uinfo->subctxt_cnt) 993 if (uinfo->subctxt_cnt)
990 init_subctxts(uctxt, uinfo); 994 init_subctxts(uctxt, uinfo);
991 uctxt->userversion = uinfo->userversion; 995 uctxt->userversion = uinfo->userversion;
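
A hypothetical illustration (not part of the patch) of how a later shared-context open could claim a free slot from the same in_use_ctxts bitmap; like the __set_bit() above, it assumes the caller holds the context-creation mutex:

static int example_claim_subctxt(struct hfi1_ctxtdata *uctxt)
{
	unsigned long idx;

	idx = find_first_zero_bit(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
	if (idx >= HFI1_MAX_SHARED_CTXTS)
		return -EBUSY;
	__set_bit(idx, uctxt->in_use_ctxts);	/* non-atomic is fine under the mutex */
	return idx;
}
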
@@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
1040 return -ENOMEM; 1044 return -ENOMEM;
1041 1045
1042 /* We can take the size of the RcvHdr Queue from the master */ 1046 /* We can take the size of the RcvHdr Queue from the master */
1043 uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size * 1047 uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
1044 num_subctxts); 1048 num_subctxts);
1045 if (!uctxt->subctxt_rcvhdr_base) { 1049 if (!uctxt->subctxt_rcvhdr_base) {
1046 ret = -ENOMEM; 1050 ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4ab8b5bfbed1..d9470317983f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler;
169struct hfi1_opcode_stats_perctx; 169struct hfi1_opcode_stats_perctx;
170 170
171struct ctxt_eager_bufs { 171struct ctxt_eager_bufs {
172 ssize_t size; /* total size of eager buffers */
173 u32 count; /* size of buffers array */
174 u32 numbufs; /* number of buffers allocated */
175 u32 alloced; /* number of rcvarray entries used */
176 u32 rcvtid_size; /* size of each eager rcv tid */
177 u32 threshold; /* head update threshold */
178 struct eager_buffer { 172 struct eager_buffer {
179 void *addr; 173 void *addr;
180 dma_addr_t dma; 174 dma_addr_t dma;
@@ -184,6 +178,12 @@ struct ctxt_eager_bufs {
184 void *addr; 178 void *addr;
185 dma_addr_t dma; 179 dma_addr_t dma;
186 } *rcvtids; 180 } *rcvtids;
181 u32 size; /* total size of eager buffers */
182 u32 rcvtid_size; /* size of each eager rcv tid */
183 u16 count; /* size of buffers array */
184 u16 numbufs; /* number of buffers allocated */
185 u16 alloced; /* number of rcvarray entries used */
186 u16 threshold; /* head update threshold */
187}; 187};
188 188
189struct exp_tid_set { 189struct exp_tid_set {
@@ -191,43 +191,84 @@ struct exp_tid_set {
191 u32 count; 191 u32 count;
192}; 192};
193 193
194typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
194struct hfi1_ctxtdata { 195struct hfi1_ctxtdata {
195 /* shadow the ctxt's RcvCtrl register */
196 u64 rcvctrl;
197 /* rcvhdrq base, needs mmap before useful */ 196 /* rcvhdrq base, needs mmap before useful */
198 void *rcvhdrq; 197 void *rcvhdrq;
199 /* kernel virtual address where hdrqtail is updated */ 198 /* kernel virtual address where hdrqtail is updated */
200 volatile __le64 *rcvhdrtail_kvaddr; 199 volatile __le64 *rcvhdrtail_kvaddr;
201 /* when waiting for rcv or pioavail */ 200 /* so functions that need physical port can get it easily */
202 wait_queue_head_t wait; 201 struct hfi1_pportdata *ppd;
203 /* rcvhdrq size (for freeing) */ 202 /* so file ops can get at unit */
204 size_t rcvhdrq_size; 203 struct hfi1_devdata *dd;
204 /* this receive context's assigned PIO ACK send context */
205 struct send_context *sc;
206 /* per context recv functions */
207 const rhf_rcv_function_ptr *rhf_rcv_function_map;
208 /*
209 * The interrupt handler for a particular receive context can vary
 210 * throughout its lifetime. This is not a lock protected data member so
211 * it must be updated atomically and the prev and new value must always
212 * be valid. Worst case is we process an extra interrupt and up to 64
213 * packets with the wrong interrupt handler.
214 */
215 int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
216 /* verbs rx_stats per rcd */
217 struct hfi1_opcode_stats_perctx *opstats;
218 /* clear interrupt mask */
219 u64 imask;
220 /* ctxt rcvhdrq head offset */
221 u32 head;
205 /* number of rcvhdrq entries */ 222 /* number of rcvhdrq entries */
206 u16 rcvhdrq_cnt; 223 u16 rcvhdrq_cnt;
224 u8 ireg; /* clear interrupt register */
225 /* receive packet sequence counter */
226 u8 seq_cnt;
207 /* size of each of the rcvhdrq entries */ 227 /* size of each of the rcvhdrq entries */
208 u16 rcvhdrqentsize; 228 u8 rcvhdrqentsize;
229 /* offset of RHF within receive header entry */
230 u8 rhf_offset;
231 /* dynamic receive available interrupt timeout */
232 u8 rcvavail_timeout;
233 /* Indicates that this is vnic context */
234 bool is_vnic;
235 /* vnic queue index this context is mapped to */
236 u8 vnic_q_idx;
237 /* Is ASPM interrupt supported for this context */
238 bool aspm_intr_supported;
239 /* ASPM state (enabled/disabled) for this context */
240 bool aspm_enabled;
241 /* Is ASPM processing enabled for this context (in intr context) */
242 bool aspm_intr_enable;
243 struct ctxt_eager_bufs egrbufs;
244 /* QPs waiting for context processing */
245 struct list_head qp_wait_list;
246 /* tid allocation lists */
247 struct exp_tid_set tid_group_list;
248 struct exp_tid_set tid_used_list;
249 struct exp_tid_set tid_full_list;
250
251 /* Timer for re-enabling ASPM if interrupt activity quiets down */
252 struct timer_list aspm_timer;
253 /* per-context configuration flags */
254 unsigned long flags;
255 /* array of tid_groups */
256 struct tid_group *groups;
209 /* mmap of hdrq, must fit in 44 bits */ 257 /* mmap of hdrq, must fit in 44 bits */
210 dma_addr_t rcvhdrq_dma; 258 dma_addr_t rcvhdrq_dma;
211 dma_addr_t rcvhdrqtailaddr_dma; 259 dma_addr_t rcvhdrqtailaddr_dma;
212 struct ctxt_eager_bufs egrbufs; 260 /* Last interrupt timestamp */
213 /* this receive context's assigned PIO ACK send context */ 261 ktime_t aspm_ts_last_intr;
214 struct send_context *sc; 262 /* Last timestamp at which we scheduled a timer for this context */
215 263 ktime_t aspm_ts_timer_sched;
216 /* dynamic receive available interrupt timeout */ 264 /* Lock to serialize between intr, timer intr and user threads */
217 u32 rcvavail_timeout; 265 spinlock_t aspm_lock;
218 /* Reference count the base context usage */ 266 /* Reference count the base context usage */
219 struct kref kref; 267 struct kref kref;
220 268 /* numa node of this context */
221 /* Device context index */ 269 int numa_id;
222 u16 ctxt; 270 /* associated msix interrupt. */
223 /* 271 s16 msix_intr;
224 * non-zero if ctxt can be shared, and defines the maximum number of
225 * sub-contexts for this device context.
226 */
227 u16 subctxt_cnt;
228 /* non-zero if ctxt is being shared. */
229 u16 subctxt_id;
230 u8 uuid[16];
231 /* job key */ 272 /* job key */
232 u16 jkey; 273 u16 jkey;
233 /* number of RcvArray groups for this context. */ 274 /* number of RcvArray groups for this context. */
@@ -238,87 +279,59 @@ struct hfi1_ctxtdata {
238 u16 expected_count; 279 u16 expected_count;
239 /* index of first expected TID entry. */ 280 /* index of first expected TID entry. */
240 u16 expected_base; 281 u16 expected_base;
241 /* array of tid_groups */ 282 /* Device context index */
242 struct tid_group *groups; 283 u8 ctxt;
243
244 struct exp_tid_set tid_group_list;
245 struct exp_tid_set tid_used_list;
246 struct exp_tid_set tid_full_list;
247 284
248 /* lock protecting all Expected TID data of user contexts */ 285 /* PSM Specific fields */
286 /* lock protecting all Expected TID data */
249 struct mutex exp_mutex; 287 struct mutex exp_mutex;
250 /* per-context configuration flags */ 288 /* when waiting for rcv or pioavail */
251 unsigned long flags; 289 wait_queue_head_t wait;
252 /* per-context event flags for fileops/intr communication */ 290 /* uuid from PSM */
253 unsigned long event_flags; 291 u8 uuid[16];
254 /* total number of polled urgent packets */
255 u32 urgent;
256 /* saved total number of polled urgent packets for poll edge trigger */
257 u32 urgent_poll;
258 /* same size as task_struct .comm[], command that opened context */ 292 /* same size as task_struct .comm[], command that opened context */
259 char comm[TASK_COMM_LEN]; 293 char comm[TASK_COMM_LEN];
260 /* so file ops can get at unit */ 294 /* Bitmask of in use context(s) */
261 struct hfi1_devdata *dd; 295 DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
262 /* so functions that need physical port can get it easily */ 296 /* per-context event flags for fileops/intr communication */
263 struct hfi1_pportdata *ppd; 297 unsigned long event_flags;
264 /* associated msix interrupt */
265 u32 msix_intr;
266 /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ 298 /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
267 void *subctxt_uregbase; 299 void *subctxt_uregbase;
268 /* An array of pages for the eager receive buffers * N */ 300 /* An array of pages for the eager receive buffers * N */
269 void *subctxt_rcvegrbuf; 301 void *subctxt_rcvegrbuf;
270 /* An array of pages for the eager header queue entries * N */ 302 /* An array of pages for the eager header queue entries * N */
271 void *subctxt_rcvhdr_base; 303 void *subctxt_rcvhdr_base;
272 /* Bitmask of in use context(s) */ 304 /* total number of polled urgent packets */
273 DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS); 305 u32 urgent;
274 /* The version of the library which opened this ctxt */ 306 /* saved total number of polled urgent packets for poll edge trigger */
275 u32 userversion; 307 u32 urgent_poll;
276 /* Type of packets or conditions we want to poll for */ 308 /* Type of packets or conditions we want to poll for */
277 u16 poll_type; 309 u16 poll_type;
278 /* receive packet sequence counter */ 310 /* non-zero if ctxt is being shared. */
279 u8 seq_cnt; 311 u16 subctxt_id;
280 /* ctxt rcvhdrq head offset */ 312 /* The version of the library which opened this ctxt */
281 u32 head; 313 u32 userversion;
282 /* QPs waiting for context processing */
283 struct list_head qp_wait_list;
284 /* interrupt handling */
285 u64 imask; /* clear interrupt mask */
286 int ireg; /* clear interrupt register */
287 int numa_id; /* numa node of this context */
288 /* verbs rx_stats per rcd */
289 struct hfi1_opcode_stats_perctx *opstats;
290
291 /* Is ASPM interrupt supported for this context */
292 bool aspm_intr_supported;
293 /* ASPM state (enabled/disabled) for this context */
294 bool aspm_enabled;
295 /* Timer for re-enabling ASPM if interrupt activity quietens down */
296 struct timer_list aspm_timer;
297 /* Lock to serialize between intr, timer intr and user threads */
298 spinlock_t aspm_lock;
299 /* Is ASPM processing enabled for this context (in intr context) */
300 bool aspm_intr_enable;
301 /* Last interrupt timestamp */
302 ktime_t aspm_ts_last_intr;
303 /* Last timestamp at which we scheduled a timer for this context */
304 ktime_t aspm_ts_timer_sched;
305
306 /* 314 /*
307 * The interrupt handler for a particular receive context can vary 315 * non-zero if ctxt can be shared, and defines the maximum number of
308 * throughout it's lifetime. This is not a lock protected data member so 316 * sub-contexts for this device context.
309 * it must be updated atomically and the prev and new value must always
310 * be valid. Worst case is we process an extra interrupt and up to 64
311 * packets with the wrong interrupt handler.
312 */ 317 */
313 int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); 318 u8 subctxt_cnt;
314
315 /* Indicates that this is vnic context */
316 bool is_vnic;
317 319
318 /* vnic queue index this context is mapped to */
319 u8 vnic_q_idx;
320}; 320};
321 321
322/**
323 * rcvhdrq_size - return total size in bytes for header queue
324 * @rcd: the receive context
325 *
326 * rcvhdrqentsize is in DWs, so we have to convert to bytes
327 *
328 */
329static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
330{
331 return PAGE_ALIGN(rcd->rcvhdrq_cnt *
332 rcd->rcvhdrqentsize * sizeof(u32));
333}
334
322/* 335/*
323 * Represents a single packet at a high level. Put commonly computed things in 336 * Represents a single packet at a high level. Put commonly computed things in
324 * here so we do not have to keep doing them over and over. The rule of thumb is 337 * here so we do not have to keep doing them over and over. The rule of thumb is
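
A worked example of the new rcvhdrq_size() helper added above, assuming the common defaults of 2048 header queue entries of 32 DWs each (both are module parameters, so the numbers are illustrative only):

	u32 bytes = rcvhdrq_size(rcd);	/* PAGE_ALIGN(2048 * 32 * sizeof(u32)) == 262144 */

	/* the same helper now sizes the allocation, the free and the mmap */
	rcd->rcvhdrq = dma_zalloc_coherent(&rcd->dd->pcidev->dev, bytes,
					   &rcd->rcvhdrq_dma, GFP_KERNEL);
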
@@ -897,12 +910,11 @@ struct hfi1_pportdata {
897 u64 vl_xmit_flit_cnt[C_VL_COUNT + 1]; 910 u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
898}; 911};
899 912
900typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
901
902typedef void (*opcode_handler)(struct hfi1_packet *packet); 913typedef void (*opcode_handler)(struct hfi1_packet *packet);
903typedef void (*hfi1_make_req)(struct rvt_qp *qp, 914typedef void (*hfi1_make_req)(struct rvt_qp *qp,
904 struct hfi1_pkt_state *ps, 915 struct hfi1_pkt_state *ps,
905 struct rvt_swqe *wqe); 916 struct rvt_swqe *wqe);
917extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
906 918
907 919
908/* return values for the RHF receive functions */ 920/* return values for the RHF receive functions */
@@ -1046,8 +1058,6 @@ struct hfi1_devdata {
1046 dma_addr_t sdma_pad_phys; 1058 dma_addr_t sdma_pad_phys;
1047 /* for deallocation */ 1059 /* for deallocation */
1048 size_t sdma_heads_size; 1060 size_t sdma_heads_size;
1049 /* number from the chip */
1050 u32 chip_sdma_engines;
1051 /* num used */ 1061 /* num used */
1052 u32 num_sdma; 1062 u32 num_sdma;
1053 /* array of engines sized by num_sdma */ 1063 /* array of engines sized by num_sdma */
@@ -1102,8 +1112,6 @@ struct hfi1_devdata {
1102 /* base receive interrupt timeout, in CSR units */ 1112 /* base receive interrupt timeout, in CSR units */
1103 u32 rcv_intr_timeout_csr; 1113 u32 rcv_intr_timeout_csr;
1104 1114
1105 u32 freezelen; /* max length of freezemsg */
1106 u64 __iomem *egrtidbase;
1107 spinlock_t sendctrl_lock; /* protect changes to SendCtrl */ 1115 spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
1108 spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */ 1116 spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
1109 spinlock_t uctxt_lock; /* protect rcd changes */ 1117 spinlock_t uctxt_lock; /* protect rcd changes */
@@ -1130,25 +1138,6 @@ struct hfi1_devdata {
1130 /* Base GUID for device (network order) */ 1138 /* Base GUID for device (network order) */
1131 u64 base_guid; 1139 u64 base_guid;
1132 1140
1133 /* these are the "32 bit" regs */
1134
1135 /* value we put in kr_rcvhdrsize */
1136 u32 rcvhdrsize;
1137 /* number of receive contexts the chip supports */
1138 u32 chip_rcv_contexts;
1139 /* number of receive array entries */
1140 u32 chip_rcv_array_count;
1141 /* number of PIO send contexts the chip supports */
1142 u32 chip_send_contexts;
1143 /* number of bytes in the PIO memory buffer */
1144 u32 chip_pio_mem_size;
1145 /* number of bytes in the SDMA memory buffer */
1146 u32 chip_sdma_mem_size;
1147
1148 /* size of each rcvegrbuffer */
1149 u32 rcvegrbufsize;
1150 /* log2 of above */
1151 u16 rcvegrbufsize_shift;
1152 /* both sides of the PCIe link are gen3 capable */ 1141 /* both sides of the PCIe link are gen3 capable */
1153 u8 link_gen3_capable; 1142 u8 link_gen3_capable;
1154 u8 dc_shutdown; 1143 u8 dc_shutdown;
@@ -1221,9 +1210,6 @@ struct hfi1_devdata {
1221 u32 num_msix_entries; 1210 u32 num_msix_entries;
1222 u32 first_dyn_msix_idx; 1211 u32 first_dyn_msix_idx;
1223 1212
1224 /* INTx information */
1225 u32 requested_intx_irq; /* did we request one? */
1226
1227 /* general interrupt: mask of handled interrupts */ 1213 /* general interrupt: mask of handled interrupts */
1228 u64 gi_mask[CCE_NUM_INT_CSRS]; 1214 u64 gi_mask[CCE_NUM_INT_CSRS];
1229 1215
@@ -1289,8 +1275,6 @@ struct hfi1_devdata {
1289 u64 sw_cce_err_status_aggregate; 1275 u64 sw_cce_err_status_aggregate;
1290 /* Software counter that aggregates all bypass packet rcv errors */ 1276 /* Software counter that aggregates all bypass packet rcv errors */
1291 u64 sw_rcv_bypass_packet_errors; 1277 u64 sw_rcv_bypass_packet_errors;
1292 /* receive interrupt function */
1293 rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
1294 1278
1295 /* Save the enabled LCB error bits */ 1279 /* Save the enabled LCB error bits */
1296 u64 lcb_err_en; 1280 u64 lcb_err_en;
@@ -1329,10 +1313,7 @@ struct hfi1_devdata {
1329 /* seqlock for sc2vl */ 1313 /* seqlock for sc2vl */
1330 seqlock_t sc2vl_lock ____cacheline_aligned_in_smp; 1314 seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
1331 u64 sc2vl[4]; 1315 u64 sc2vl[4];
1332 /* receive interrupt functions */
1333 rhf_rcv_function_ptr *rhf_rcv_function_map;
1334 u64 __percpu *rcv_limit; 1316 u64 __percpu *rcv_limit;
1335 u16 rhf_offset; /* offset of RHF within receive header entry */
1336 /* adding a new field here would make it part of this cacheline */ 1317 /* adding a new field here would make it part of this cacheline */
1337 1318
1338 /* OUI comes from the HW. Used everywhere as 3 separate bytes. */ 1319 /* OUI comes from the HW. Used everywhere as 3 separate bytes. */
@@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
1471/* calculate the current RHF address */ 1452/* calculate the current RHF address */
1472static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) 1453static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
1473{ 1454{
1474 return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; 1455 return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
1475} 1456}
1476 1457
1477int hfi1_reset_device(int); 1458int hfi1_reset_device(int);
@@ -2021,12 +2002,6 @@ static inline void flush_wc(void)
2021} 2002}
2022 2003
2023void handle_eflags(struct hfi1_packet *packet); 2004void handle_eflags(struct hfi1_packet *packet);
2024int process_receive_ib(struct hfi1_packet *packet);
2025int process_receive_bypass(struct hfi1_packet *packet);
2026int process_receive_error(struct hfi1_packet *packet);
2027int kdeth_process_expected(struct hfi1_packet *packet);
2028int kdeth_process_eager(struct hfi1_packet *packet);
2029int process_receive_invalid(struct hfi1_packet *packet);
2030void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd); 2005void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);
2031 2006
2032/* global module parameter variables */ 2007/* global module parameter variables */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f110842b91f5..758d273c32cf 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
364 hfi1_exp_tid_group_init(rcd); 364 hfi1_exp_tid_group_init(rcd);
365 rcd->ppd = ppd; 365 rcd->ppd = ppd;
366 rcd->dd = dd; 366 rcd->dd = dd;
367 __set_bit(0, rcd->in_use_ctxts);
368 rcd->numa_id = numa; 367 rcd->numa_id = numa;
369 rcd->rcv_array_groups = dd->rcv_entries.ngroups; 368 rcd->rcv_array_groups = dd->rcv_entries.ngroups;
369 rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
370 370
371 mutex_init(&rcd->exp_mutex); 371 mutex_init(&rcd->exp_mutex);
372 372
@@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
404 404
405 rcd->rcvhdrq_cnt = rcvhdrcnt; 405 rcd->rcvhdrq_cnt = rcvhdrcnt;
406 rcd->rcvhdrqentsize = hfi1_hdrq_entsize; 406 rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
407 rcd->rhf_offset =
408 rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
407 /* 409 /*
408 * Simple Eager buffer allocation: we have already pre-allocated 410 * Simple Eager buffer allocation: we have already pre-allocated
409 * the number of RcvArray entry groups. Each ctxtdata structure 411 * the number of RcvArray entry groups. Each ctxtdata structure
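
The per-context rhf_offset computed above is the DW offset of the RHF within a header queue entry: sizeof(u64) / sizeof(u32) is the 2 DWs the trailing RHF occupies. A sketch of the arithmetic with the default 32-DW entry size (illustrative value):

	/* e.g. 32-DW entries: the RHF is the trailing u64, so it starts at DW 30 */
	rcd->rhf_offset = rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
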
@@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
853 struct hfi1_ctxtdata *rcd; 855 struct hfi1_ctxtdata *rcd;
854 struct hfi1_pportdata *ppd; 856 struct hfi1_pportdata *ppd;
855 857
856 /* Set up recv low level handlers */
857 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] =
858 kdeth_process_expected;
859 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] =
860 kdeth_process_eager;
861 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib;
862 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] =
863 process_receive_error;
864 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] =
865 process_receive_bypass;
866 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] =
867 process_receive_invalid;
868 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] =
869 process_receive_invalid;
870 dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] =
871 process_receive_invalid;
872 dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
873
874 /* Set up send low level handlers */ 858 /* Set up send low level handlers */
875 dd->process_pio_send = hfi1_verbs_send_pio; 859 dd->process_pio_send = hfi1_verbs_send_pio;
876 dd->process_dma_send = hfi1_verbs_send_dma; 860 dd->process_dma_send = hfi1_verbs_send_dma;
@@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
936 } 920 }
937 921
938 /* Allocate enough memory for user event notification. */ 922 /* Allocate enough memory for user event notification. */
939 len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS * 923 len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
940 sizeof(*dd->events)); 924 sizeof(*dd->events));
941 dd->events = vmalloc_user(len); 925 dd->events = vmalloc_user(len);
942 if (!dd->events) 926 if (!dd->events)
@@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
948 dd->status = vmalloc_user(PAGE_SIZE); 932 dd->status = vmalloc_user(PAGE_SIZE);
949 if (!dd->status) 933 if (!dd->status)
950 dd_dev_err(dd, "Failed to allocate dev status page\n"); 934 dd_dev_err(dd, "Failed to allocate dev status page\n");
951 else
952 dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) -
953 sizeof(dd->status->freezemsg));
954 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 935 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
955 ppd = dd->pport + pidx; 936 ppd = dd->pport + pidx;
956 if (dd->status) 937 if (dd->status)
@@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
1144 return; 1125 return;
1145 1126
1146 if (rcd->rcvhdrq) { 1127 if (rcd->rcvhdrq) {
1147 dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, 1128 dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
1148 rcd->rcvhdrq, rcd->rcvhdrq_dma); 1129 rcd->rcvhdrq, rcd->rcvhdrq_dma);
1149 rcd->rcvhdrq = NULL; 1130 rcd->rcvhdrq = NULL;
1150 if (rcd->rcvhdrtail_kvaddr) { 1131 if (rcd->rcvhdrtail_kvaddr) {
@@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
1855 if (!rcd->rcvhdrq) { 1836 if (!rcd->rcvhdrq) {
1856 gfp_t gfp_flags; 1837 gfp_t gfp_flags;
1857 1838
1858 /* 1839 amt = rcvhdrq_size(rcd);
1859 * rcvhdrqentsize is in DWs, so we have to convert to bytes
1860 * (* sizeof(u32)).
1861 */
1862 amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
1863 sizeof(u32));
1864 1840
1865 if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) 1841 if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
1866 gfp_flags = GFP_KERNEL; 1842 gfp_flags = GFP_KERNEL;
@@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
1885 if (!rcd->rcvhdrtail_kvaddr) 1861 if (!rcd->rcvhdrtail_kvaddr)
1886 goto bail_free; 1862 goto bail_free;
1887 } 1863 }
1888
1889 rcd->rcvhdrq_size = amt;
1890 } 1864 }
1891 /* 1865 /*
1892 * These values are per-context: 1866 * These values are per-context:
@@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
1902 & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) 1876 & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
1903 << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT; 1877 << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
1904 write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg); 1878 write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
1905 reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK) 1879 reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
1906 << RCV_HDR_SIZE_HDR_SIZE_SHIFT; 1880 << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
1907 write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg); 1881 write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
1908 1882
@@ -1938,9 +1912,9 @@ bail:
1938int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) 1912int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
1939{ 1913{
1940 struct hfi1_devdata *dd = rcd->dd; 1914 struct hfi1_devdata *dd = rcd->dd;
1941 u32 max_entries, egrtop, alloced_bytes = 0, idx = 0; 1915 u32 max_entries, egrtop, alloced_bytes = 0;
1942 gfp_t gfp_flags; 1916 gfp_t gfp_flags;
1943 u16 order; 1917 u16 order, idx = 0;
1944 int ret = 0; 1918 int ret = 0;
1945 u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu); 1919 u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
1946 1920
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index baf7c324f7b8..eec83757d55f 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
157 unsigned long len; 157 unsigned long len;
158 resource_size_t addr; 158 resource_size_t addr;
159 int ret = 0; 159 int ret = 0;
160 u32 rcv_array_count;
160 161
161 addr = pci_resource_start(pdev, 0); 162 addr = pci_resource_start(pdev, 0);
162 len = pci_resource_len(pdev, 0); 163 len = pci_resource_len(pdev, 0);
@@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
186 goto nomem; 187 goto nomem;
187 } 188 }
188 189
189 dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); 190 rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
190 dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); 191 dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
191 dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; 192 dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
192 193
193 dd->kregbase2 = ioremap_nocache( 194 dd->kregbase2 = ioremap_nocache(
194 addr + dd->base2_start, 195 addr + dd->base2_start,
@@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
214 * to write an entire cacheline worth of entries in one shot. 215 * to write an entire cacheline worth of entries in one shot.
215 */ 216 */
216 dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY, 217 dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
217 dd->chip_rcv_array_count * 8); 218 rcv_array_count * 8);
218 if (!dd->rcvarray_wc) { 219 if (!dd->rcvarray_wc) {
219 dd_dev_err(dd, "WC mapping of receive array failed\n"); 220 dd_dev_err(dd, "WC mapping of receive array failed\n");
220 goto nomem; 221 goto nomem;
221 } 222 }
222 dd_dev_info(dd, "WC RcvArray: %p for %x\n", 223 dd_dev_info(dd, "WC RcvArray: %p for %x\n",
223 dd->rcvarray_wc, dd->chip_rcv_array_count * 8); 224 dd->rcvarray_wc, rcv_array_count * 8);
224 225
225 dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */ 226 dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
226 return 0; 227 return 0;
@@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd)
346/* 347/*
347 * Returns: 348 * Returns:
348 * - actual number of interrupts allocated or 349 * - actual number of interrupts allocated or
349 * - 0 if fell back to INTx.
350 * - error 350 * - error
351 */ 351 */
352int request_msix(struct hfi1_devdata *dd, u32 msireq) 352int request_msix(struct hfi1_devdata *dd, u32 msireq)
353{ 353{
354 int nvec; 354 int nvec;
355 355
356 nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, 356 nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
357 PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
358 if (nvec < 0) { 357 if (nvec < 0) {
359 dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec); 358 dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
360 return nvec; 359 return nvec;
@@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
362 361
363 tune_pcie_caps(dd); 362 tune_pcie_caps(dd);
364 363
365 /* check for legacy IRQ */
366 if (nvec == 1 && !dd->pcidev->msix_enabled)
367 return 0;
368
369 return nvec; 364 return nvec;
370} 365}
371 366
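
With the INTx/legacy fallback dropped, min_vecs and max_vecs are both msireq, so pci_alloc_irq_vectors() either allocates exactly the requested number of MSI-X vectors or returns an error. A hedged sketch of the resulting contract; variable names mirror request_msix() and the index i is illustrative:

	nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
	if (nvec < 0)
		return nvec;	/* hard failure; no fall back to one legacy IRQ */

	/* on success nvec == msireq, so per-vector lookups are safe */
	irq = pci_irq_vector(dd->pcidev, i);
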
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9cac15d10c4f..c2c1cba5b23b 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015-2017 Intel Corporation. 2 * Copyright(c) 2015-2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -226,7 +226,7 @@ static const char *sc_type_name(int index)
226int init_sc_pools_and_sizes(struct hfi1_devdata *dd) 226int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
227{ 227{
228 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } }; 228 struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
229 int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1; 229 int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1;
230 int total_contexts = 0; 230 int total_contexts = 0;
231 int fixed_blocks; 231 int fixed_blocks;
232 int pool_blocks; 232 int pool_blocks;
@@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
343 sc_type_name(i), count); 343 sc_type_name(i), count);
344 return -EINVAL; 344 return -EINVAL;
345 } 345 }
346 if (total_contexts + count > dd->chip_send_contexts) 346 if (total_contexts + count > chip_send_contexts(dd))
347 count = dd->chip_send_contexts - total_contexts; 347 count = chip_send_contexts(dd) - total_contexts;
348 348
349 total_contexts += count; 349 total_contexts += count;
350 350
@@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
507 if (sci->type == type && sci->allocated == 0) { 507 if (sci->type == type && sci->allocated == 0) {
508 sci->allocated = 1; 508 sci->allocated = 1;
509 /* use a 1:1 mapping, but make them non-equal */ 509 /* use a 1:1 mapping, but make them non-equal */
510 context = dd->chip_send_contexts - index - 1; 510 context = chip_send_contexts(dd) - index - 1;
511 dd->hw_to_sw[context] = index; 511 dd->hw_to_sw[context] = index;
512 *sw_index = index; 512 *sw_index = index;
513 *hw_context = context; 513 *hw_context = context;
@@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc)
1618 /* Wake up the most starved one first */ 1618 /* Wake up the most starved one first */
1619 if (n) 1619 if (n)
1620 hfi1_qp_wakeup(qps[max_idx], 1620 hfi1_qp_wakeup(qps[max_idx],
1621 RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); 1621 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
1622 for (i = 0; i < n; i++) 1622 for (i = 0; i < n; i++)
1623 if (i != max_idx) 1623 if (i != max_idx)
1624 hfi1_qp_wakeup(qps[i], 1624 hfi1_qp_wakeup(qps[i],
1625 RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN); 1625 RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
1626} 1626}
1627 1627
1628/* translate a send credit update to a bit code of reasons */ 1628/* translate a send credit update to a bit code of reasons */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1697d96151bd..9b1e84a6b1cc 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015 - 2017 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
273 if (attr_mask & IB_QP_PATH_MIG_STATE && 273 if (attr_mask & IB_QP_PATH_MIG_STATE &&
274 attr->path_mig_state == IB_MIG_MIGRATED && 274 attr->path_mig_state == IB_MIG_MIGRATED &&
275 qp->s_mig_state == IB_MIG_ARMED) { 275 qp->s_mig_state == IB_MIG_ARMED) {
276 qp->s_flags |= RVT_S_AHG_CLEAR; 276 qp->s_flags |= HFI1_S_AHG_CLEAR;
277 priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); 277 priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
278 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); 278 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
279 priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc); 279 priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
@@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
717 qp->remote_ah_attr = qp->alt_ah_attr; 717 qp->remote_ah_attr = qp->alt_ah_attr;
718 qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr); 718 qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
719 qp->s_pkey_index = qp->s_alt_pkey_index; 719 qp->s_pkey_index = qp->s_alt_pkey_index;
720 qp->s_flags |= RVT_S_AHG_CLEAR; 720 qp->s_flags |= HFI1_S_AHG_CLEAR;
721 priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr); 721 priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
722 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); 722 priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
723 qp_set_16b(qp); 723 qp_set_16b(qp);
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b2d4cba8d15b..078cff7560b6 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,7 +1,7 @@
1#ifndef _QP_H 1#ifndef _QP_H
2#define _QP_H 2#define _QP_H
3/* 3/*
4 * Copyright(c) 2015 - 2017 Intel Corporation. 4 * Copyright(c) 2015 - 2018 Intel Corporation.
5 * 5 *
6 * This file is provided under a dual BSD/GPLv2 license. When using or 6 * This file is provided under a dual BSD/GPLv2 license. When using or
7 * redistributing this file, you may do so under either license. 7 * redistributing this file, you may do so under either license.
@@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
70} 70}
71 71
72/* 72/*
73 * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
74 *
75 * HFI1_S_AHG_VALID - ahg header valid on chip
76 * HFI1_S_AHG_CLEAR - have send engine clear ahg state
77 * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
78 * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
79 */
80#define HFI1_S_AHG_VALID 0x80000000
81#define HFI1_S_AHG_CLEAR 0x40000000
82#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
83#define HFI1_S_MIN_BIT_MASK 0x01000000
84
85/*
86 * overload wait defines
87 */
88
89#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
90#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
91
92/*
73 * free_ahg - clear ahg from QP 93 * free_ahg - clear ahg from QP
74 */ 94 */
75static inline void clear_ahg(struct rvt_qp *qp) 95static inline void clear_ahg(struct rvt_qp *qp)
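
A minimal sketch of how the relocated flags are used, assuming qp->s_flags reserves the bits above HFI1_S_MIN_BIT_MASK for the driver; it mirrors the clear_ahg()/build_ahg() pattern elsewhere in this series:

	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
		clear_ahg(qp);			/* also drops HFI1_S_AHG_VALID */
	if (!(qp->s_flags & HFI1_S_AHG_VALID))
		qp->s_flags |= HFI1_S_AHG_VALID;	/* set once the AHG header is built */
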
@@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp)
77 struct hfi1_qp_priv *priv = qp->priv; 97 struct hfi1_qp_priv *priv = qp->priv;
78 98
79 priv->s_ahg->ahgcount = 0; 99 priv->s_ahg->ahgcount = 0;
80 qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR); 100 qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
81 if (priv->s_sde && qp->s_ahgidx >= 0) 101 if (priv->s_sde && qp->s_ahgidx >= 0)
82 sdma_ahg_free(priv->s_sde, qp->s_ahgidx); 102 sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
83 qp->s_ahgidx = -1; 103 qp->s_ahgidx = -1;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index f15c93102081..9bd63abb2dfe 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015, 2016 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -241,7 +241,7 @@ bail:
241 smp_wmb(); 241 smp_wmb();
242 qp->s_flags &= ~(RVT_S_RESP_PENDING 242 qp->s_flags &= ~(RVT_S_RESP_PENDING
243 | RVT_S_ACK_PENDING 243 | RVT_S_ACK_PENDING
244 | RVT_S_AHG_VALID); 244 | HFI1_S_AHG_VALID);
245 return 0; 245 return 0;
246} 246}
247 247
@@ -1024,7 +1024,7 @@ done:
1024 if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && 1024 if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
1025 (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) 1025 (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
1026 qp->s_flags |= RVT_S_WAIT_PSN; 1026 qp->s_flags |= RVT_S_WAIT_PSN;
1027 qp->s_flags &= ~RVT_S_AHG_VALID; 1027 qp->s_flags &= ~HFI1_S_AHG_VALID;
1028} 1028}
1029 1029
1030/* 1030/*
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index ef4c566e206f..5f56f3c1b4c4 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015 - 2017 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
194 spin_lock_irqsave(&sqp->s_lock, flags); 194 spin_lock_irqsave(&sqp->s_lock, flags);
195 195
196 /* Return if we are already busy processing a work request. */ 196 /* Return if we are already busy processing a work request. */
197 if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) || 197 if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
198 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 198 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
199 goto unlock; 199 goto unlock;
200 200
@@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
533{ 533{
534 struct hfi1_qp_priv *priv = qp->priv; 534 struct hfi1_qp_priv *priv = qp->priv;
535 535
536 if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR)) 536 if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
537 clear_ahg(qp); 537 clear_ahg(qp);
538 if (!(qp->s_flags & RVT_S_AHG_VALID)) { 538 if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
539 /* first middle that needs copy */ 539 /* first middle that needs copy */
540 if (qp->s_ahgidx < 0) 540 if (qp->s_ahgidx < 0)
541 qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); 541 qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
@@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
544 priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; 544 priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
545 /* save to protect a change in another thread */ 545 /* save to protect a change in another thread */
546 priv->s_ahg->ahgidx = qp->s_ahgidx; 546 priv->s_ahg->ahgidx = qp->s_ahgidx;
547 qp->s_flags |= RVT_S_AHG_VALID; 547 qp->s_flags |= HFI1_S_AHG_VALID;
548 } 548 }
549 } else { 549 } else {
550 /* subsequent middle after valid */ 550 /* subsequent middle after valid */
@@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
650 if (middle) 650 if (middle)
651 build_ahg(qp, bth2); 651 build_ahg(qp, bth2);
652 else 652 else
653 qp->s_flags &= ~RVT_S_AHG_VALID; 653 qp->s_flags &= ~HFI1_S_AHG_VALID;
654 654
655 bth0 |= pkey; 655 bth0 |= pkey;
656 bth0 |= extra_bytes << 20; 656 bth0 |= extra_bytes << 20;
@@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
727 if (middle) 727 if (middle)
728 build_ahg(qp, bth2); 728 build_ahg(qp, bth2);
729 else 729 else
730 qp->s_flags &= ~RVT_S_AHG_VALID; 730 qp->s_flags &= ~HFI1_S_AHG_VALID;
731 731
732 bth0 |= pkey; 732 bth0 |= pkey;
733 bth0 |= extra_bytes << 20; 733 bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 7fb350b87b49..88e326d6cc49 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
1351 struct hfi1_pportdata *ppd = dd->pport + port; 1351 struct hfi1_pportdata *ppd = dd->pport + port;
1352 u32 per_sdma_credits; 1352 u32 per_sdma_credits;
1353 uint idle_cnt = sdma_idle_cnt; 1353 uint idle_cnt = sdma_idle_cnt;
1354 size_t num_engines = dd->chip_sdma_engines; 1354 size_t num_engines = chip_sdma_engines(dd);
1355 int ret = -ENOMEM; 1355 int ret = -ENOMEM;
1356 1356
1357 if (!HFI1_CAP_IS_KSET(SDMA)) { 1357 if (!HFI1_CAP_IS_KSET(SDMA)) {
@@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
1360 } 1360 }
1361 if (mod_num_sdma && 1361 if (mod_num_sdma &&
1362 /* can't exceed chip support */ 1362 /* can't exceed chip support */
1363 mod_num_sdma <= dd->chip_sdma_engines && 1363 mod_num_sdma <= chip_sdma_engines(dd) &&
1364 /* count must be >= vls */ 1364 /* count must be >= vls */
1365 mod_num_sdma >= num_vls) 1365 mod_num_sdma >= num_vls)
1366 num_engines = mod_num_sdma; 1366 num_engines = mod_num_sdma;
1367 1367
1368 dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma); 1368 dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
1369 dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines); 1369 dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
1370 dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n", 1370 dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
1371 dd->chip_sdma_mem_size); 1371 chip_sdma_mem_size(dd));
1372 1372
1373 per_sdma_credits = 1373 per_sdma_credits =
1374 dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE); 1374 chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
1375 1375
1376 /* set up freeze waitqueue */ 1376 /* set up freeze waitqueue */
1377 init_waitqueue_head(&dd->sdma_unfreeze_wq); 1377 init_waitqueue_head(&dd->sdma_unfreeze_wq);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 08991874c0e2..13374c727b14 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp,
1007 int was_empty; 1007 int was_empty;
1008 1008
1009 dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); 1009 dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
1010 dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); 1010 dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
1011 qp->s_flags |= flag; 1011 qp->s_flags |= flag;
1012 was_empty = list_empty(&sc->piowait); 1012 was_empty = list_empty(&sc->piowait);
1013 iowait_queue(ps->pkts_sent, &priv->s_iowait, 1013 iowait_queue(ps->pkts_sent, &priv->s_iowait,
@@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
1376 return pio_wait(qp, 1376 return pio_wait(qp,
1377 ps->s_txreq->psc, 1377 ps->s_txreq->psc,
1378 ps, 1378 ps,
1379 RVT_S_WAIT_PIO_DRAIN); 1379 HFI1_S_WAIT_PIO_DRAIN);
1380 return sr(qp, ps, 0); 1380 return sr(qp, ps, 0);
1381} 1381}
1382 1382
@@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
1410 rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; 1410 rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
1411 rdi->dparms.props.max_qp = hfi1_max_qps; 1411 rdi->dparms.props.max_qp = hfi1_max_qps;
1412 rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; 1412 rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
1413 rdi->dparms.props.max_sge = hfi1_max_sges; 1413 rdi->dparms.props.max_send_sge = hfi1_max_sges;
1414 rdi->dparms.props.max_recv_sge = hfi1_max_sges;
1414 rdi->dparms.props.max_sge_rd = hfi1_max_sges; 1415 rdi->dparms.props.max_sge_rd = hfi1_max_sges;
1415 rdi->dparms.props.max_cq = hfi1_max_cqs; 1416 rdi->dparms.props.max_cq = hfi1_max_cqs;
1416 rdi->dparms.props.max_ah = hfi1_max_ahs; 1417 rdi->dparms.props.max_ah = hfi1_max_ahs;
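
This hunk splits the single max_sge attribute into max_send_sge and max_recv_sge, so hardware with asymmetric limits can report them separately; hfi1 simply fills both from the same hfi1_max_sges value. A small sketch of the idea, with illustrative names:

    /* Illustrative stand-in for the split device attributes. */
    struct dev_attrs_example {
            unsigned int max_send_sge;
            unsigned int max_recv_sge;
    };

    static void fill_sge_limits_example(struct dev_attrs_example *attrs,
                                        unsigned int hw_sge_limit)
    {
            attrs->max_send_sge = hw_sge_limit;
            attrs->max_recv_sge = hw_sge_limit;     /* same limit on symmetric HW */
    }
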
@@ -1497,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
1497 props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : 1498 props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
1498 mtu_to_enum(ppd->ibmtu, IB_MTU_4096); 1499 mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
1499 1500
1500 /*
1501 * sm_lid of 0xFFFF needs special handling so that it can
1502 * be differentiated from a permissve LID of 0xFFFF.
1503 * We set the grh_required flag here so the SA can program
1504 * the DGID in the address handle appropriately
1505 */
1506 if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))
1507 props->grh_required = true;
1508
1509 return 0; 1501 return 0;
1510} 1502}
1511 1503
@@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1892 ibdev->process_mad = hfi1_process_mad; 1884 ibdev->process_mad = hfi1_process_mad;
1893 ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; 1885 ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
1894 1886
1895 strncpy(ibdev->node_desc, init_utsname()->nodename, 1887 strlcpy(ibdev->node_desc, init_utsname()->nodename,
1896 sizeof(ibdev->node_desc)); 1888 sizeof(ibdev->node_desc));
1897 1889
1898 /* 1890 /*
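
The node_desc copy above moves from strncpy() to strlcpy(): when the source is at least as long as the destination, strncpy() leaves the buffer without a terminating NUL, while strlcpy() always terminates and returns strlen(src). A minimal userspace stand-in for that behaviour:

    #include <stddef.h>
    #include <string.h>

    /* Bounded copy that always NUL-terminates; a local sketch, not the
     * kernel implementation. */
    static size_t bounded_copy_example(char *dst, const char *src, size_t size)
    {
            size_t len = strlen(src);

            if (size) {
                    size_t n = len < size - 1 ? len : size - 1;

                    memcpy(dst, src, n);
                    dst[n] = '\0';          /* guaranteed terminator */
            }
            return len;                     /* length the caller asked to copy */
    }
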
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 616fc9b6fad8..c643d80c5a53 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2017 Intel Corporation. 2 * Copyright(c) 2017 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
120 uctxt->seq_cnt = 1; 120 uctxt->seq_cnt = 1;
121 uctxt->is_vnic = true; 121 uctxt->is_vnic = true;
122 122
123 if (dd->num_msix_entries) 123 hfi1_set_vnic_msix_info(uctxt);
124 hfi1_set_vnic_msix_info(uctxt);
125 124
126 hfi1_stats.sps_ctxts++; 125 hfi1_stats.sps_ctxts++;
127 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
136 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
137 flush_wc(); 136 flush_wc();
138 137
139 if (dd->num_msix_entries) 138 hfi1_reset_vnic_msix_info(uctxt);
140 hfi1_reset_vnic_msix_info(uctxt);
141 139
142 /* 140 /*
143 * Disable receive context and interrupt available, reset all 141 * Disable receive context and interrupt available, reset all
@@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
818 816
819 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); 817 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
820 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, 818 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
821 dd->chip_sdma_engines, dd->num_vnic_contexts); 819 chip_sdma_engines(dd), dd->num_vnic_contexts);
822 if (!netdev) 820 if (!netdev)
823 return ERR_PTR(-ENOMEM); 821 return ERR_PTR(-ENOMEM);
824 822
825 rn = netdev_priv(netdev); 823 rn = netdev_priv(netdev);
826 vinfo = opa_vnic_dev_priv(netdev); 824 vinfo = opa_vnic_dev_priv(netdev);
827 vinfo->dd = dd; 825 vinfo->dd = dd;
828 vinfo->num_tx_q = dd->chip_sdma_engines; 826 vinfo->num_tx_q = chip_sdma_engines(dd);
829 vinfo->num_rx_q = dd->num_vnic_contexts; 827 vinfo->num_rx_q = dd->num_vnic_contexts;
830 vinfo->netdev = netdev; 828 vinfo->netdev = netdev;
831 rn->free_rdma_netdev = hfi1_vnic_free_rn; 829 rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index d74928621559..0d96c5bb38cd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
44 struct ib_udata *udata) 44 struct ib_udata *udata)
45{ 45{
46 struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); 46 struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
47 const struct ib_gid_attr *gid_attr;
47 struct device *dev = hr_dev->dev; 48 struct device *dev = hr_dev->dev;
48 struct ib_gid_attr gid_attr;
49 struct hns_roce_ah *ah; 49 struct hns_roce_ah *ah;
50 u16 vlan_tag = 0xffff; 50 u16 vlan_tag = 0xffff;
51 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 51 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
52 union ib_gid sgid;
53 int ret;
54 52
55 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 53 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
56 if (!ah) 54 if (!ah)
@@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
59 /* Get mac address */ 57 /* Get mac address */
60 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); 58 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
61 59
62 /* Get source gid */ 60 gid_attr = ah_attr->grh.sgid_attr;
63 ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr), 61 if (is_vlan_dev(gid_attr->ndev))
64 grh->sgid_index, &sgid, &gid_attr); 62 vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
65 if (ret) {
66 dev_err(dev, "get sgid failed! ret = %d\n", ret);
67 kfree(ah);
68 return ERR_PTR(ret);
69 }
70
71 if (is_vlan_dev(gid_attr.ndev))
72 vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
73 dev_put(gid_attr.ndev);
74 63
75 if (vlan_tag < 0x1000) 64 if (vlan_tag < 0x1000)
76 vlan_tag |= (rdma_ah_get_sl(ah_attr) & 65 vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -108,7 +97,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
108 rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate); 97 rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
109 rdma_ah_set_grh(ah_attr, NULL, 98 rdma_ah_set_grh(ah_attr, NULL,
110 (le32_to_cpu(ah->av.sl_tclass_flowlabel) & 99 (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
111 HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index, 100 HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index,
112 ah->av.hop_limit, 101 ah->av.hop_limit,
113 (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> 102 (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
114 HNS_ROCE_TCLASS_SHIFT)); 103 HNS_ROCE_TCLASS_SHIFT));
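
The create_ah change above drops the ib_get_cached_gid() call, which copied the GID and required a dev_put() on the netdev, in favour of the sgid_attr pointer already attached to ah_attr->grh by the core and expected to stay valid for the duration of the call. A rough sketch of that borrow-a-reference pattern, with illustrative types:

    #include <stdbool.h>
    #include <stdint.h>

    struct gid_entry_example {
            bool     ndev_is_vlan;
            uint16_t vlan_id;
    };

    struct ah_attr_example {
            const struct gid_entry_example *sgid_entry;     /* held by the caller */
    };

    /* The consumer reads through the caller's pointer instead of doing
     * its own lookup and managing its own reference. */
    static uint16_t ah_vlan_tag_example(const struct ah_attr_example *attr)
    {
            const struct gid_entry_example *gid = attr->sgid_entry;

            return gid->ndev_is_vlan ? gid->vlan_id : 0xffff;
    }
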
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 319cb74aebaf..93d4b4ec002d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -382,15 +382,6 @@
382#define ROCEE_VF_EQ_DB_CFG0_REG 0x238 382#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
383#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C 383#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
384 384
385#define ROCEE_VF_SMAC_CFG0_REG 0x12000
386#define ROCEE_VF_SMAC_CFG1_REG 0x12004
387
388#define ROCEE_VF_SGID_CFG0_REG 0x10000
389#define ROCEE_VF_SGID_CFG1_REG 0x10004
390#define ROCEE_VF_SGID_CFG2_REG 0x10008
391#define ROCEE_VF_SGID_CFG3_REG 0x1000c
392#define ROCEE_VF_SGID_CFG4_REG 0x10010
393
394#define ROCEE_VF_ABN_INT_CFG_REG 0x13000 385#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
395#define ROCEE_VF_ABN_INT_ST_REG 0x13004 386#define ROCEE_VF_ABN_INT_ST_REG 0x13004
396#define ROCEE_VF_ABN_INT_EN_REG 0x13008 387#define ROCEE_VF_ABN_INT_EN_REG 0x13008
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index ebee2782a573..e2f93c1ce86a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
41found: 41found:
42 db->dma = sg_dma_address(page->umem->sg_head.sgl) + 42 db->dma = sg_dma_address(page->umem->sg_head.sgl) +
43 (virt & ~PAGE_MASK); 43 (virt & ~PAGE_MASK);
44 page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK;
45 db->virt_addr = sg_virt(page->umem->sg_head.sgl);
44 db->u.user_page = page; 46 db->u.user_page = page;
45 refcount_inc(&page->refcount); 47 refcount_inc(&page->refcount);
46 48
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 31221d506d9a..9a24fd0ee3e7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -76,7 +76,7 @@
76/* 4G/4K = 1M */ 76/* 4G/4K = 1M */
77#define HNS_ROCE_SL_SHIFT 28 77#define HNS_ROCE_SL_SHIFT 28
78#define HNS_ROCE_TCLASS_SHIFT 20 78#define HNS_ROCE_TCLASS_SHIFT 20
79#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff 79#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff
80 80
81#define HNS_ROCE_MAX_PORTS 6 81#define HNS_ROCE_MAX_PORTS 6
82#define HNS_ROCE_MAX_GID_NUM 16 82#define HNS_ROCE_MAX_GID_NUM 16
@@ -110,6 +110,7 @@
110 110
111enum { 111enum {
112 HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, 112 HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
113 HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
113}; 114};
114 115
115enum { 116enum {
@@ -190,7 +191,8 @@ enum {
190 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), 191 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
191 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), 192 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
192 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), 193 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
193 HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) 194 HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
195 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
194}; 196};
195 197
196enum hns_roce_mtt_type { 198enum hns_roce_mtt_type {
@@ -385,6 +387,7 @@ struct hns_roce_db {
385 struct hns_roce_user_db_page *user_page; 387 struct hns_roce_user_db_page *user_page;
386 } u; 388 } u;
387 dma_addr_t dma; 389 dma_addr_t dma;
390 void *virt_addr;
388 int index; 391 int index;
389 int order; 392 int order;
390}; 393};
@@ -524,7 +527,9 @@ struct hns_roce_qp {
524 struct hns_roce_buf hr_buf; 527 struct hns_roce_buf hr_buf;
525 struct hns_roce_wq rq; 528 struct hns_roce_wq rq;
526 struct hns_roce_db rdb; 529 struct hns_roce_db rdb;
530 struct hns_roce_db sdb;
527 u8 rdb_en; 531 u8 rdb_en;
532 u8 sdb_en;
528 u32 doorbell_qpn; 533 u32 doorbell_qpn;
529 __le32 sq_signal_bits; 534 __le32 sq_signal_bits;
530 u32 sq_next_wqe; 535 u32 sq_next_wqe;
@@ -579,22 +584,22 @@ struct hns_roce_ceqe {
579}; 584};
580 585
581struct hns_roce_aeqe { 586struct hns_roce_aeqe {
582 u32 asyn; 587 __le32 asyn;
583 union { 588 union {
584 struct { 589 struct {
585 u32 qp; 590 __le32 qp;
586 u32 rsv0; 591 u32 rsv0;
587 u32 rsv1; 592 u32 rsv1;
588 } qp_event; 593 } qp_event;
589 594
590 struct { 595 struct {
591 u32 cq; 596 __le32 cq;
592 u32 rsv0; 597 u32 rsv0;
593 u32 rsv1; 598 u32 rsv1;
594 } cq_event; 599 } cq_event;
595 600
596 struct { 601 struct {
597 u32 ceqe; 602 __le32 ceqe;
598 u32 rsv0; 603 u32 rsv0;
599 u32 rsv1; 604 u32 rsv1;
600 } ce_event; 605 } ce_event;
@@ -641,6 +646,8 @@ struct hns_roce_eq {
641 int shift; 646 int shift;
642 dma_addr_t cur_eqe_ba; 647 dma_addr_t cur_eqe_ba;
643 dma_addr_t nxt_eqe_ba; 648 dma_addr_t nxt_eqe_ba;
649 int event_type;
650 int sub_type;
644}; 651};
645 652
646struct hns_roce_eq_table { 653struct hns_roce_eq_table {
@@ -720,10 +727,21 @@ struct hns_roce_caps {
720 u32 eqe_ba_pg_sz; 727 u32 eqe_ba_pg_sz;
721 u32 eqe_buf_pg_sz; 728 u32 eqe_buf_pg_sz;
722 u32 eqe_hop_num; 729 u32 eqe_hop_num;
730 u32 sl_num;
731 u32 tsq_buf_pg_sz;
732 u32 tpq_buf_pg_sz;
723 u32 chunk_sz; /* chunk size in non multihop mode*/ 733 u32 chunk_sz; /* chunk size in non multihop mode*/
724 u64 flags; 734 u64 flags;
725}; 735};
726 736
737struct hns_roce_work {
738 struct hns_roce_dev *hr_dev;
739 struct work_struct work;
740 u32 qpn;
741 int event_type;
742 int sub_type;
743};
744
727struct hns_roce_hw { 745struct hns_roce_hw {
728 int (*reset)(struct hns_roce_dev *hr_dev, bool enable); 746 int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
729 int (*cmq_init)(struct hns_roce_dev *hr_dev); 747 int (*cmq_init)(struct hns_roce_dev *hr_dev);
@@ -736,7 +754,7 @@ struct hns_roce_hw {
736 u16 token, int event); 754 u16 token, int event);
737 int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout); 755 int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
738 int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, 756 int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
739 union ib_gid *gid, const struct ib_gid_attr *attr); 757 const union ib_gid *gid, const struct ib_gid_attr *attr);
740 int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); 758 int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
741 void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, 759 void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
742 enum ib_mtu mtu); 760 enum ib_mtu mtu);
@@ -760,10 +778,10 @@ struct hns_roce_hw {
760 int attr_mask, enum ib_qp_state cur_state, 778 int attr_mask, enum ib_qp_state cur_state,
761 enum ib_qp_state new_state); 779 enum ib_qp_state new_state);
762 int (*destroy_qp)(struct ib_qp *ibqp); 780 int (*destroy_qp)(struct ib_qp *ibqp);
763 int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, 781 int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
764 struct ib_send_wr **bad_wr); 782 const struct ib_send_wr **bad_wr);
765 int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, 783 int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
766 struct ib_recv_wr **bad_recv_wr); 784 const struct ib_recv_wr **bad_recv_wr);
767 int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); 785 int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
768 int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 786 int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
769 int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr); 787 int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
@@ -816,6 +834,7 @@ struct hns_roce_dev {
816 u32 tptr_size; /*only for hw v1*/ 834 u32 tptr_size; /*only for hw v1*/
817 const struct hns_roce_hw *hw; 835 const struct hns_roce_hw *hw;
818 void *priv; 836 void *priv;
837 struct workqueue_struct *irq_workq;
819}; 838};
820 839
821static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) 840static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -864,7 +883,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
864 return container_of(hr_qp, struct hns_roce_sqp, hr_qp); 883 return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
865} 884}
866 885
867static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) 886static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
868{ 887{
869 __raw_writeq(*(u64 *) val, dest); 888 __raw_writeq(*(u64 *) val, dest);
870} 889}
@@ -982,7 +1001,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
982void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); 1001void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
983void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, 1002void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
984 int cnt); 1003 int cnt);
985__be32 send_ieth(struct ib_send_wr *wr); 1004__be32 send_ieth(const struct ib_send_wr *wr);
986int to_hr_qp_type(int qp_type); 1005int to_hr_qp_type(int qp_type);
987 1006
988struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, 1007struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
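
Several hunks in this header constify the work-request paths: post_send()/post_recv() now take const ib_send_wr/ib_recv_wr pointers and report failures through a const **bad_wr, and send_ieth() follows suit. A compact sketch of what that contract looks like, with illustrative types:

    struct send_wr_example {
            struct send_wr_example *next;
            int opcode;
    };

    /* The provider may walk the caller's chain but not modify it, and
     * *bad_wr points back into that same const chain. */
    static int post_send_example(const struct send_wr_example *wr,
                                 const struct send_wr_example **bad_wr)
    {
            for (; wr; wr = wr->next) {
                    if (wr->opcode < 0) {           /* pretend validation */
                            *bad_wr = wr;           /* report, never modify */
                            return -1;
                    }
                    /* ... build a hardware WQE from *wr ... */
            }
            return 0;
    }
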
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 63b5b3edabcb..f6faefed96e8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -170,7 +170,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
170 case 3: 170 case 3:
171 mhop->l2_idx = table_idx & (chunk_ba_num - 1); 171 mhop->l2_idx = table_idx & (chunk_ba_num - 1);
172 mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1); 172 mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1);
173 mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num; 173 mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num;
174 break; 174 break;
175 case 2: 175 case 2:
176 mhop->l1_idx = table_idx & (chunk_ba_num - 1); 176 mhop->l1_idx = table_idx & (chunk_ba_num - 1);
@@ -342,7 +342,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
342 } else { 342 } else {
343 break; 343 break;
344 } 344 }
345 msleep(HW_SYNC_SLEEP_TIME_INTERVAL); 345 mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
346 } 346 }
347 347
348 bt_cmd_l = (u32)bt_ba; 348 bt_cmd_l = (u32)bt_ba;
@@ -494,6 +494,9 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
494 step_idx = 1; 494 step_idx = 1;
495 } else if (hop_num == HNS_ROCE_HOP_NUM_0) { 495 } else if (hop_num == HNS_ROCE_HOP_NUM_0) {
496 step_idx = 0; 496 step_idx = 0;
497 } else {
498 ret = -EINVAL;
499 goto err_dma_alloc_l1;
497 } 500 }
498 501
499 /* set HEM base address to hardware */ 502 /* set HEM base address to hardware */
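
The added else branch above gives the mhop path an explicit failure for hop_num values it does not recognize, instead of falling through with the step index unset. A tiny sketch of the same defensive shape, with illustrative values:

    #include <errno.h>

    static int pick_step_idx_example(int hop_num, int *step_idx)
    {
            switch (hop_num) {
            case 2:
                    *step_idx = 1;
                    return 0;
            case 0:
                    *step_idx = 0;
                    return 0;
            default:
                    return -EINVAL;         /* unknown hop configuration */
            }
    }
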
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8444234ed092..081aa91fc162 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
58 rseg->len = 0; 58 rseg->len = 0;
59} 59}
60 60
61static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 61static int hns_roce_v1_post_send(struct ib_qp *ibqp,
62 struct ib_send_wr **bad_wr) 62 const struct ib_send_wr *wr,
63 const struct ib_send_wr **bad_wr)
63{ 64{
64 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 65 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
65 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); 66 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
@@ -173,12 +174,14 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
173 174
174 roce_set_field(ud_sq_wqe->u32_36, 175 roce_set_field(ud_sq_wqe->u32_36,
175 UD_SEND_WQE_U32_36_FLOW_LABEL_M, 176 UD_SEND_WQE_U32_36_FLOW_LABEL_M,
176 UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); 177 UD_SEND_WQE_U32_36_FLOW_LABEL_S,
178 ah->av.sl_tclass_flowlabel &
179 HNS_ROCE_FLOW_LABEL_MASK);
177 roce_set_field(ud_sq_wqe->u32_36, 180 roce_set_field(ud_sq_wqe->u32_36,
178 UD_SEND_WQE_U32_36_PRIORITY_M, 181 UD_SEND_WQE_U32_36_PRIORITY_M,
179 UD_SEND_WQE_U32_36_PRIORITY_S, 182 UD_SEND_WQE_U32_36_PRIORITY_S,
180 ah->av.sl_tclass_flowlabel >> 183 le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
181 HNS_ROCE_SL_SHIFT); 184 HNS_ROCE_SL_SHIFT);
182 roce_set_field(ud_sq_wqe->u32_36, 185 roce_set_field(ud_sq_wqe->u32_36,
183 UD_SEND_WQE_U32_36_SGID_INDEX_M, 186 UD_SEND_WQE_U32_36_SGID_INDEX_M,
184 UD_SEND_WQE_U32_36_SGID_INDEX_S, 187 UD_SEND_WQE_U32_36_SGID_INDEX_S,
@@ -191,7 +194,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
191 ah->av.hop_limit); 194 ah->av.hop_limit);
192 roce_set_field(ud_sq_wqe->u32_40, 195 roce_set_field(ud_sq_wqe->u32_40,
193 UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, 196 UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
194 UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); 197 UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
198 ah->av.sl_tclass_flowlabel >>
199 HNS_ROCE_TCLASS_SHIFT);
195 200
196 memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); 201 memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
197 202
@@ -333,7 +338,7 @@ out:
333 doorbell[0] = le32_to_cpu(sq_db.u32_4); 338 doorbell[0] = le32_to_cpu(sq_db.u32_4);
334 doorbell[1] = le32_to_cpu(sq_db.u32_8); 339 doorbell[1] = le32_to_cpu(sq_db.u32_8);
335 340
336 hns_roce_write64_k(doorbell, qp->sq.db_reg_l); 341 hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
337 qp->sq_next_wqe = ind; 342 qp->sq_next_wqe = ind;
338 } 343 }
339 344
@@ -342,14 +347,15 @@ out:
342 return ret; 347 return ret;
343} 348}
344 349
345static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 350static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
346 struct ib_recv_wr **bad_wr) 351 const struct ib_recv_wr *wr,
352 const struct ib_recv_wr **bad_wr)
347{ 353{
348 int ret = 0; 354 int ret = 0;
349 int nreq = 0; 355 int nreq = 0;
350 int ind = 0; 356 int ind = 0;
351 int i = 0; 357 int i = 0;
352 u32 reg_val = 0; 358 u32 reg_val;
353 unsigned long flags = 0; 359 unsigned long flags = 0;
354 struct hns_roce_rq_wqe_ctrl *ctrl = NULL; 360 struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
355 struct hns_roce_wqe_data_seg *scat = NULL; 361 struct hns_roce_wqe_data_seg *scat = NULL;
@@ -402,14 +408,18 @@ out:
402 wmb(); 408 wmb();
403 409
404 if (ibqp->qp_type == IB_QPT_GSI) { 410 if (ibqp->qp_type == IB_QPT_GSI) {
411 __le32 tmp;
412
405 /* SW update GSI rq header */ 413 /* SW update GSI rq header */
406 reg_val = roce_read(to_hr_dev(ibqp->device), 414 reg_val = roce_read(to_hr_dev(ibqp->device),
407 ROCEE_QP1C_CFG3_0_REG + 415 ROCEE_QP1C_CFG3_0_REG +
408 QP1C_CFGN_OFFSET * hr_qp->phy_port); 416 QP1C_CFGN_OFFSET * hr_qp->phy_port);
409 roce_set_field(reg_val, 417 tmp = cpu_to_le32(reg_val);
418 roce_set_field(tmp,
410 ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, 419 ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
411 ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, 420 ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
412 hr_qp->rq.head); 421 hr_qp->rq.head);
422 reg_val = le32_to_cpu(tmp);
413 roce_write(to_hr_dev(ibqp->device), 423 roce_write(to_hr_dev(ibqp->device),
414 ROCEE_QP1C_CFG3_0_REG + 424 ROCEE_QP1C_CFG3_0_REG +
415 QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); 425 QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
@@ -430,7 +440,8 @@ out:
430 doorbell[0] = le32_to_cpu(rq_db.u32_4); 440 doorbell[0] = le32_to_cpu(rq_db.u32_4);
431 doorbell[1] = le32_to_cpu(rq_db.u32_8); 441 doorbell[1] = le32_to_cpu(rq_db.u32_8);
432 442
433 hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); 443 hns_roce_write64_k((__le32 *)doorbell,
444 hr_qp->rq.db_reg_l);
434 } 445 }
435 } 446 }
436 spin_unlock_irqrestore(&hr_qp->rq.lock, flags); 447 spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -441,51 +452,63 @@ out:
441static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, 452static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
442 int sdb_mode, int odb_mode) 453 int sdb_mode, int odb_mode)
443{ 454{
455 __le32 tmp;
444 u32 val; 456 u32 val;
445 457
446 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); 458 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
447 roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); 459 tmp = cpu_to_le32(val);
448 roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); 460 roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
461 roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
462 val = le32_to_cpu(tmp);
449 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); 463 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
450} 464}
451 465
452static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, 466static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
453 u32 odb_mode) 467 u32 odb_mode)
454{ 468{
469 __le32 tmp;
455 u32 val; 470 u32 val;
456 471
457 /* Configure SDB/ODB extend mode */ 472 /* Configure SDB/ODB extend mode */
458 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); 473 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
459 roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); 474 tmp = cpu_to_le32(val);
460 roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); 475 roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
476 roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
477 val = le32_to_cpu(tmp);
461 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); 478 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
462} 479}
463 480
464static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, 481static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
465 u32 sdb_alful) 482 u32 sdb_alful)
466{ 483{
484 __le32 tmp;
467 u32 val; 485 u32 val;
468 486
469 /* Configure SDB */ 487 /* Configure SDB */
470 val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); 488 val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
471 roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, 489 tmp = cpu_to_le32(val);
490 roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
472 ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); 491 ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
473 roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, 492 roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
474 ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); 493 ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
494 val = le32_to_cpu(tmp);
475 roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); 495 roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
476} 496}
477 497
478static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, 498static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
479 u32 odb_alful) 499 u32 odb_alful)
480{ 500{
501 __le32 tmp;
481 u32 val; 502 u32 val;
482 503
483 /* Configure ODB */ 504 /* Configure ODB */
484 val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); 505 val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
485 roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, 506 tmp = cpu_to_le32(val);
507 roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
486 ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); 508 ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
487 roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, 509 roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
488 ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); 510 ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
511 val = le32_to_cpu(tmp);
489 roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); 512 roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
490} 513}
491 514
@@ -496,6 +519,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
496 struct hns_roce_v1_priv *priv; 519 struct hns_roce_v1_priv *priv;
497 struct hns_roce_db_table *db; 520 struct hns_roce_db_table *db;
498 dma_addr_t sdb_dma_addr; 521 dma_addr_t sdb_dma_addr;
522 __le32 tmp;
499 u32 val; 523 u32 val;
500 524
501 priv = (struct hns_roce_v1_priv *)hr_dev->priv; 525 priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -511,7 +535,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
511 535
512 /* Configure extend SDB depth */ 536 /* Configure extend SDB depth */
513 val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); 537 val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
514 roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, 538 tmp = cpu_to_le32(val);
539 roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
515 ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, 540 ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
516 db->ext_db->esdb_dep); 541 db->ext_db->esdb_dep);
517 /* 542 /*
@@ -519,8 +544,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
519 * using 4K page, and shift more 32 because of 544 * using 4K page, and shift more 32 because of
520 * caculating the high 32 bit value evaluated to hardware. 545 * caculating the high 32 bit value evaluated to hardware.
521 */ 546 */
522 roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, 547 roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
523 ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); 548 ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
549 val = le32_to_cpu(tmp);
524 roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); 550 roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
525 551
526 dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); 552 dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
@@ -535,6 +561,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
535 struct hns_roce_v1_priv *priv; 561 struct hns_roce_v1_priv *priv;
536 struct hns_roce_db_table *db; 562 struct hns_roce_db_table *db;
537 dma_addr_t odb_dma_addr; 563 dma_addr_t odb_dma_addr;
564 __le32 tmp;
538 u32 val; 565 u32 val;
539 566
540 priv = (struct hns_roce_v1_priv *)hr_dev->priv; 567 priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -550,12 +577,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
550 577
551 /* Configure extend ODB depth */ 578 /* Configure extend ODB depth */
552 val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); 579 val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
553 roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, 580 tmp = cpu_to_le32(val);
581 roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
554 ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, 582 ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
555 db->ext_db->eodb_dep); 583 db->ext_db->eodb_dep);
556 roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, 584 roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
557 ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, 585 ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
558 db->ext_db->eodb_dep); 586 db->ext_db->eodb_dep);
587 val = le32_to_cpu(tmp);
559 roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); 588 roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
560 589
561 dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); 590 dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
@@ -762,6 +791,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
762 free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); 791 free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
763 if (!free_mr->mr_free_qp[i]) { 792 if (!free_mr->mr_free_qp[i]) {
764 dev_err(dev, "Create loop qp failed!\n"); 793 dev_err(dev, "Create loop qp failed!\n");
794 ret = -ENOMEM;
765 goto create_lp_qp_failed; 795 goto create_lp_qp_failed;
766 } 796 }
767 hr_qp = free_mr->mr_free_qp[i]; 797 hr_qp = free_mr->mr_free_qp[i];
@@ -831,7 +861,7 @@ alloc_pd_failed:
831 if (hns_roce_ib_destroy_cq(cq)) 861 if (hns_roce_ib_destroy_cq(cq))
832 dev_err(dev, "Destroy cq for create_lp_qp failed!\n"); 862 dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
833 863
834 return -EINVAL; 864 return ret;
835} 865}
836 866
837static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) 867static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
@@ -969,7 +999,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
969{ 999{
970 struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); 1000 struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
971 struct device *dev = &hr_dev->pdev->dev; 1001 struct device *dev = &hr_dev->pdev->dev;
972 struct ib_send_wr send_wr, *bad_wr; 1002 struct ib_send_wr send_wr;
1003 const struct ib_send_wr *bad_wr;
973 int ret; 1004 int ret;
974 1005
975 memset(&send_wr, 0, sizeof(send_wr)); 1006 memset(&send_wr, 0, sizeof(send_wr));
@@ -1161,9 +1192,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
1161static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) 1192static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
1162{ 1193{
1163 int ret; 1194 int ret;
1195 u32 val;
1196 __le32 tmp;
1164 int raq_shift = 0; 1197 int raq_shift = 0;
1165 dma_addr_t addr; 1198 dma_addr_t addr;
1166 u32 val;
1167 struct hns_roce_v1_priv *priv; 1199 struct hns_roce_v1_priv *priv;
1168 struct hns_roce_raq_table *raq; 1200 struct hns_roce_raq_table *raq;
1169 struct device *dev = &hr_dev->pdev->dev; 1201 struct device *dev = &hr_dev->pdev->dev;
@@ -1189,46 +1221,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
1189 /* Configure raq_shift */ 1221 /* Configure raq_shift */
1190 raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); 1222 raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
1191 val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); 1223 val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
1192 roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, 1224 tmp = cpu_to_le32(val);
1225 roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
1193 ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); 1226 ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
1194 /* 1227 /*
1195 * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of 1228 * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
1196 * using 4K page, and shift more 32 because of 1229 * using 4K page, and shift more 32 because of
1197 * caculating the high 32 bit value evaluated to hardware. 1230 * caculating the high 32 bit value evaluated to hardware.
1198 */ 1231 */
1199 roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, 1232 roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
1200 ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, 1233 ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
1201 raq->e_raq_buf->map >> 44); 1234 raq->e_raq_buf->map >> 44);
1235 val = le32_to_cpu(tmp);
1202 roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); 1236 roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
1203 dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); 1237 dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
1204 1238
1205 /* Configure raq threshold */ 1239 /* Configure raq threshold */
1206 val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); 1240 val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
1207 roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, 1241 tmp = cpu_to_le32(val);
1242 roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
1208 ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, 1243 ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
1209 HNS_ROCE_V1_EXT_RAQ_WF); 1244 HNS_ROCE_V1_EXT_RAQ_WF);
1245 val = le32_to_cpu(tmp);
1210 roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); 1246 roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
1211 dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); 1247 dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
1212 1248
1213 /* Enable extend raq */ 1249 /* Enable extend raq */
1214 val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); 1250 val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
1215 roce_set_field(val, 1251 tmp = cpu_to_le32(val);
1252 roce_set_field(tmp,
1216 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, 1253 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
1217 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, 1254 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
1218 POL_TIME_INTERVAL_VAL); 1255 POL_TIME_INTERVAL_VAL);
1219 roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); 1256 roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
1220 roce_set_field(val, 1257 roce_set_field(tmp,
1221 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, 1258 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
1222 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, 1259 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
1223 2); 1260 2);
1224 roce_set_bit(val, 1261 roce_set_bit(tmp,
1225 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); 1262 ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
1263 val = le32_to_cpu(tmp);
1226 roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); 1264 roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
1227 dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); 1265 dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
1228 1266
1229 /* Enable raq drop */ 1267 /* Enable raq drop */
1230 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); 1268 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
1231 roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); 1269 tmp = cpu_to_le32(val);
1270 roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
1271 val = le32_to_cpu(tmp);
1232 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); 1272 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
1233 dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); 1273 dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
1234 1274
@@ -1255,20 +1295,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
1255 1295
1256static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) 1296static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
1257{ 1297{
1298 __le32 tmp;
1258 u32 val; 1299 u32 val;
1259 1300
1260 if (enable_flag) { 1301 if (enable_flag) {
1261 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); 1302 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
1262 /* Open all ports */ 1303 /* Open all ports */
1263 roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, 1304 tmp = cpu_to_le32(val);
1305 roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
1264 ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 1306 ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
1265 ALL_PORT_VAL_OPEN); 1307 ALL_PORT_VAL_OPEN);
1308 val = le32_to_cpu(tmp);
1266 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); 1309 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
1267 } else { 1310 } else {
1268 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); 1311 val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
1269 /* Close all ports */ 1312 /* Close all ports */
1270 roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, 1313 tmp = cpu_to_le32(val);
1314 roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
1271 ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); 1315 ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
1316 val = le32_to_cpu(tmp);
1272 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); 1317 roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
1273 } 1318 }
1274} 1319}
@@ -1498,13 +1543,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
1498 int i = 0; 1543 int i = 0;
1499 struct hns_roce_caps *caps = &hr_dev->caps; 1544 struct hns_roce_caps *caps = &hr_dev->caps;
1500 1545
1501 hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); 1546 hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
1502 hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, 1547 hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
1503 ROCEE_VENDOR_PART_ID_REG)); 1548 hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) |
1504 hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, 1549 ((u64)roce_read(hr_dev,
1505 ROCEE_SYS_IMAGE_GUID_L_REG)) | 1550 ROCEE_SYS_IMAGE_GUID_H_REG) << 32);
1506 ((u64)le32_to_cpu(roce_read(hr_dev,
1507 ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
1508 hr_dev->hw_rev = HNS_ROCE_HW_VER1; 1551 hr_dev->hw_rev = HNS_ROCE_HW_VER1;
1509 1552
1510 caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; 1553 caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
@@ -1557,8 +1600,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
1557 1600
1558 caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM; 1601 caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
1559 caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM; 1602 caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
1560 caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, 1603 caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG);
1561 ROCEE_ACK_DELAY_REG));
1562 caps->max_mtu = IB_MTU_2048; 1604 caps->max_mtu = IB_MTU_2048;
1563 1605
1564 return 0; 1606 return 0;
@@ -1568,21 +1610,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
1568{ 1610{
1569 int ret; 1611 int ret;
1570 u32 val; 1612 u32 val;
1613 __le32 tmp;
1571 struct device *dev = &hr_dev->pdev->dev; 1614 struct device *dev = &hr_dev->pdev->dev;
1572 1615
1573 /* DMAE user config */ 1616 /* DMAE user config */
1574 val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); 1617 val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
1575 roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, 1618 tmp = cpu_to_le32(val);
1619 roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
1576 ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); 1620 ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
1577 roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, 1621 roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
1578 ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, 1622 ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
1579 1 << PAGES_SHIFT_16); 1623 1 << PAGES_SHIFT_16);
1624 val = le32_to_cpu(tmp);
1580 roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); 1625 roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
1581 1626
1582 val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); 1627 val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
1583 roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, 1628 tmp = cpu_to_le32(val);
1629 roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
1584 ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); 1630 ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
1585 roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, 1631 roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
1586 ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, 1632 ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
1587 1 << PAGES_SHIFT_16); 1633 1 << PAGES_SHIFT_16);
1588 1634
@@ -1668,6 +1714,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
1668 u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG); 1714 u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG);
1669 unsigned long end; 1715 unsigned long end;
1670 u32 val = 0; 1716 u32 val = 0;
1717 __le32 tmp;
1671 1718
1672 end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; 1719 end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
1673 while (hns_roce_v1_cmd_pending(hr_dev)) { 1720 while (hns_roce_v1_cmd_pending(hr_dev)) {
@@ -1679,15 +1726,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
1679 cond_resched(); 1726 cond_resched();
1680 } 1727 }
1681 1728
1682 roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, 1729 tmp = cpu_to_le32(val);
1730 roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
1683 op); 1731 op);
1684 roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, 1732 roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
1685 ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); 1733 ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
1686 roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); 1734 roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
1687 roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); 1735 roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
1688 roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, 1736 roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M,
1689 ROCEE_MB6_ROCEE_MB_TOKEN_S, token); 1737 ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
1690 1738
1739 val = le32_to_cpu(tmp);
1691 writeq(in_param, hcr + 0); 1740 writeq(in_param, hcr + 0);
1692 writeq(out_param, hcr + 2); 1741 writeq(out_param, hcr + 2);
1693 writel(in_modifier, hcr + 4); 1742 writel(in_modifier, hcr + 4);
@@ -1717,7 +1766,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
1717 return -ETIMEDOUT; 1766 return -ETIMEDOUT;
1718 } 1767 }
1719 1768
1720 status = le32_to_cpu((__force __be32) 1769 status = le32_to_cpu((__force __le32)
1721 __raw_readl(hcr + HCR_STATUS_OFFSET)); 1770 __raw_readl(hcr + HCR_STATUS_OFFSET));
1722 if ((status & STATUS_MASK) != 0x1) { 1771 if ((status & STATUS_MASK) != 0x1) {
1723 dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status); 1772 dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status);
@@ -1728,7 +1777,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
1728} 1777}
1729 1778
1730static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, 1779static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
1731 int gid_index, union ib_gid *gid, 1780 int gid_index, const union ib_gid *gid,
1732 const struct ib_gid_attr *attr) 1781 const struct ib_gid_attr *attr)
1733{ 1782{
1734 u32 *p = NULL; 1783 u32 *p = NULL;
@@ -1760,6 +1809,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
1760{ 1809{
1761 u32 reg_smac_l; 1810 u32 reg_smac_l;
1762 u16 reg_smac_h; 1811 u16 reg_smac_h;
1812 __le32 tmp;
1763 u16 *p_h; 1813 u16 *p_h;
1764 u32 *p; 1814 u32 *p;
1765 u32 val; 1815 u32 val;
@@ -1784,10 +1834,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
1784 1834
1785 val = roce_read(hr_dev, 1835 val = roce_read(hr_dev,
1786 ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); 1836 ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
1837 tmp = cpu_to_le32(val);
1787 p_h = (u16 *)(&addr[4]); 1838 p_h = (u16 *)(&addr[4]);
1788 reg_smac_h = *p_h; 1839 reg_smac_h = *p_h;
1789 roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, 1840 roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
1790 ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); 1841 ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
1842 val = le32_to_cpu(tmp);
1791 roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, 1843 roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
1792 val); 1844 val);
1793 1845
@@ -1797,12 +1849,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
1797static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, 1849static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
1798 enum ib_mtu mtu) 1850 enum ib_mtu mtu)
1799{ 1851{
1852 __le32 tmp;
1800 u32 val; 1853 u32 val;
1801 1854
1802 val = roce_read(hr_dev, 1855 val = roce_read(hr_dev,
1803 ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); 1856 ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
1804 roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, 1857 tmp = cpu_to_le32(val);
1858 roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
1805 ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); 1859 ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
1860 val = le32_to_cpu(tmp);
1806 roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, 1861 roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
1807 val); 1862 val);
1808} 1863}
@@ -1848,9 +1903,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
1848 roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, 1903 roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
1849 MPT_BYTE_12_MW_BIND_COUNTER_S, 0); 1904 MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
1850 1905
1851 mpt_entry->virt_addr_l = (u32)mr->iova; 1906 mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova);
1852 mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); 1907 mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32));
1853 mpt_entry->length = (u32)mr->size; 1908 mpt_entry->length = cpu_to_le32((u32)mr->size);
1854 1909
1855 roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, 1910 roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
1856 MPT_BYTE_28_PD_S, mr->pd); 1911 MPT_BYTE_28_PD_S, mr->pd);
@@ -1885,64 +1940,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
1885 roce_set_field(mpt_entry->mpt_byte_36, 1940 roce_set_field(mpt_entry->mpt_byte_36,
1886 MPT_BYTE_36_PA0_H_M, 1941 MPT_BYTE_36_PA0_H_M,
1887 MPT_BYTE_36_PA0_H_S, 1942 MPT_BYTE_36_PA0_H_S,
1888 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); 1943 (u32)(pages[i] >> PAGES_SHIFT_32));
1889 break; 1944 break;
1890 case 1: 1945 case 1:
1891 roce_set_field(mpt_entry->mpt_byte_36, 1946 roce_set_field(mpt_entry->mpt_byte_36,
1892 MPT_BYTE_36_PA1_L_M, 1947 MPT_BYTE_36_PA1_L_M,
1893 MPT_BYTE_36_PA1_L_S, 1948 MPT_BYTE_36_PA1_L_S, (u32)(pages[i]));
1894 cpu_to_le32((u32)(pages[i])));
1895 roce_set_field(mpt_entry->mpt_byte_40, 1949 roce_set_field(mpt_entry->mpt_byte_40,
1896 MPT_BYTE_40_PA1_H_M, 1950 MPT_BYTE_40_PA1_H_M,
1897 MPT_BYTE_40_PA1_H_S, 1951 MPT_BYTE_40_PA1_H_S,
1898 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); 1952 (u32)(pages[i] >> PAGES_SHIFT_24));
1899 break; 1953 break;
1900 case 2: 1954 case 2:
1901 roce_set_field(mpt_entry->mpt_byte_40, 1955 roce_set_field(mpt_entry->mpt_byte_40,
1902 MPT_BYTE_40_PA2_L_M, 1956 MPT_BYTE_40_PA2_L_M,
1903 MPT_BYTE_40_PA2_L_S, 1957 MPT_BYTE_40_PA2_L_S, (u32)(pages[i]));
1904 cpu_to_le32((u32)(pages[i])));
1905 roce_set_field(mpt_entry->mpt_byte_44, 1958 roce_set_field(mpt_entry->mpt_byte_44,
1906 MPT_BYTE_44_PA2_H_M, 1959 MPT_BYTE_44_PA2_H_M,
1907 MPT_BYTE_44_PA2_H_S, 1960 MPT_BYTE_44_PA2_H_S,
1908 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); 1961 (u32)(pages[i] >> PAGES_SHIFT_16));
1909 break; 1962 break;
1910 case 3: 1963 case 3:
1911 roce_set_field(mpt_entry->mpt_byte_44, 1964 roce_set_field(mpt_entry->mpt_byte_44,
1912 MPT_BYTE_44_PA3_L_M, 1965 MPT_BYTE_44_PA3_L_M,
1913 MPT_BYTE_44_PA3_L_S, 1966 MPT_BYTE_44_PA3_L_S, (u32)(pages[i]));
1914 cpu_to_le32((u32)(pages[i])));
1915 roce_set_field(mpt_entry->mpt_byte_48, 1967 roce_set_field(mpt_entry->mpt_byte_48,
1916 MPT_BYTE_48_PA3_H_M, 1968 MPT_BYTE_48_PA3_H_M,
1917 MPT_BYTE_48_PA3_H_S, 1969 MPT_BYTE_48_PA3_H_S,
1918 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); 1970 (u32)(pages[i] >> PAGES_SHIFT_8));
1919 break; 1971 break;
1920 case 4: 1972 case 4:
1921 mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); 1973 mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
1922 roce_set_field(mpt_entry->mpt_byte_56, 1974 roce_set_field(mpt_entry->mpt_byte_56,
1923 MPT_BYTE_56_PA4_H_M, 1975 MPT_BYTE_56_PA4_H_M,
1924 MPT_BYTE_56_PA4_H_S, 1976 MPT_BYTE_56_PA4_H_S,
1925 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); 1977 (u32)(pages[i] >> PAGES_SHIFT_32));
1926 break; 1978 break;
1927 case 5: 1979 case 5:
1928 roce_set_field(mpt_entry->mpt_byte_56, 1980 roce_set_field(mpt_entry->mpt_byte_56,
1929 MPT_BYTE_56_PA5_L_M, 1981 MPT_BYTE_56_PA5_L_M,
1930 MPT_BYTE_56_PA5_L_S, 1982 MPT_BYTE_56_PA5_L_S, (u32)(pages[i]));
1931 cpu_to_le32((u32)(pages[i])));
1932 roce_set_field(mpt_entry->mpt_byte_60, 1983 roce_set_field(mpt_entry->mpt_byte_60,
1933 MPT_BYTE_60_PA5_H_M, 1984 MPT_BYTE_60_PA5_H_M,
1934 MPT_BYTE_60_PA5_H_S, 1985 MPT_BYTE_60_PA5_H_S,
1935 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); 1986 (u32)(pages[i] >> PAGES_SHIFT_24));
1936 break; 1987 break;
1937 case 6: 1988 case 6:
1938 roce_set_field(mpt_entry->mpt_byte_60, 1989 roce_set_field(mpt_entry->mpt_byte_60,
1939 MPT_BYTE_60_PA6_L_M, 1990 MPT_BYTE_60_PA6_L_M,
1940 MPT_BYTE_60_PA6_L_S, 1991 MPT_BYTE_60_PA6_L_S, (u32)(pages[i]));
1941 cpu_to_le32((u32)(pages[i])));
1942 roce_set_field(mpt_entry->mpt_byte_64, 1992 roce_set_field(mpt_entry->mpt_byte_64,
1943 MPT_BYTE_64_PA6_H_M, 1993 MPT_BYTE_64_PA6_H_M,
1944 MPT_BYTE_64_PA6_H_S, 1994 MPT_BYTE_64_PA6_H_S,
1945 cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); 1995 (u32)(pages[i] >> PAGES_SHIFT_16));
1946 break; 1996 break;
1947 default: 1997 default:
1948 break; 1998 break;
@@ -1951,7 +2001,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
1951 2001
1952 free_page((unsigned long) pages); 2002 free_page((unsigned long) pages);
1953 2003
1954 mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); 2004 mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
1955 2005
1956 roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, 2006 roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
1957 MPT_BYTE_12_PBL_ADDR_H_S, 2007 MPT_BYTE_12_PBL_ADDR_H_S,
@@ -1982,9 +2032,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
1982 2032
1983static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) 2033static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
1984{ 2034{
1985 u32 doorbell[2]; 2035 __le32 doorbell[2];
1986 2036
1987 doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); 2037 doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1));
1988 doorbell[1] = 0; 2038 doorbell[1] = 0;
1989 roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); 2039 roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
1990 roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, 2040 roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
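
The doorbell arrays here become __le32[2], with the consumer index wrapped in cpu_to_le32() before the pair is handed to hns_roce_write64_k(), which issues a single 64-bit write of the two words. A loose userspace sketch of composing such a doorbell, with htole32() and a plain buffer standing in for the kernel helpers and the MMIO register:

    #include <endian.h>
    #include <stdint.h>
    #include <string.h>

    static void ring_doorbell_example(volatile uint64_t *db_reg,
                                      uint32_t cons_index, uint32_t cmd_word)
    {
            uint32_t doorbell[2];
            uint64_t raw;

            doorbell[0] = htole32(cons_index);      /* low word, device order  */
            doorbell[1] = htole32(cmd_word);        /* high word, device order */

            memcpy(&raw, doorbell, sizeof(raw));    /* same bytes the device sees */
            *db_reg = raw;                          /* one 64-bit store        */
    }
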
@@ -2081,10 +2131,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
2081 CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); 2131 CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
2082 roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, 2132 roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
2083 CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); 2133 CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
2084 cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4);
2085 2134
2086 cq_context->cq_bt_l = (u32)dma_handle; 2135 cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle);
2087 cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l);
2088 2136
2089 roce_set_field(cq_context->cqc_byte_12, 2137 roce_set_field(cq_context->cqc_byte_12,
2090 CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, 2138 CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
@@ -2096,15 +2144,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
2096 ilog2((unsigned int)nent)); 2144 ilog2((unsigned int)nent));
2097 roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, 2145 roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
2098 CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); 2146 CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
2099 cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12);
2100 2147
2101 cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); 2148 cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
2102 cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l);
2103 2149
2104 roce_set_field(cq_context->cqc_byte_20, 2150 roce_set_field(cq_context->cqc_byte_20,
2105 CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, 2151 CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
2106 CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, 2152 CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32);
2107 cpu_to_le32((mtts[0]) >> 32));
2108 /* Dedicated hardware, directly set 0 */ 2153 /* Dedicated hardware, directly set 0 */
2109 roce_set_field(cq_context->cqc_byte_20, 2154 roce_set_field(cq_context->cqc_byte_20,
2110 CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, 2155 CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
@@ -2118,9 +2163,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
2118 CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, 2163 CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
2119 CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, 2164 CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
2120 tptr_dma_addr >> 44); 2165 tptr_dma_addr >> 44);
2121 cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
2122 2166
2123 cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12); 2167 cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12));
2124 2168
2125 roce_set_field(cq_context->cqc_byte_32, 2169 roce_set_field(cq_context->cqc_byte_32,
2126 CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, 2170 CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -2138,7 +2182,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
2138 roce_set_field(cq_context->cqc_byte_32, 2182 roce_set_field(cq_context->cqc_byte_32,
2139 CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, 2183 CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
2140 CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); 2184 CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
2141 cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32);
2142} 2185}
2143 2186
2144static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) 2187static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
@@ -2151,7 +2194,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
2151{ 2194{
2152 struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); 2195 struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
2153 u32 notification_flag; 2196 u32 notification_flag;
2154 u32 doorbell[2]; 2197 __le32 doorbell[2];
2155 2198
2156 notification_flag = (flags & IB_CQ_SOLICITED_MASK) == 2199 notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
2157 IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; 2200 IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@@ -2159,7 +2202,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
2159 * flags = 0; Notification Flag = 1, next 2202 * flags = 0; Notification Flag = 1, next
2160 * flags = 1; Notification Flag = 0, solocited 2203 * flags = 1; Notification Flag = 0, solocited
2161 */ 2204 */
2162 doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); 2205 doorbell[0] =
2206 cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
2163 roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); 2207 roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
2164 roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, 2208 roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
2165 ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); 2209 ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
@@ -2416,7 +2460,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
2416 struct device *dev = &hr_dev->pdev->dev; 2460 struct device *dev = &hr_dev->pdev->dev;
2417 struct hns_roce_v1_priv *priv; 2461 struct hns_roce_v1_priv *priv;
2418 unsigned long end = 0, flags = 0; 2462 unsigned long end = 0, flags = 0;
2419 uint32_t bt_cmd_val[2] = {0}; 2463 __le32 bt_cmd_val[2] = {0};
2420 void __iomem *bt_cmd; 2464 void __iomem *bt_cmd;
2421 u64 bt_ba = 0; 2465 u64 bt_ba = 0;
2422 2466
@@ -2468,7 +2512,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
2468 msleep(HW_SYNC_SLEEP_TIME_INTERVAL); 2512 msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
2469 } 2513 }
2470 2514
2471 bt_cmd_val[0] = (uint32_t)bt_ba; 2515 bt_cmd_val[0] = (__le32)bt_ba;
2472 roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, 2516 roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
2473 ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); 2517 ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
2474 hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); 2518 hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
@@ -2569,10 +2613,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2569 struct hns_roce_sqp_context *context; 2613 struct hns_roce_sqp_context *context;
2570 struct device *dev = &hr_dev->pdev->dev; 2614 struct device *dev = &hr_dev->pdev->dev;
2571 dma_addr_t dma_handle = 0; 2615 dma_addr_t dma_handle = 0;
2616 u32 __iomem *addr;
2572 int rq_pa_start; 2617 int rq_pa_start;
2618 __le32 tmp;
2573 u32 reg_val; 2619 u32 reg_val;
2574 u64 *mtts; 2620 u64 *mtts;
2575 u32 __iomem *addr;
2576 2621
2577 context = kzalloc(sizeof(*context), GFP_KERNEL); 2622 context = kzalloc(sizeof(*context), GFP_KERNEL);
2578 if (!context) 2623 if (!context)
@@ -2598,7 +2643,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2598 roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, 2643 roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
2599 QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); 2644 QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
2600 2645
2601 context->sq_rq_bt_l = (u32)(dma_handle); 2646 context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
2602 roce_set_field(context->qp1c_bytes_12, 2647 roce_set_field(context->qp1c_bytes_12,
2603 QP1C_BYTES_12_SQ_RQ_BT_H_M, 2648 QP1C_BYTES_12_SQ_RQ_BT_H_M,
2604 QP1C_BYTES_12_SQ_RQ_BT_H_S, 2649 QP1C_BYTES_12_SQ_RQ_BT_H_S,
@@ -2610,7 +2655,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2610 QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); 2655 QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
2611 roce_set_bit(context->qp1c_bytes_16, 2656 roce_set_bit(context->qp1c_bytes_16,
2612 QP1C_BYTES_16_SIGNALING_TYPE_S, 2657 QP1C_BYTES_16_SIGNALING_TYPE_S,
2613 hr_qp->sq_signal_bits); 2658 le32_to_cpu(hr_qp->sq_signal_bits));
2614 roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 2659 roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
2615 1); 2660 1);
2616 roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, 2661 roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -2624,7 +2669,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2624 QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); 2669 QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
2625 2670
2626 rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; 2671 rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
2627 context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); 2672 context->cur_rq_wqe_ba_l =
2673 cpu_to_le32((u32)(mtts[rq_pa_start]));
2628 2674
2629 roce_set_field(context->qp1c_bytes_28, 2675 roce_set_field(context->qp1c_bytes_28,
2630 QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, 2676 QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
@@ -2643,7 +2689,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2643 QP1C_BYTES_32_TX_CQ_NUM_S, 2689 QP1C_BYTES_32_TX_CQ_NUM_S,
2644 to_hr_cq(ibqp->send_cq)->cqn); 2690 to_hr_cq(ibqp->send_cq)->cqn);
2645 2691
2646 context->cur_sq_wqe_ba_l = (u32)mtts[0]; 2692 context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]);
2647 2693
2648 roce_set_field(context->qp1c_bytes_40, 2694 roce_set_field(context->qp1c_bytes_40,
2649 QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, 2695 QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
@@ -2658,23 +2704,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2658 ROCEE_QP1C_CFG0_0_REG + 2704 ROCEE_QP1C_CFG0_0_REG +
2659 hr_qp->phy_port * sizeof(*context)); 2705 hr_qp->phy_port * sizeof(*context));
2660 2706
2661 writel(context->qp1c_bytes_4, addr); 2707 writel(le32_to_cpu(context->qp1c_bytes_4), addr);
2662 writel(context->sq_rq_bt_l, addr + 1); 2708 writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1);
2663 writel(context->qp1c_bytes_12, addr + 2); 2709 writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2);
2664 writel(context->qp1c_bytes_16, addr + 3); 2710 writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3);
2665 writel(context->qp1c_bytes_20, addr + 4); 2711 writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4);
2666 writel(context->cur_rq_wqe_ba_l, addr + 5); 2712 writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5);
2667 writel(context->qp1c_bytes_28, addr + 6); 2713 writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6);
2668 writel(context->qp1c_bytes_32, addr + 7); 2714 writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7);
2669 writel(context->cur_sq_wqe_ba_l, addr + 8); 2715 writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8);
2670 writel(context->qp1c_bytes_40, addr + 9); 2716 writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9);
2671 } 2717 }
2672 2718
2673 /* Modify QP1C status */ 2719 /* Modify QP1C status */
2674 reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + 2720 reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
2675 hr_qp->phy_port * sizeof(*context)); 2721 hr_qp->phy_port * sizeof(*context));
2676 roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, 2722 tmp = cpu_to_le32(reg_val);
2723 roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
2677 ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); 2724 ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
2725 reg_val = le32_to_cpu(tmp);
2678 roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + 2726 roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
2679 hr_qp->phy_port * sizeof(*context), reg_val); 2727 hr_qp->phy_port * sizeof(*context), reg_val);
2680 2728
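The QP1C status update above shows the conversion pattern that recurs throughout the v1 changes in this series: roce_read() returns a CPU-order u32, the roce_set_field()/roce_set_bit() helpers operate on __le32 values, so the register value is round-tripped through a __le32 temporary before roce_write(). On little-endian hardware the conversions compile to no-ops, so the change appears to be about satisfying sparse's endianness annotations rather than altering behaviour. A hedged sketch of the round trip, with simplified stand-ins for the register accessors and field macros, follows.

#include <stdint.h>
#include <stdio.h>

#define cpu_to_le32(x)	((uint32_t)(x))	/* little-endian host assumed */
#define le32_to_cpu(x)	((uint32_t)(x))

static uint32_t fake_reg = 0x00a50000;	/* pretend MMIO register */

static uint32_t reg_read(void)    { return fake_reg; }	/* as roce_read() */
static void reg_write(uint32_t v) { fake_reg = v; }	/* as roce_write() */

/* Illustrative field layout, not the real ROCEE_QP1C_CFG0_0_* masks */
#define QP_ST_MASK	0x00000007u
#define QP_ST_SHIFT	0

int main(void)
{
	uint32_t new_state = 2;
	uint32_t reg_val = reg_read();		/* CPU order */
	uint32_t tmp = cpu_to_le32(reg_val);	/* typed __le32 in the driver */

	/* equivalent of roce_set_field(tmp, ..._M, ..._S, new_state) */
	tmp = (tmp & ~QP_ST_MASK) | ((new_state << QP_ST_SHIFT) & QP_ST_MASK);

	reg_val = le32_to_cpu(tmp);		/* back to CPU order */
	reg_write(reg_val);

	printf("reg now %08x\n", fake_reg);
	return 0;
}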
@@ -2712,7 +2760,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2712 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 2760 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2713 dma_addr_t dma_handle_2 = 0; 2761 dma_addr_t dma_handle_2 = 0;
2714 dma_addr_t dma_handle = 0; 2762 dma_addr_t dma_handle = 0;
2715 uint32_t doorbell[2] = {0}; 2763 __le32 doorbell[2] = {0};
2716 int rq_pa_start = 0; 2764 int rq_pa_start = 0;
2717 u64 *mtts_2 = NULL; 2765 u64 *mtts_2 = NULL;
2718 int ret = -EINVAL; 2766 int ret = -EINVAL;
@@ -2887,7 +2935,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2887 2935
2888 dmac = (u8 *)attr->ah_attr.roce.dmac; 2936 dmac = (u8 *)attr->ah_attr.roce.dmac;
2889 2937
2890 context->sq_rq_bt_l = (u32)(dma_handle); 2938 context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
2891 roce_set_field(context->qpc_bytes_24, 2939 roce_set_field(context->qpc_bytes_24,
2892 QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, 2940 QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
2893 QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, 2941 QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
@@ -2899,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2899 QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, 2947 QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
2900 QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, 2948 QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
2901 attr->min_rnr_timer); 2949 attr->min_rnr_timer);
2902 context->irrl_ba_l = (u32)(dma_handle_2); 2950 context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2));
2903 roce_set_field(context->qpc_bytes_32, 2951 roce_set_field(context->qpc_bytes_32,
2904 QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, 2952 QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
2905 QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, 2953 QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
@@ -2913,7 +2961,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2913 1); 2961 1);
2914 roce_set_bit(context->qpc_bytes_32, 2962 roce_set_bit(context->qpc_bytes_32,
2915 QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, 2963 QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
2916 hr_qp->sq_signal_bits); 2964 le32_to_cpu(hr_qp->sq_signal_bits));
2917 2965
2918 port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : 2966 port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
2919 hr_qp->port; 2967 hr_qp->port;
@@ -2991,7 +3039,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
2991 QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); 3039 QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
2992 3040
2993 rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; 3041 rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
2994 context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); 3042 context->cur_rq_wqe_ba_l =
3043 cpu_to_le32((u32)(mtts[rq_pa_start]));
2995 3044
2996 roce_set_field(context->qpc_bytes_76, 3045 roce_set_field(context->qpc_bytes_76,
2997 QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, 3046 QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
@@ -3071,7 +3120,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
3071 goto out; 3120 goto out;
3072 } 3121 }
3073 3122
3074 context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); 3123 context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
3075 3124
3076 roce_set_field(context->qpc_bytes_120, 3125 roce_set_field(context->qpc_bytes_120,
3077 QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, 3126 QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
@@ -3219,7 +3268,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
3219 QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, 3268 QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
3220 QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); 3269 QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
3221 3270
3222 context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); 3271 context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
3223 3272
3224 roce_set_field(context->qpc_bytes_188, 3273 roce_set_field(context->qpc_bytes_188,
3225 QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, 3274 QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
@@ -3386,16 +3435,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3386 3435
3387 addr = ROCEE_QP1C_CFG0_0_REG + 3436 addr = ROCEE_QP1C_CFG0_0_REG +
3388 hr_qp->port * sizeof(struct hns_roce_sqp_context); 3437 hr_qp->port * sizeof(struct hns_roce_sqp_context);
3389 context.qp1c_bytes_4 = roce_read(hr_dev, addr); 3438 context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr));
3390 context.sq_rq_bt_l = roce_read(hr_dev, addr + 1); 3439 context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1));
3391 context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2); 3440 context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2));
3392 context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3); 3441 context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3));
3393 context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4); 3442 context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4));
3394 context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5); 3443 context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5));
3395 context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6); 3444 context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6));
3396 context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7); 3445 context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7));
3397 context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8); 3446 context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8));
3398 context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9); 3447 context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9));
3399 3448
3400 hr_qp->state = roce_get_field(context.qp1c_bytes_4, 3449 hr_qp->state = roce_get_field(context.qp1c_bytes_4,
3401 QP1C_BYTES_4_QP_STATE_M, 3450 QP1C_BYTES_4_QP_STATE_M,
@@ -3557,7 +3606,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3557 qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, 3606 qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
3558 QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, 3607 QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
3559 QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); 3608 QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
3560 qp_attr->rnr_retry = context->rnr_retry; 3609 qp_attr->rnr_retry = (u8)context->rnr_retry;
3561 3610
3562done: 3611done:
3563 qp_attr->cur_qp_state = qp_attr->qp_state; 3612 qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -3595,42 +3644,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev,
3595 u32 *old_send, u32 *old_retry, 3644 u32 *old_send, u32 *old_retry,
3596 u32 *tsp_st, u32 *success_flags) 3645 u32 *tsp_st, u32 *success_flags)
3597{ 3646{
3647 __le32 *old_send_tmp, *old_retry_tmp;
3598 u32 sdb_retry_cnt; 3648 u32 sdb_retry_cnt;
3599 u32 sdb_send_ptr; 3649 u32 sdb_send_ptr;
3600 u32 cur_cnt, old_cnt; 3650 u32 cur_cnt, old_cnt;
3651 __le32 tmp, tmp1;
3601 u32 send_ptr; 3652 u32 send_ptr;
3602 3653
3603 sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); 3654 sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
3604 sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); 3655 sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
3605 cur_cnt = roce_get_field(sdb_send_ptr, 3656 tmp = cpu_to_le32(sdb_send_ptr);
3606 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3657 tmp1 = cpu_to_le32(sdb_retry_cnt);
3658 cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3607 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + 3659 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
3608 roce_get_field(sdb_retry_cnt, 3660 roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
3609 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
3610 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); 3661 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
3662
3663 old_send_tmp = (__le32 *)old_send;
3664 old_retry_tmp = (__le32 *)old_retry;
3611 if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { 3665 if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
3612 old_cnt = roce_get_field(*old_send, 3666 old_cnt = roce_get_field(*old_send_tmp,
3613 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3667 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3614 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + 3668 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
3615 roce_get_field(*old_retry, 3669 roce_get_field(*old_retry_tmp,
3616 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, 3670 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
3617 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); 3671 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
3618 if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) 3672 if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
3619 *success_flags = 1; 3673 *success_flags = 1;
3620 } else { 3674 } else {
3621 old_cnt = roce_get_field(*old_send, 3675 old_cnt = roce_get_field(*old_send_tmp,
3622 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3676 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3623 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); 3677 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
3624 if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { 3678 if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) {
3625 *success_flags = 1; 3679 *success_flags = 1;
3626 } else { 3680 } else {
3627 send_ptr = roce_get_field(*old_send, 3681 send_ptr = roce_get_field(*old_send_tmp,
3628 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3682 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3629 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + 3683 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
3630 roce_get_field(sdb_retry_cnt, 3684 roce_get_field(tmp1,
3631 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, 3685 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
3632 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); 3686 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
3633 roce_set_field(*old_send, 3687 roce_set_field(*old_send_tmp,
3634 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3688 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3635 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, 3689 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
3636 send_ptr); 3690 send_ptr);
@@ -3646,11 +3700,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
3646{ 3700{
3647 struct device *dev = &hr_dev->pdev->dev; 3701 struct device *dev = &hr_dev->pdev->dev;
3648 u32 sdb_send_ptr, old_send; 3702 u32 sdb_send_ptr, old_send;
3703 __le32 sdb_issue_ptr_tmp;
3704 __le32 sdb_send_ptr_tmp;
3649 u32 success_flags = 0; 3705 u32 success_flags = 0;
3650 unsigned long end; 3706 unsigned long end;
3651 u32 old_retry; 3707 u32 old_retry;
3652 u32 inv_cnt; 3708 u32 inv_cnt;
3653 u32 tsp_st; 3709 u32 tsp_st;
3710 __le32 tmp;
3654 3711
3655 if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || 3712 if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
3656 *wait_stage < HNS_ROCE_V1_DB_STAGE1) { 3713 *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
@@ -3679,10 +3736,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
3679 ROCEE_SDB_SEND_PTR_REG); 3736 ROCEE_SDB_SEND_PTR_REG);
3680 } 3737 }
3681 3738
3682 if (roce_get_field(sdb_issue_ptr, 3739 sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr);
3740 sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr);
3741 if (roce_get_field(sdb_issue_ptr_tmp,
3683 ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, 3742 ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
3684 ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == 3743 ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
3685 roce_get_field(sdb_send_ptr, 3744 roce_get_field(sdb_send_ptr_tmp,
3686 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, 3745 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
3687 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { 3746 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
3688 old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); 3747 old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
@@ -3690,7 +3749,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
3690 3749
3691 do { 3750 do {
3692 tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); 3751 tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
3693 if (roce_get_bit(tsp_st, 3752 tmp = cpu_to_le32(tsp_st);
3753 if (roce_get_bit(tmp,
3694 ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { 3754 ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
3695 *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; 3755 *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
3696 return 0; 3756 return 0;
@@ -3699,8 +3759,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
3699 if (!time_before(jiffies, end)) { 3759 if (!time_before(jiffies, end)) {
3700 dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" 3760 dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
3701 "issue 0x%x send 0x%x.\n", 3761 "issue 0x%x send 0x%x.\n",
3702 hr_qp->qpn, sdb_issue_ptr, 3762 hr_qp->qpn,
3703 sdb_send_ptr); 3763 le32_to_cpu(sdb_issue_ptr_tmp),
3764 le32_to_cpu(sdb_send_ptr_tmp));
3704 return 0; 3765 return 0;
3705 } 3766 }
3706 3767
@@ -4102,9 +4163,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
4102 struct device *dev = &hr_dev->pdev->dev; 4163 struct device *dev = &hr_dev->pdev->dev;
4103 u32 cqn; 4164 u32 cqn;
4104 4165
4105 cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, 4166 cqn = roce_get_field(aeqe->event.cq_event.cq,
4106 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, 4167 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
4107 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); 4168 HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
4108 4169
4109 switch (event_type) { 4170 switch (event_type) {
4110 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4171 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -4340,6 +4401,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4340 u32 aeshift_val; 4401 u32 aeshift_val;
4341 u32 ceshift_val; 4402 u32 ceshift_val;
4342 u32 cemask_val; 4403 u32 cemask_val;
4404 __le32 tmp;
4343 int i; 4405 int i;
4344 4406
4345 /* 4407 /*
@@ -4348,30 +4410,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4348 * interrupt, mask irq, clear irq, cancel mask operation 4410 * interrupt, mask irq, clear irq, cancel mask operation
4349 */ 4411 */
4350 aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); 4412 aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
4413 tmp = cpu_to_le32(aeshift_val);
4351 4414
4352 /* AEQE overflow */ 4415 /* AEQE overflow */
4353 if (roce_get_bit(aeshift_val, 4416 if (roce_get_bit(tmp,
4354 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { 4417 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
4355 dev_warn(dev, "AEQ overflow!\n"); 4418 dev_warn(dev, "AEQ overflow!\n");
4356 4419
4357 /* Set mask */ 4420 /* Set mask */
4358 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); 4421 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4359 roce_set_bit(caepaemask_val, 4422 tmp = cpu_to_le32(caepaemask_val);
4360 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, 4423 roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4361 HNS_ROCE_INT_MASK_ENABLE); 4424 HNS_ROCE_INT_MASK_ENABLE);
4425 caepaemask_val = le32_to_cpu(tmp);
4362 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); 4426 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
4363 4427
4364 /* Clear int state(INT_WC : write 1 clear) */ 4428 /* Clear int state(INT_WC : write 1 clear) */
4365 caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); 4429 caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
4366 roce_set_bit(caepaest_val, 4430 tmp = cpu_to_le32(caepaest_val);
4367 ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); 4431 roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
4432 caepaest_val = le32_to_cpu(tmp);
4368 roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); 4433 roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
4369 4434
4370 /* Clear mask */ 4435 /* Clear mask */
4371 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); 4436 caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4372 roce_set_bit(caepaemask_val, 4437 tmp = cpu_to_le32(caepaemask_val);
4373 ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, 4438 roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4374 HNS_ROCE_INT_MASK_DISABLE); 4439 HNS_ROCE_INT_MASK_DISABLE);
4440 caepaemask_val = le32_to_cpu(tmp);
4375 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); 4441 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
4376 } 4442 }
4377 4443
@@ -4379,8 +4445,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4379 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { 4445 for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
4380 ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + 4446 ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
4381 i * CEQ_REG_OFFSET); 4447 i * CEQ_REG_OFFSET);
4448 tmp = cpu_to_le32(ceshift_val);
4382 4449
4383 if (roce_get_bit(ceshift_val, 4450 if (roce_get_bit(tmp,
4384 ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { 4451 ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
4385 dev_warn(dev, "CEQ[%d] almost overflow!\n", i); 4452 dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
4386 int_work++; 4453 int_work++;
@@ -4389,9 +4456,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4389 cemask_val = roce_read(hr_dev, 4456 cemask_val = roce_read(hr_dev,
4390 ROCEE_CAEP_CE_IRQ_MASK_0_REG + 4457 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4391 i * CEQ_REG_OFFSET); 4458 i * CEQ_REG_OFFSET);
4392 roce_set_bit(cemask_val, 4459 tmp = cpu_to_le32(cemask_val);
4460 roce_set_bit(tmp,
4393 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, 4461 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
4394 HNS_ROCE_INT_MASK_ENABLE); 4462 HNS_ROCE_INT_MASK_ENABLE);
4463 cemask_val = le32_to_cpu(tmp);
4395 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + 4464 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4396 i * CEQ_REG_OFFSET, cemask_val); 4465 i * CEQ_REG_OFFSET, cemask_val);
4397 4466
@@ -4399,9 +4468,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4399 cealmovf_val = roce_read(hr_dev, 4468 cealmovf_val = roce_read(hr_dev,
4400 ROCEE_CAEP_CEQ_ALM_OVF_0_REG + 4469 ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
4401 i * CEQ_REG_OFFSET); 4470 i * CEQ_REG_OFFSET);
4402 roce_set_bit(cealmovf_val, 4471 tmp = cpu_to_le32(cealmovf_val);
4472 roce_set_bit(tmp,
4403 ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, 4473 ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
4404 1); 4474 1);
4475 cealmovf_val = le32_to_cpu(tmp);
4405 roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + 4476 roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
4406 i * CEQ_REG_OFFSET, cealmovf_val); 4477 i * CEQ_REG_OFFSET, cealmovf_val);
4407 4478
@@ -4409,9 +4480,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
4409 cemask_val = roce_read(hr_dev, 4480 cemask_val = roce_read(hr_dev,
4410 ROCEE_CAEP_CE_IRQ_MASK_0_REG + 4481 ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4411 i * CEQ_REG_OFFSET); 4482 i * CEQ_REG_OFFSET);
4412 roce_set_bit(cemask_val, 4483 tmp = cpu_to_le32(cemask_val);
4484 roce_set_bit(tmp,
4413 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, 4485 ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
4414 HNS_ROCE_INT_MASK_DISABLE); 4486 HNS_ROCE_INT_MASK_DISABLE);
4487 cemask_val = le32_to_cpu(tmp);
4415 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + 4488 roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
4416 i * CEQ_REG_OFFSET, cemask_val); 4489 i * CEQ_REG_OFFSET, cemask_val);
4417 } 4490 }
@@ -4435,13 +4508,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
4435{ 4508{
4436 u32 aemask_val; 4509 u32 aemask_val;
4437 int masken = 0; 4510 int masken = 0;
4511 __le32 tmp;
4438 int i; 4512 int i;
4439 4513
4440 /* AEQ INT */ 4514 /* AEQ INT */
4441 aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); 4515 aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
4442 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, 4516 tmp = cpu_to_le32(aemask_val);
4517 roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
4443 masken); 4518 masken);
4444 roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); 4519 roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
4520 aemask_val = le32_to_cpu(tmp);
4445 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); 4521 roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
4446 4522
4447 /* CEQ INT */ 4523 /* CEQ INT */
@@ -4473,20 +4549,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
4473 int enable_flag) 4549 int enable_flag)
4474{ 4550{
4475 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; 4551 void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
4552 __le32 tmp;
4476 u32 val; 4553 u32 val;
4477 4554
4478 val = readl(eqc); 4555 val = readl(eqc);
4556 tmp = cpu_to_le32(val);
4479 4557
4480 if (enable_flag) 4558 if (enable_flag)
4481 roce_set_field(val, 4559 roce_set_field(tmp,
4482 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, 4560 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4483 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, 4561 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4484 HNS_ROCE_EQ_STAT_VALID); 4562 HNS_ROCE_EQ_STAT_VALID);
4485 else 4563 else
4486 roce_set_field(val, 4564 roce_set_field(tmp,
4487 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, 4565 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4488 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, 4566 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4489 HNS_ROCE_EQ_STAT_INVALID); 4567 HNS_ROCE_EQ_STAT_INVALID);
4568
4569 val = le32_to_cpu(tmp);
4490 writel(val, eqc); 4570 writel(val, eqc);
4491} 4571}
4492 4572
@@ -4499,6 +4579,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
4499 u32 eqconsindx_val = 0; 4579 u32 eqconsindx_val = 0;
4500 u32 eqcuridx_val = 0; 4580 u32 eqcuridx_val = 0;
4501 u32 eqshift_val = 0; 4581 u32 eqshift_val = 0;
4582 __le32 tmp2 = 0;
4583 __le32 tmp1 = 0;
4584 __le32 tmp = 0;
4502 int num_bas; 4585 int num_bas;
4503 int ret; 4586 int ret;
4504 int i; 4587 int i;
@@ -4530,14 +4613,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
4530 memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); 4613 memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
4531 } 4614 }
4532 eq->cons_index = 0; 4615 eq->cons_index = 0;
4533 roce_set_field(eqshift_val, 4616 roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4534 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
4535 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, 4617 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
4536 HNS_ROCE_EQ_STAT_INVALID); 4618 HNS_ROCE_EQ_STAT_INVALID);
4537 roce_set_field(eqshift_val, 4619 roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
4538 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
4539 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, 4620 ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
4540 eq->log_entries); 4621 eq->log_entries);
4622 eqshift_val = le32_to_cpu(tmp);
4541 writel(eqshift_val, eqc); 4623 writel(eqshift_val, eqc);
4542 4624
4543 /* Configure eq extended address 12~44bit */ 4625 /* Configure eq extended address 12~44bit */
@@ -4549,18 +4631,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
4549 * using 4K page, and shift more 32 because of 4631 * using 4K page, and shift more 32 because of
4550 * caculating the high 32 bit value evaluated to hardware. 4632 * caculating the high 32 bit value evaluated to hardware.
4551 */ 4633 */
4552 roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, 4634 roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
4553 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, 4635 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
4554 eq->buf_list[0].map >> 44); 4636 eq->buf_list[0].map >> 44);
4555 roce_set_field(eqcuridx_val, 4637 roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
4556 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
4557 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); 4638 ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
4639 eqcuridx_val = le32_to_cpu(tmp1);
4558 writel(eqcuridx_val, eqc + 8); 4640 writel(eqcuridx_val, eqc + 8);
4559 4641
4560 /* Configure eq consumer index */ 4642 /* Configure eq consumer index */
4561 roce_set_field(eqconsindx_val, 4643 roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
4562 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
4563 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); 4644 ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
4645 eqconsindx_val = le32_to_cpu(tmp2);
4564 writel(eqconsindx_val, eqc + 0xc); 4646 writel(eqconsindx_val, eqc + 0xc);
4565 4647
4566 return 0; 4648 return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index e9a2717ea7cd..66440147d9eb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -260,7 +260,7 @@ struct hns_roce_cqe {
260 __le32 cqe_byte_4; 260 __le32 cqe_byte_4;
261 union { 261 union {
262 __le32 r_key; 262 __le32 r_key;
263 __be32 immediate_data; 263 __le32 immediate_data;
264 }; 264 };
265 __le32 byte_cnt; 265 __le32 byte_cnt;
266 __le32 cqe_byte_16; 266 __le32 cqe_byte_16;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a6e11be0ea0f..0218c0f8c2a7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -36,6 +36,7 @@
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <linux/types.h> 37#include <linux/types.h>
38#include <net/addrconf.h> 38#include <net/addrconf.h>
39#include <rdma/ib_addr.h>
39#include <rdma/ib_umem.h> 40#include <rdma/ib_umem.h>
40 41
41#include "hnae3.h" 42#include "hnae3.h"
@@ -53,7 +54,7 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
53 dseg->len = cpu_to_le32(sg->length); 54 dseg->len = cpu_to_le32(sg->length);
54} 55}
55 56
56static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, 57static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
57 unsigned int *sge_ind) 58 unsigned int *sge_ind)
58{ 59{
59 struct hns_roce_v2_wqe_data_seg *dseg; 60 struct hns_roce_v2_wqe_data_seg *dseg;
@@ -100,10 +101,10 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
100 } 101 }
101} 102}
102 103
103static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, 104static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
104 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, 105 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
105 void *wqe, unsigned int *sge_ind, 106 void *wqe, unsigned int *sge_ind,
106 struct ib_send_wr **bad_wr) 107 const struct ib_send_wr **bad_wr)
107{ 108{
108 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 109 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
109 struct hns_roce_v2_wqe_data_seg *dseg = wqe; 110 struct hns_roce_v2_wqe_data_seg *dseg = wqe;
@@ -164,23 +165,30 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
164 return 0; 165 return 0;
165} 166}
166 167
167static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 168static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
168 struct ib_send_wr **bad_wr) 169 const struct ib_qp_attr *attr,
170 int attr_mask, enum ib_qp_state cur_state,
171 enum ib_qp_state new_state);
172
173static int hns_roce_v2_post_send(struct ib_qp *ibqp,
174 const struct ib_send_wr *wr,
175 const struct ib_send_wr **bad_wr)
169{ 176{
170 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 177 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
171 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); 178 struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
172 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; 179 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
173 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; 180 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
174 struct hns_roce_qp *qp = to_hr_qp(ibqp); 181 struct hns_roce_qp *qp = to_hr_qp(ibqp);
175 struct hns_roce_v2_wqe_data_seg *dseg;
176 struct device *dev = hr_dev->dev; 182 struct device *dev = hr_dev->dev;
177 struct hns_roce_v2_db sq_db; 183 struct hns_roce_v2_db sq_db;
184 struct ib_qp_attr attr;
178 unsigned int sge_ind = 0; 185 unsigned int sge_ind = 0;
179 unsigned int owner_bit; 186 unsigned int owner_bit;
180 unsigned long flags; 187 unsigned long flags;
181 unsigned int ind; 188 unsigned int ind;
182 void *wqe = NULL; 189 void *wqe = NULL;
183 bool loopback; 190 bool loopback;
191 int attr_mask;
184 u32 tmp_len; 192 u32 tmp_len;
185 int ret = 0; 193 int ret = 0;
186 u8 *smac; 194 u8 *smac;
@@ -273,7 +281,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
273 switch (wr->opcode) { 281 switch (wr->opcode) {
274 case IB_WR_SEND_WITH_IMM: 282 case IB_WR_SEND_WITH_IMM:
275 case IB_WR_RDMA_WRITE_WITH_IMM: 283 case IB_WR_RDMA_WRITE_WITH_IMM:
276 ud_sq_wqe->immtdata = wr->ex.imm_data; 284 ud_sq_wqe->immtdata =
285 cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
277 break; 286 break;
278 default: 287 default:
279 ud_sq_wqe->immtdata = 0; 288 ud_sq_wqe->immtdata = 0;
@@ -330,14 +339,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
330 roce_set_field(ud_sq_wqe->byte_36, 339 roce_set_field(ud_sq_wqe->byte_36,
331 V2_UD_SEND_WQE_BYTE_36_TCLASS_M, 340 V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
332 V2_UD_SEND_WQE_BYTE_36_TCLASS_S, 341 V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
333 0); 342 ah->av.sl_tclass_flowlabel >>
334 roce_set_field(ud_sq_wqe->byte_36, 343 HNS_ROCE_TCLASS_SHIFT);
335 V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
336 V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
337 0);
338 roce_set_field(ud_sq_wqe->byte_40, 344 roce_set_field(ud_sq_wqe->byte_40,
339 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, 345 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
340 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0); 346 V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
347 ah->av.sl_tclass_flowlabel &
348 HNS_ROCE_FLOW_LABEL_MASK);
341 roce_set_field(ud_sq_wqe->byte_40, 349 roce_set_field(ud_sq_wqe->byte_40,
342 V2_UD_SEND_WQE_BYTE_40_SL_M, 350 V2_UD_SEND_WQE_BYTE_40_SL_M,
343 V2_UD_SEND_WQE_BYTE_40_SL_S, 351 V2_UD_SEND_WQE_BYTE_40_SL_S,
@@ -371,7 +379,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
371 switch (wr->opcode) { 379 switch (wr->opcode) {
372 case IB_WR_SEND_WITH_IMM: 380 case IB_WR_SEND_WITH_IMM:
373 case IB_WR_RDMA_WRITE_WITH_IMM: 381 case IB_WR_RDMA_WRITE_WITH_IMM:
374 rc_sq_wqe->immtdata = wr->ex.imm_data; 382 rc_sq_wqe->immtdata =
383 cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
375 break; 384 break;
376 case IB_WR_SEND_WITH_INV: 385 case IB_WR_SEND_WITH_INV:
377 rc_sq_wqe->inv_key = 386 rc_sq_wqe->inv_key =
@@ -485,7 +494,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
485 } 494 }
486 495
487 wqe += sizeof(struct hns_roce_v2_rc_send_wqe); 496 wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
488 dseg = wqe;
489 497
490 ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe, 498 ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
491 &sge_ind, bad_wr); 499 &sge_ind, bad_wr);
@@ -523,6 +531,19 @@ out:
523 531
524 qp->sq_next_wqe = ind; 532 qp->sq_next_wqe = ind;
525 qp->next_sge = sge_ind; 533 qp->next_sge = sge_ind;
534
535 if (qp->state == IB_QPS_ERR) {
536 attr_mask = IB_QP_STATE;
537 attr.qp_state = IB_QPS_ERR;
538
539 ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
540 qp->state, IB_QPS_ERR);
541 if (ret) {
542 spin_unlock_irqrestore(&qp->sq.lock, flags);
543 *bad_wr = wr;
544 return ret;
545 }
546 }
526 } 547 }
527 548
528 spin_unlock_irqrestore(&qp->sq.lock, flags); 549 spin_unlock_irqrestore(&qp->sq.lock, flags);
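The block added in the hunk above covers the "flush CQE" case: if the QP is already in IB_QPS_ERR when work requests are posted, the driver immediately issues a modify-QP to the error state so the hardware flushes the queued WQEs with completions. A reduced model of that control flow is below; qp_modify_to_err() is a stub standing in for hns_roce_v2_modify_qp(), and locking and real WQE construction are omitted, so treat it as an illustration of the flow rather than driver code.

#include <stdio.h>

enum qp_state { QPS_RTS, QPS_ERR };

struct qp { enum qp_state state; };

static int qp_modify_to_err(struct qp *qp)
{
	/* the driver sends a modify-QP command so HW flushes pending WQEs */
	qp->state = QPS_ERR;
	return 0;
}

static int post_send(struct qp *qp, int nreq)
{
	int ret = 0;

	if (nreq) {
		/* ...ring the doorbell, advance sq_next_wqe/next_sge... */

		if (qp->state == QPS_ERR) {
			ret = qp_modify_to_err(qp);
			if (ret)
				return ret;	/* *bad_wr = wr in the driver */
		}
	}
	return ret;
}

int main(void)
{
	struct qp qp = { .state = QPS_ERR };

	printf("post_send returned %d\n", post_send(&qp, 1));
	return 0;
}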
@@ -530,16 +551,19 @@ out:
530 return ret; 551 return ret;
531} 552}
532 553
533static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 554static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
534 struct ib_recv_wr **bad_wr) 555 const struct ib_recv_wr *wr,
556 const struct ib_recv_wr **bad_wr)
535{ 557{
536 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 558 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
537 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 559 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
538 struct hns_roce_v2_wqe_data_seg *dseg; 560 struct hns_roce_v2_wqe_data_seg *dseg;
539 struct hns_roce_rinl_sge *sge_list; 561 struct hns_roce_rinl_sge *sge_list;
540 struct device *dev = hr_dev->dev; 562 struct device *dev = hr_dev->dev;
563 struct ib_qp_attr attr;
541 unsigned long flags; 564 unsigned long flags;
542 void *wqe = NULL; 565 void *wqe = NULL;
566 int attr_mask;
543 int ret = 0; 567 int ret = 0;
544 int nreq; 568 int nreq;
545 int ind; 569 int ind;
@@ -608,6 +632,20 @@ out:
608 wmb(); 632 wmb();
609 633
610 *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; 634 *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
635
636 if (hr_qp->state == IB_QPS_ERR) {
637 attr_mask = IB_QP_STATE;
638 attr.qp_state = IB_QPS_ERR;
639
640 ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
641 attr_mask, hr_qp->state,
642 IB_QPS_ERR);
643 if (ret) {
644 spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
645 *bad_wr = wr;
646 return ret;
647 }
648 }
611 } 649 }
612 spin_unlock_irqrestore(&hr_qp->rq.lock, flags); 650 spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
613 651
@@ -702,8 +740,8 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev)
702 int ret; 740 int ret;
703 741
704 /* Setup the queue entries for command queue */ 742 /* Setup the queue entries for command queue */
705 priv->cmq.csq.desc_num = 1024; 743 priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM;
706 priv->cmq.crq.desc_num = 1024; 744 priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM;
707 745
708 /* Setup the lock for command queue */ 746 /* Setup the lock for command queue */
709 spin_lock_init(&priv->cmq.csq.lock); 747 spin_lock_init(&priv->cmq.csq.lock);
@@ -925,7 +963,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
925static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) 963static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
926{ 964{
927 struct hns_roce_cmq_desc desc[2]; 965 struct hns_roce_cmq_desc desc[2];
928 struct hns_roce_pf_res *res; 966 struct hns_roce_pf_res_a *req_a;
967 struct hns_roce_pf_res_b *req_b;
929 int ret; 968 int ret;
930 int i; 969 int i;
931 970
@@ -943,21 +982,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
943 if (ret) 982 if (ret)
944 return ret; 983 return ret;
945 984
946 res = (struct hns_roce_pf_res *)desc[0].data; 985 req_a = (struct hns_roce_pf_res_a *)desc[0].data;
986 req_b = (struct hns_roce_pf_res_b *)desc[1].data;
947 987
948 hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num, 988 hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num,
949 PF_RES_DATA_1_PF_QPC_BT_NUM_M, 989 PF_RES_DATA_1_PF_QPC_BT_NUM_M,
950 PF_RES_DATA_1_PF_QPC_BT_NUM_S); 990 PF_RES_DATA_1_PF_QPC_BT_NUM_S);
951 hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num, 991 hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num,
952 PF_RES_DATA_2_PF_SRQC_BT_NUM_M, 992 PF_RES_DATA_2_PF_SRQC_BT_NUM_M,
953 PF_RES_DATA_2_PF_SRQC_BT_NUM_S); 993 PF_RES_DATA_2_PF_SRQC_BT_NUM_S);
954 hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num, 994 hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num,
955 PF_RES_DATA_3_PF_CQC_BT_NUM_M, 995 PF_RES_DATA_3_PF_CQC_BT_NUM_M,
956 PF_RES_DATA_3_PF_CQC_BT_NUM_S); 996 PF_RES_DATA_3_PF_CQC_BT_NUM_S);
957 hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num, 997 hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num,
958 PF_RES_DATA_4_PF_MPT_BT_NUM_M, 998 PF_RES_DATA_4_PF_MPT_BT_NUM_M,
959 PF_RES_DATA_4_PF_MPT_BT_NUM_S); 999 PF_RES_DATA_4_PF_MPT_BT_NUM_S);
960 1000
1001 hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num,
1002 PF_RES_DATA_3_PF_SL_NUM_M,
1003 PF_RES_DATA_3_PF_SL_NUM_S);
1004
961 return 0; 1005 return 0;
962} 1006}
963 1007
@@ -1203,12 +1247,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1203 caps->eqe_ba_pg_sz = 0; 1247 caps->eqe_ba_pg_sz = 0;
1204 caps->eqe_buf_pg_sz = 0; 1248 caps->eqe_buf_pg_sz = 0;
1205 caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; 1249 caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
1250 caps->tsq_buf_pg_sz = 0;
1206 caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE; 1251 caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
1207 1252
1208 caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | 1253 caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
1209 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | 1254 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
1210 HNS_ROCE_CAP_FLAG_RQ_INLINE | 1255 HNS_ROCE_CAP_FLAG_RQ_INLINE |
1211 HNS_ROCE_CAP_FLAG_RECORD_DB; 1256 HNS_ROCE_CAP_FLAG_RECORD_DB |
1257 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
1212 caps->pkey_table_len[0] = 1; 1258 caps->pkey_table_len[0] = 1;
1213 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; 1259 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
1214 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; 1260 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1224,6 +1270,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1224 return ret; 1270 return ret;
1225} 1271}
1226 1272
1273static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
1274 enum hns_roce_link_table_type type)
1275{
1276 struct hns_roce_cmq_desc desc[2];
1277 struct hns_roce_cfg_llm_a *req_a =
1278 (struct hns_roce_cfg_llm_a *)desc[0].data;
1279 struct hns_roce_cfg_llm_b *req_b =
1280 (struct hns_roce_cfg_llm_b *)desc[1].data;
1281 struct hns_roce_v2_priv *priv = hr_dev->priv;
1282 struct hns_roce_link_table *link_tbl;
1283 struct hns_roce_link_table_entry *entry;
1284 enum hns_roce_opcode_type opcode;
1285 u32 page_num;
1286 int i;
1287
1288 switch (type) {
1289 case TSQ_LINK_TABLE:
1290 link_tbl = &priv->tsq;
1291 opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
1292 break;
1293 case TPQ_LINK_TABLE:
1294 link_tbl = &priv->tpq;
1295 opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM;
1296 break;
1297 default:
1298 return -EINVAL;
1299 }
1300
1301 page_num = link_tbl->npages;
1302 entry = link_tbl->table.buf;
1303 memset(req_a, 0, sizeof(*req_a));
1304 memset(req_b, 0, sizeof(*req_b));
1305
1306 for (i = 0; i < 2; i++) {
1307 hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
1308
1309 if (i == 0)
1310 desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
1311 else
1312 desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
1313
1314 if (i == 0) {
1315 req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
1316 req_a->base_addr_h = (link_tbl->table.map >> 32) &
1317 0xffffffff;
1318 roce_set_field(req_a->depth_pgsz_init_en,
1319 CFG_LLM_QUE_DEPTH_M,
1320 CFG_LLM_QUE_DEPTH_S,
1321 link_tbl->npages);
1322 roce_set_field(req_a->depth_pgsz_init_en,
1323 CFG_LLM_QUE_PGSZ_M,
1324 CFG_LLM_QUE_PGSZ_S,
1325 link_tbl->pg_sz);
1326 req_a->head_ba_l = entry[0].blk_ba0;
1327 req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
1328 roce_set_field(req_a->head_ptr,
1329 CFG_LLM_HEAD_PTR_M,
1330 CFG_LLM_HEAD_PTR_S, 0);
1331 } else {
1332 req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
1333 roce_set_field(req_b->tail_ba_h,
1334 CFG_LLM_TAIL_BA_H_M,
1335 CFG_LLM_TAIL_BA_H_S,
1336 entry[page_num - 1].blk_ba1_nxt_ptr &
1337 HNS_ROCE_LINK_TABLE_BA1_M);
1338 roce_set_field(req_b->tail_ptr,
1339 CFG_LLM_TAIL_PTR_M,
1340 CFG_LLM_TAIL_PTR_S,
1341 (entry[page_num - 2].blk_ba1_nxt_ptr &
1342 HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
1343 HNS_ROCE_LINK_TABLE_NXT_PTR_S);
1344 }
1345 }
1346 roce_set_field(req_a->depth_pgsz_init_en,
1347 CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
1348
1349 return hns_roce_cmq_send(hr_dev, desc, 2);
1350}
1351
1352static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
1353 enum hns_roce_link_table_type type)
1354{
1355 struct hns_roce_v2_priv *priv = hr_dev->priv;
1356 struct hns_roce_link_table *link_tbl;
1357 struct hns_roce_link_table_entry *entry;
1358 struct device *dev = hr_dev->dev;
1359 u32 buf_chk_sz;
1360 dma_addr_t t;
1361 int func_num = 1;
1362 int pg_num_a;
1363 int pg_num_b;
1364 int pg_num;
1365 int size;
1366 int i;
1367
1368 switch (type) {
1369 case TSQ_LINK_TABLE:
1370 link_tbl = &priv->tsq;
1371 buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT);
1372 pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz;
1373 pg_num_b = hr_dev->caps.sl_num * 4 + 2;
1374 break;
1375 case TPQ_LINK_TABLE:
1376 link_tbl = &priv->tpq;
1377 buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT);
1378 pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz;
1379 pg_num_b = 2 * 4 * func_num + 2;
1380 break;
1381 default:
1382 return -EINVAL;
1383 }
1384
1385 pg_num = max(pg_num_a, pg_num_b);
1386 size = pg_num * sizeof(struct hns_roce_link_table_entry);
1387
1388 link_tbl->table.buf = dma_alloc_coherent(dev, size,
1389 &link_tbl->table.map,
1390 GFP_KERNEL);
1391 if (!link_tbl->table.buf)
1392 goto out;
1393
1394 link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list),
1395 GFP_KERNEL);
1396 if (!link_tbl->pg_list)
1397 goto err_kcalloc_failed;
1398
1399 entry = link_tbl->table.buf;
1400 for (i = 0; i < pg_num; ++i) {
1401 link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz,
1402 &t, GFP_KERNEL);
1403 if (!link_tbl->pg_list[i].buf)
1404 goto err_alloc_buf_failed;
1405
1406 link_tbl->pg_list[i].map = t;
1407 memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz);
1408
1409 entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
1410 roce_set_field(entry[i].blk_ba1_nxt_ptr,
1411 HNS_ROCE_LINK_TABLE_BA1_M,
1412 HNS_ROCE_LINK_TABLE_BA1_S,
1413 t >> 44);
1414
1415 if (i < (pg_num - 1))
1416 roce_set_field(entry[i].blk_ba1_nxt_ptr,
1417 HNS_ROCE_LINK_TABLE_NXT_PTR_M,
1418 HNS_ROCE_LINK_TABLE_NXT_PTR_S,
1419 i + 1);
1420 }
1421 link_tbl->npages = pg_num;
1422 link_tbl->pg_sz = buf_chk_sz;
1423
1424 return hns_roce_config_link_table(hr_dev, type);
1425
1426err_alloc_buf_failed:
1427 for (i -= 1; i >= 0; i--)
1428 dma_free_coherent(dev, buf_chk_sz,
1429 link_tbl->pg_list[i].buf,
1430 link_tbl->pg_list[i].map);
1431 kfree(link_tbl->pg_list);
1432
1433err_kcalloc_failed:
1434 dma_free_coherent(dev, size, link_tbl->table.buf,
1435 link_tbl->table.map);
1436
1437out:
1438 return -ENOMEM;
1439}
1440
1441static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
1442 struct hns_roce_link_table *link_tbl)
1443{
1444 struct device *dev = hr_dev->dev;
1445 int size;
1446 int i;
1447
1448 size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry);
1449
1450 for (i = 0; i < link_tbl->npages; ++i)
1451 if (link_tbl->pg_list[i].buf)
1452 dma_free_coherent(dev, link_tbl->pg_sz,
1453 link_tbl->pg_list[i].buf,
1454 link_tbl->pg_list[i].map);
1455 kfree(link_tbl->pg_list);
1456
1457 dma_free_coherent(dev, size, link_tbl->table.buf,
1458 link_tbl->table.map);
1459}
1460
1461static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
1462{
1463 struct hns_roce_v2_priv *priv = hr_dev->priv;
1464 int ret;
1465
1466 /* TSQ includes SQ doorbell and ack doorbell */
1467 ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
1468 if (ret) {
1469 dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
1470 return ret;
1471 }
1472
1473 ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
1474 if (ret) {
1475 dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret);
1476 goto err_tpq_init_failed;
1477 }
1478
1479 return 0;
1480
1481err_tpq_init_failed:
1482 hns_roce_free_link_table(hr_dev, &priv->tsq);
1483
1484 return ret;
1485}
1486
1487static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
1488{
1489 struct hns_roce_v2_priv *priv = hr_dev->priv;
1490
1491 hns_roce_free_link_table(hr_dev, &priv->tpq);
1492 hns_roce_free_link_table(hr_dev, &priv->tsq);
1493}
1494
1227static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) 1495static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
1228{ 1496{
1229 u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); 1497 u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
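In the new hns_roce_init_link_table() above, each link-table entry records one DMA page: bits 43..12 of the address go into blk_ba0, the remaining high bits into blk_ba1_nxt_ptr, and a next-pointer field in the same word chains entry i to entry i+1 (the last entry is left unchained). The sketch below shows one plausible packing; the 20-bit BA1 field and next-pointer position are assumptions standing in for the HNS_ROCE_LINK_TABLE_* masks, purely for illustration.

#include <stdint.h>
#include <stdio.h>

struct link_entry {
	uint32_t blk_ba0;		/* bits 43..12 of the page DMA address */
	uint32_t blk_ba1_nxt_ptr;	/* high address bits + next entry index */
};

#define BA1_MASK	0x000fffffu	/* assumed width of the BA1 field */
#define NXT_PTR_SHIFT	20		/* assumed position of the next pointer */

static void fill_entry(struct link_entry *e, uint64_t dma, uint32_t next, int has_next)
{
	e->blk_ba0 = (uint32_t)(dma >> 12);
	e->blk_ba1_nxt_ptr = (uint32_t)(dma >> 44) & BA1_MASK;
	if (has_next)
		e->blk_ba1_nxt_ptr |= next << NXT_PTR_SHIFT;
}

int main(void)
{
	struct link_entry tbl[4];
	uint64_t base = 0x123456789000ull;	/* pretend page-aligned DMA range */
	int i, n = 4;

	for (i = 0; i < n; i++)
		fill_entry(&tbl[i], base + (uint64_t)i * 4096, i + 1, i < n - 1);

	for (i = 0; i < n; i++)
		printf("entry %d: ba0=%08x ba1_nxt=%08x\n",
		       i, tbl[i].blk_ba0, tbl[i].blk_ba1_nxt_ptr);
	return 0;
}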
@@ -1307,13 +1575,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
1307 return 0; 1575 return 0;
1308} 1576}
1309 1577
1578static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
1579 int gid_index, const union ib_gid *gid,
1580 enum hns_roce_sgid_type sgid_type)
1581{
1582 struct hns_roce_cmq_desc desc;
1583 struct hns_roce_cfg_sgid_tb *sgid_tb =
1584 (struct hns_roce_cfg_sgid_tb *)desc.data;
1585 u32 *p;
1586
1587 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
1588
1589 roce_set_field(sgid_tb->table_idx_rsv,
1590 CFG_SGID_TB_TABLE_IDX_M,
1591 CFG_SGID_TB_TABLE_IDX_S, gid_index);
1592 roce_set_field(sgid_tb->vf_sgid_type_rsv,
1593 CFG_SGID_TB_VF_SGID_TYPE_M,
1594 CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
1595
1596 p = (u32 *)&gid->raw[0];
1597 sgid_tb->vf_sgid_l = cpu_to_le32(*p);
1598
1599 p = (u32 *)&gid->raw[4];
1600 sgid_tb->vf_sgid_ml = cpu_to_le32(*p);
1601
1602 p = (u32 *)&gid->raw[8];
1603 sgid_tb->vf_sgid_mh = cpu_to_le32(*p);
1604
1605 p = (u32 *)&gid->raw[0xc];
1606 sgid_tb->vf_sgid_h = cpu_to_le32(*p);
1607
1608 return hns_roce_cmq_send(hr_dev, &desc, 1);
1609}
1610
1310static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port, 1611static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
1311 int gid_index, union ib_gid *gid, 1612 int gid_index, const union ib_gid *gid,
1312 const struct ib_gid_attr *attr) 1613 const struct ib_gid_attr *attr)
1313{ 1614{
1314 enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1; 1615 enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
1315 u32 *p; 1616 int ret;
1316 u32 val;
1317 1617
1318 if (!gid || !attr) 1618 if (!gid || !attr)
1319 return -EINVAL; 1619 return -EINVAL;
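hns_roce_config_sgid_table() above replaces the per-register GID writes with a single command-queue descriptor, splitting the 16-byte GID into four little-endian 32-bit words. A small sketch of that split is shown below; the struct is a stand-in for hns_roce_cfg_sgid_tb, cpu_to_le32() assumes a little-endian host, and memcpy() is used instead of the driver's pointer cast to keep the example strictly portable.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define cpu_to_le32(x)	((uint32_t)(x))

struct sgid_tb_words {			/* stand-in for hns_roce_cfg_sgid_tb */
	uint32_t vf_sgid_l, vf_sgid_ml, vf_sgid_mh, vf_sgid_h;
};

static void pack_gid(struct sgid_tb_words *tb, const uint8_t raw[16])
{
	uint32_t w;

	memcpy(&w, &raw[0], 4);  tb->vf_sgid_l  = cpu_to_le32(w);
	memcpy(&w, &raw[4], 4);  tb->vf_sgid_ml = cpu_to_le32(w);
	memcpy(&w, &raw[8], 4);  tb->vf_sgid_mh = cpu_to_le32(w);
	memcpy(&w, &raw[12], 4); tb->vf_sgid_h  = cpu_to_le32(w);
}

int main(void)
{
	/* fe80::1-style link-local GID, just as sample input */
	uint8_t raw[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
			    0, 0, 0, 0, 0, 0, 0, 0x01 };
	struct sgid_tb_words tb;

	pack_gid(&tb, raw);
	printf("%08x %08x %08x %08x\n",
	       tb.vf_sgid_l, tb.vf_sgid_ml, tb.vf_sgid_mh, tb.vf_sgid_h);
	return 0;
}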
@@ -1328,49 +1628,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
1328 sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6; 1628 sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
1329 } 1629 }
1330 1630
1331 p = (u32 *)&gid->raw[0]; 1631 ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
1332 roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG + 1632 if (ret)
1333 0x20 * gid_index); 1633 dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret);
1334
1335 p = (u32 *)&gid->raw[4];
1336 roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG +
1337 0x20 * gid_index);
1338
1339 p = (u32 *)&gid->raw[8];
1340 roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG +
1341 0x20 * gid_index);
1342
1343 p = (u32 *)&gid->raw[0xc];
1344 roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG +
1345 0x20 * gid_index);
1346
1347 val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index);
1348 roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M,
1349 ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type);
1350
1351 roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val);
1352 1634
1353 return 0; 1635 return ret;
1354} 1636}
1355 1637
1356static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, 1638static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
1357 u8 *addr) 1639 u8 *addr)
1358{ 1640{
1641 struct hns_roce_cmq_desc desc;
1642 struct hns_roce_cfg_smac_tb *smac_tb =
1643 (struct hns_roce_cfg_smac_tb *)desc.data;
1359 u16 reg_smac_h; 1644 u16 reg_smac_h;
1360 u32 reg_smac_l; 1645 u32 reg_smac_l;
1361 u32 val; 1646
1647 hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
1362 1648
1363 reg_smac_l = *(u32 *)(&addr[0]); 1649 reg_smac_l = *(u32 *)(&addr[0]);
1364 roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG + 1650 reg_smac_h = *(u16 *)(&addr[4]);
1365 0x08 * phy_port);
1366 val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port);
1367 1651
1368 reg_smac_h = *(u16 *)(&addr[4]); 1652 memset(smac_tb, 0, sizeof(*smac_tb));
1369 roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M, 1653 roce_set_field(smac_tb->tb_idx_rsv,
1370 ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h); 1654 CFG_SMAC_TB_IDX_M,
1371 roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val); 1655 CFG_SMAC_TB_IDX_S, phy_port);
1656 roce_set_field(smac_tb->vf_smac_h_rsv,
1657 CFG_SMAC_TB_VF_SMAC_H_M,
1658 CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
1659 smac_tb->vf_smac_l = reg_smac_l;
1372 1660
1373 return 0; 1661 return hns_roce_cmq_send(hr_dev, &desc, 1);
1374} 1662}
1375 1663
1376static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, 1664static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
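hns_roce_v2_set_mac() above likewise moves from direct register writes to a command descriptor, packing the 6-byte MAC as a 32-bit low word plus a 16-bit high field. A minimal sketch of that packing follows; the struct and field placement are illustrative stand-ins for hns_roce_cfg_smac_tb and the CFG_SMAC_TB_* definitions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct smac_tb_words {		/* stand-in for hns_roce_cfg_smac_tb */
	uint32_t vf_smac_l;
	uint32_t vf_smac_h_rsv;	/* low 16 bits carry the MAC high part */
};

static void pack_mac(struct smac_tb_words *tb, const uint8_t addr[6])
{
	uint32_t lo;
	uint16_t hi;

	memcpy(&lo, &addr[0], 4);
	memcpy(&hi, &addr[4], 2);
	tb->vf_smac_l = lo;
	tb->vf_smac_h_rsv = hi;	/* roce_set_field() places it in the driver */
}

int main(void)
{
	uint8_t mac[6] = { 0x00, 0x1b, 0x21, 0xaa, 0xbb, 0xcc };
	struct smac_tb_words tb;

	pack_mac(&tb, mac);
	printf("smac_l=%08x smac_h=%04x\n",
	       tb.vf_smac_l, tb.vf_smac_h_rsv & 0xffff);
	return 0;
}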
@@ -1758,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1758 struct hns_roce_v2_cqe *cqe; 2046 struct hns_roce_v2_cqe *cqe;
1759 struct hns_roce_qp *hr_qp; 2047 struct hns_roce_qp *hr_qp;
1760 struct hns_roce_wq *wq; 2048 struct hns_roce_wq *wq;
2049 struct ib_qp_attr attr;
2050 int attr_mask;
1761 int is_send; 2051 int is_send;
1762 u16 wqe_ctr; 2052 u16 wqe_ctr;
1763 u32 opcode; 2053 u32 opcode;
@@ -1844,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1844 break; 2134 break;
1845 } 2135 }
1846 2136
1847 /* CQE status error, directly return */ 2137 /* flush cqe if wc status is error, excluding flush error */
1848 if (wc->status != IB_WC_SUCCESS) 2138 if ((wc->status != IB_WC_SUCCESS) &&
2139 (wc->status != IB_WC_WR_FLUSH_ERR)) {
2140 attr_mask = IB_QP_STATE;
2141 attr.qp_state = IB_QPS_ERR;
2142 return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
2143 &attr, attr_mask,
2144 (*cur_qp)->state, IB_QPS_ERR);
2145 }
2146
2147 if (wc->status == IB_WC_WR_FLUSH_ERR)
1849 return 0; 2148 return 0;
1850 2149
1851 if (is_send) { 2150 if (is_send) {
@@ -1931,7 +2230,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1931 case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM: 2230 case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
1932 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 2231 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
1933 wc->wc_flags = IB_WC_WITH_IMM; 2232 wc->wc_flags = IB_WC_WITH_IMM;
1934 wc->ex.imm_data = cqe->immtdata; 2233 wc->ex.imm_data =
2234 cpu_to_be32(le32_to_cpu(cqe->immtdata));
1935 break; 2235 break;
1936 case HNS_ROCE_V2_OPCODE_SEND: 2236 case HNS_ROCE_V2_OPCODE_SEND:
1937 wc->opcode = IB_WC_RECV; 2237 wc->opcode = IB_WC_RECV;
@@ -1940,7 +2240,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
1940 case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM: 2240 case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
1941 wc->opcode = IB_WC_RECV; 2241 wc->opcode = IB_WC_RECV;
1942 wc->wc_flags = IB_WC_WITH_IMM; 2242 wc->wc_flags = IB_WC_WITH_IMM;
1943 wc->ex.imm_data = cqe->immtdata; 2243 wc->ex.imm_data =
2244 cpu_to_be32(le32_to_cpu(cqe->immtdata));
1944 break; 2245 break;
1945 case HNS_ROCE_V2_OPCODE_SEND_WITH_INV: 2246 case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
1946 wc->opcode = IB_WC_RECV; 2247 wc->opcode = IB_WC_RECV;
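The two hunks above fix the immediate-data byte order in the poll path: the v2 CQE carries immtdata little-endian, while ib_wc.ex.imm_data is defined as big-endian, so the driver now converts with cpu_to_be32(le32_to_cpu(...)). A tiny self-contained sketch of that double conversion on a little-endian host, with the two conversions open-coded:

#include <stdint.h>
#include <stdio.h>

static uint32_t bswap32(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
	       ((x << 8) & 0x00ff0000u) | (x << 24);
}

/* Little-endian host assumed for the two helpers below */
static uint32_t le32_to_cpu(uint32_t x) { return x; }
static uint32_t cpu_to_be32(uint32_t x) { return bswap32(x); }

int main(void)
{
	uint32_t cqe_immtdata = 0x11223344u;	/* as read from the CQE (LE) */
	uint32_t wc_imm_data = cpu_to_be32(le32_to_cpu(cqe_immtdata));

	/* on a little-endian host the wc value is now stored as bytes 11 22 33 44 */
	printf("cqe=%08x wc=%08x\n", cqe_immtdata, wc_imm_data);
	return 0;
}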
@@ -2273,10 +2574,10 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2273 V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); 2574 V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
2274 2575
2275 /* No VLAN need to set 0xFFF */ 2576 /* No VLAN need to set 0xFFF */
2276 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, 2577 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
2277 V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff); 2578 V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
2278 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M, 2579 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
2279 V2_QPC_BYTE_24_VLAN_IDX_S, 0); 2580 V2_QPC_BYTE_24_VLAN_ID_S, 0);
2280 2581
2281 /* 2582 /*
2282 * Set some fields in context to zero, Because the default values 2583 * Set some fields in context to zero, Because the default values
@@ -2886,21 +3187,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2886 roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, 3187 roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
2887 V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); 3188 V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
2888 3189
2889 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
2890 V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
2891 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
2892 V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
2893
2894 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
2895 V2_QPC_BYTE_28_FL_S, grh->flow_label);
2896 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
2897 V2_QPC_BYTE_28_FL_S, 0);
2898
2899 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
2900 V2_QPC_BYTE_24_TC_S, grh->traffic_class);
2901 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
2902 V2_QPC_BYTE_24_TC_S, 0);
2903
2904 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD) 3190 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
2905 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, 3191 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
2906 V2_QPC_BYTE_24_MTU_S, IB_MTU_4096); 3192 V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
@@ -2911,9 +3197,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2911 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, 3197 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
2912 V2_QPC_BYTE_24_MTU_S, 0); 3198 V2_QPC_BYTE_24_MTU_S, 0);
2913 3199
2914 memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
2915 memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
2916
2917 roce_set_field(context->byte_84_rq_ci_pi, 3200 roce_set_field(context->byte_84_rq_ci_pi,
2918 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, 3201 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
2919 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head); 3202 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head);
@@ -2952,12 +3235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
2952 V2_QPC_BYTE_168_LP_SGEN_INI_M, 3235 V2_QPC_BYTE_168_LP_SGEN_INI_M,
2953 V2_QPC_BYTE_168_LP_SGEN_INI_S, 0); 3236 V2_QPC_BYTE_168_LP_SGEN_INI_S, 0);
2954 3237
2955 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
2956 V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
2957 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
2958 V2_QPC_BYTE_28_SL_S, 0);
2959 hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
2960
2961 return 0; 3238 return 0;
2962} 3239}
2963 3240
@@ -3135,13 +3412,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
3135 V2_QPC_BYTE_28_AT_S, 0); 3412 V2_QPC_BYTE_28_AT_S, 0);
3136 } 3413 }
3137 3414
3138 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
3139 V2_QPC_BYTE_28_SL_S,
3140 rdma_ah_get_sl(&attr->ah_attr));
3141 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
3142 V2_QPC_BYTE_28_SL_S, 0);
3143 hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
3144
3145 roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, 3415 roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M,
3146 V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn); 3416 V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn);
3147 roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, 3417 roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -3224,9 +3494,114 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
3224 ; 3494 ;
3225 } else { 3495 } else {
3226 dev_err(dev, "Illegal state for QP!\n"); 3496 dev_err(dev, "Illegal state for QP!\n");
3497 ret = -EINVAL;
3227 goto out; 3498 goto out;
3228 } 3499 }
3229 3500
3501 /* When QP state is err, SQ and RQ WQE should be flushed */
3502 if (new_state == IB_QPS_ERR) {
3503 roce_set_field(context->byte_160_sq_ci_pi,
3504 V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
3505 V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
3506 hr_qp->sq.head);
3507 roce_set_field(qpc_mask->byte_160_sq_ci_pi,
3508 V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
3509 V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
3510 roce_set_field(context->byte_84_rq_ci_pi,
3511 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
3512 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
3513 hr_qp->rq.head);
3514 roce_set_field(qpc_mask->byte_84_rq_ci_pi,
3515 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
3516 V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
3517 }
3518
3519 if (attr_mask & IB_QP_AV) {
3520 const struct ib_global_route *grh =
3521 rdma_ah_read_grh(&attr->ah_attr);
3522 const struct ib_gid_attr *gid_attr = NULL;
3523 u8 src_mac[ETH_ALEN];
3524 int is_roce_protocol;
3525 u16 vlan = 0xffff;
3526 u8 ib_port;
3527 u8 hr_port;
3528
3529 ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
3530 hr_qp->port + 1;
3531 hr_port = ib_port - 1;
3532 is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
3533 rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
3534
3535 if (is_roce_protocol) {
3536 gid_attr = attr->ah_attr.grh.sgid_attr;
3537 vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
3538 memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
3539 }
3540
3541 roce_set_field(context->byte_24_mtu_tc,
3542 V2_QPC_BYTE_24_VLAN_ID_M,
3543 V2_QPC_BYTE_24_VLAN_ID_S, vlan);
3544 roce_set_field(qpc_mask->byte_24_mtu_tc,
3545 V2_QPC_BYTE_24_VLAN_ID_M,
3546 V2_QPC_BYTE_24_VLAN_ID_S, 0);
3547
3548 if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
3549 dev_err(hr_dev->dev,
3550 "sgid_index(%u) too large. max is %d\n",
3551 grh->sgid_index,
3552 hr_dev->caps.gid_table_len[hr_port]);
3553 ret = -EINVAL;
3554 goto out;
3555 }
3556
3557 if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
3558 dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
3559 ret = -EINVAL;
3560 goto out;
3561 }
3562
3563 roce_set_field(context->byte_52_udpspn_dmac,
3564 V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
3565 (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
3566 0 : 0x12b7);
3567
3568 roce_set_field(qpc_mask->byte_52_udpspn_dmac,
3569 V2_QPC_BYTE_52_UDPSPN_M,
3570 V2_QPC_BYTE_52_UDPSPN_S, 0);
3571
3572 roce_set_field(context->byte_20_smac_sgid_idx,
3573 V2_QPC_BYTE_20_SGID_IDX_M,
3574 V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
3575
3576 roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
3577 V2_QPC_BYTE_20_SGID_IDX_M,
3578 V2_QPC_BYTE_20_SGID_IDX_S, 0);
3579
3580 roce_set_field(context->byte_24_mtu_tc,
3581 V2_QPC_BYTE_24_HOP_LIMIT_M,
3582 V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
3583 roce_set_field(qpc_mask->byte_24_mtu_tc,
3584 V2_QPC_BYTE_24_HOP_LIMIT_M,
3585 V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
3586
3587 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
3588 V2_QPC_BYTE_24_TC_S, grh->traffic_class);
3589 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
3590 V2_QPC_BYTE_24_TC_S, 0);
3591 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
3592 V2_QPC_BYTE_28_FL_S, grh->flow_label);
3593 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
3594 V2_QPC_BYTE_28_FL_S, 0);
3595 memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
3596 memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
3597 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
3598 V2_QPC_BYTE_28_SL_S,
3599 rdma_ah_get_sl(&attr->ah_attr));
3600 roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
3601 V2_QPC_BYTE_28_SL_S, 0);
3602 hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
3603 }
3604
3230 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) 3605 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
3231 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); 3606 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
3232 3607
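
The roce_set_field() calls in the IB_QP_AV block above all follow one pattern: write a value into a masked bit range of a 32-bit context word, while the same range in the companion qpc_mask word is zeroed. A generic sketch of that helper style in plain C (macro and variable names are illustrative, not the driver's):

        #include <stdint.h>
        #include <stdio.h>

        #define FIELD_M(high, low)      (((~0u) >> (31 - (high))) & ~((1u << (low)) - 1))
        #define FIELD_S(low)            (low)

        /* Replace the bits selected by mask with (val << shift). */
        static inline void set_field(uint32_t *word, uint32_t mask,
                                     uint32_t shift, uint32_t val)
        {
                *word = (*word & ~mask) | ((val << shift) & mask);
        }

        int main(void)
        {
                uint32_t byte_24 = 0;

                /* e.g. a 4-bit field at bits 31..28 and an 8-bit field at 15..8 */
                set_field(&byte_24, FIELD_M(31, 28), FIELD_S(28), 5);
                set_field(&byte_24, FIELD_M(15, 8), FIELD_S(8), 0x20);
                printf("byte_24 = 0x%08x\n", byte_24);
                return 0;
        }
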
@@ -3497,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
3497 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); 3872 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
3498 3873
3499 if (is_user) { 3874 if (is_user) {
3875 if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1))
3876 hns_roce_db_unmap_user(
3877 to_hr_ucontext(hr_qp->ibqp.uobject->context),
3878 &hr_qp->sdb);
3879
3500 if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) 3880 if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
3501 hns_roce_db_unmap_user( 3881 hns_roce_db_unmap_user(
3502 to_hr_ucontext(hr_qp->ibqp.uobject->context), 3882 to_hr_ucontext(hr_qp->ibqp.uobject->context),
@@ -3579,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
3579 return ret; 3959 return ret;
3580} 3960}
3581 3961
3962static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
3963{
3964 struct hns_roce_qp *hr_qp;
3965 struct ib_qp_attr attr;
3966 int attr_mask;
3967 int ret;
3968
3969 hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
3970 if (!hr_qp) {
3971 dev_warn(hr_dev->dev, "no hr_qp can be found!\n");
3972 return;
3973 }
3974
3975 if (hr_qp->ibqp.uobject) {
3976 if (hr_qp->sdb_en == 1) {
3977 hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
3978 hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
3979 } else {
3980 dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
3981 return;
3982 }
3983 }
3984
3985 attr_mask = IB_QP_STATE;
3986 attr.qp_state = IB_QPS_ERR;
3987 ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask,
3988 hr_qp->state, IB_QPS_ERR);
3989 if (ret)
3990 dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n",
3991 qpn);
3992}
3993
3994static void hns_roce_irq_work_handle(struct work_struct *work)
3995{
3996 struct hns_roce_work *irq_work =
3997 container_of(work, struct hns_roce_work, work);
3998 u32 qpn = irq_work->qpn;
3999
4000 switch (irq_work->event_type) {
4001 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4002 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4003 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4004 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4005 break;
4006 default:
4007 break;
4008 }
4009
4010 kfree(irq_work);
4011}
4012
4013static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
4014 struct hns_roce_eq *eq, u32 qpn)
4015{
4016 struct hns_roce_work *irq_work;
4017
4018 irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC);
4019 if (!irq_work)
4020 return;
4021
4022 INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
4023 irq_work->hr_dev = hr_dev;
4024 irq_work->qpn = qpn;
4025 irq_work->event_type = eq->event_type;
4026 irq_work->sub_type = eq->sub_type;
4027 queue_work(hr_dev->irq_workq, &(irq_work->work));
4028}
4029
3582static void set_eq_cons_index_v2(struct hns_roce_eq *eq) 4030static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
3583{ 4031{
3584 u32 doorbell[2]; 4032 u32 doorbell[2];
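
The two functions added above defer the heavier QP-to-error transition out of the interrupt path: the AEQ handler allocates a small work item with GFP_ATOMIC and queues it on the driver's single-threaded workqueue, and the handler frees it after acting on the event. A stripped-down kernel-style sketch of that pattern (names here are illustrative, not the driver's):

        #include <linux/slab.h>
        #include <linux/types.h>
        #include <linux/workqueue.h>

        struct deferred_event {
                struct work_struct work;
                u32 qpn;
                int event_type;
        };

        static void deferred_event_handle(struct work_struct *work)
        {
                struct deferred_event *ev =
                        container_of(work, struct deferred_event, work);

                /* ... act on ev->qpn / ev->event_type in process context ... */
                kfree(ev);
        }

        /* Called from hard-IRQ context: allocate atomically, queue, return. */
        static void queue_deferred_event(struct workqueue_struct *wq,
                                         u32 qpn, int event_type)
        {
                struct deferred_event *ev = kzalloc(sizeof(*ev), GFP_ATOMIC);

                if (!ev)
                        return;

                INIT_WORK(&ev->work, deferred_event_handle);
                ev->qpn = qpn;
                ev->event_type = event_type;
                queue_work(wq, &ev->work);
        }
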
@@ -3681,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
3681 4129
3682static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, 4130static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
3683 struct hns_roce_aeqe *aeqe, 4131 struct hns_roce_aeqe *aeqe,
3684 int event_type) 4132 int event_type, u32 qpn)
3685{ 4133{
3686 struct device *dev = hr_dev->dev; 4134 struct device *dev = hr_dev->dev;
3687 u32 qpn;
3688
3689 qpn = roce_get_field(aeqe->event.qp_event.qp,
3690 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
3691 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
3692 4135
3693 switch (event_type) { 4136 switch (event_type) {
3694 case HNS_ROCE_EVENT_TYPE_COMM_EST: 4137 case HNS_ROCE_EVENT_TYPE_COMM_EST:
@@ -3715,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
3715 4158
3716static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, 4159static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
3717 struct hns_roce_aeqe *aeqe, 4160 struct hns_roce_aeqe *aeqe,
3718 int event_type) 4161 int event_type, u32 cqn)
3719{ 4162{
3720 struct device *dev = hr_dev->dev; 4163 struct device *dev = hr_dev->dev;
3721 u32 cqn;
3722
3723 cqn = roce_get_field(aeqe->event.cq_event.cq,
3724 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
3725 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
3726 4164
3727 switch (event_type) { 4165 switch (event_type) {
3728 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4166 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -3787,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3787 struct hns_roce_aeqe *aeqe; 4225 struct hns_roce_aeqe *aeqe;
3788 int aeqe_found = 0; 4226 int aeqe_found = 0;
3789 int event_type; 4227 int event_type;
4228 int sub_type;
4229 u32 qpn;
4230 u32 cqn;
3790 4231
3791 while ((aeqe = next_aeqe_sw_v2(eq))) { 4232 while ((aeqe = next_aeqe_sw_v2(eq))) {
3792 4233
@@ -3798,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3798 event_type = roce_get_field(aeqe->asyn, 4239 event_type = roce_get_field(aeqe->asyn,
3799 HNS_ROCE_V2_AEQE_EVENT_TYPE_M, 4240 HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
3800 HNS_ROCE_V2_AEQE_EVENT_TYPE_S); 4241 HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
4242 sub_type = roce_get_field(aeqe->asyn,
4243 HNS_ROCE_V2_AEQE_SUB_TYPE_M,
4244 HNS_ROCE_V2_AEQE_SUB_TYPE_S);
4245 qpn = roce_get_field(aeqe->event.qp_event.qp,
4246 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
4247 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
4248 cqn = roce_get_field(aeqe->event.cq_event.cq,
4249 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
4250 HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
3801 4251
3802 switch (event_type) { 4252 switch (event_type) {
3803 case HNS_ROCE_EVENT_TYPE_PATH_MIG: 4253 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -3811,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3811 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4261 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
3812 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4262 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
3813 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4263 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
3814 hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type); 4264 hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
4265 qpn);
3815 break; 4266 break;
3816 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 4267 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
3817 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 4268 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
@@ -3820,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3820 break; 4271 break;
3821 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4272 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
3822 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4273 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
3823 hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type); 4274 hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
4275 cqn);
3824 break; 4276 break;
3825 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 4277 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
3826 dev_warn(dev, "DB overflow.\n"); 4278 dev_warn(dev, "DB overflow.\n");
@@ -3843,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3843 break; 4295 break;
3844 }; 4296 };
3845 4297
4298 eq->event_type = event_type;
4299 eq->sub_type = sub_type;
3846 ++eq->cons_index; 4300 ++eq->cons_index;
3847 aeqe_found = 1; 4301 aeqe_found = 1;
3848 4302
@@ -3850,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
3850 dev_warn(dev, "cons_index overflow, set back to 0.\n"); 4304 dev_warn(dev, "cons_index overflow, set back to 0.\n");
3851 eq->cons_index = 0; 4305 eq->cons_index = 0;
3852 } 4306 }
4307 hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
3853 } 4308 }
3854 4309
3855 set_eq_cons_index_v2(eq); 4310 set_eq_cons_index_v2(eq);
@@ -4052,15 +4507,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
4052 u32 bt_chk_sz; 4507 u32 bt_chk_sz;
4053 u32 mhop_num; 4508 u32 mhop_num;
4054 int eqe_alloc; 4509 int eqe_alloc;
4055 int ba_num;
4056 int i = 0; 4510 int i = 0;
4057 int j = 0; 4511 int j = 0;
4058 4512
4059 mhop_num = hr_dev->caps.eqe_hop_num; 4513 mhop_num = hr_dev->caps.eqe_hop_num;
4060 buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); 4514 buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
4061 bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); 4515 bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
4062 ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) /
4063 buf_chk_sz;
4064 4516
4065 /* hop_num = 0 */ 4517 /* hop_num = 0 */
4066 if (mhop_num == HNS_ROCE_HOP_NUM_0) { 4518 if (mhop_num == HNS_ROCE_HOP_NUM_0) {
@@ -4669,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
4669 } 5121 }
4670 } 5122 }
4671 5123
5124 hr_dev->irq_workq =
5125 create_singlethread_workqueue("hns_roce_irq_workqueue");
5126 if (!hr_dev->irq_workq) {
5127 dev_err(dev, "Create irq workqueue failed!\n");
5128 goto err_request_irq_fail;
5129 }
5130
4672 return 0; 5131 return 0;
4673 5132
4674err_request_irq_fail: 5133err_request_irq_fail:
@@ -4719,12 +5178,17 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
4719 kfree(hr_dev->irq_names[i]); 5178 kfree(hr_dev->irq_names[i]);
4720 5179
4721 kfree(eq_table->eq); 5180 kfree(eq_table->eq);
5181
5182 flush_workqueue(hr_dev->irq_workq);
5183 destroy_workqueue(hr_dev->irq_workq);
4722} 5184}
4723 5185
4724static const struct hns_roce_hw hns_roce_hw_v2 = { 5186static const struct hns_roce_hw hns_roce_hw_v2 = {
4725 .cmq_init = hns_roce_v2_cmq_init, 5187 .cmq_init = hns_roce_v2_cmq_init,
4726 .cmq_exit = hns_roce_v2_cmq_exit, 5188 .cmq_exit = hns_roce_v2_cmq_exit,
4727 .hw_profile = hns_roce_v2_profile, 5189 .hw_profile = hns_roce_v2_profile,
5190 .hw_init = hns_roce_v2_init,
5191 .hw_exit = hns_roce_v2_exit,
4728 .post_mbox = hns_roce_v2_post_mbox, 5192 .post_mbox = hns_roce_v2_post_mbox,
4729 .chk_mbox = hns_roce_v2_chk_mbox, 5193 .chk_mbox = hns_roce_v2_chk_mbox,
4730 .set_gid = hns_roce_v2_set_gid, 5194 .set_gid = hns_roce_v2_set_gid,
@@ -4749,6 +5213,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
4749static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { 5213static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
4750 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, 5214 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
4751 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, 5215 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
5216 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
5217 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
4752 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, 5218 {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
4753 /* required last entry */ 5219 /* required last entry */
4754 {0, } 5220 {0, }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index d47675f365c7..14aa308befef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -112,6 +112,9 @@
112 (step_idx == 1 && hop_num == 1) || \ 112 (step_idx == 1 && hop_num == 1) || \
113 (step_idx == 2 && hop_num == 2)) 113 (step_idx == 2 && hop_num == 2))
114 114
115#define CMD_CSQ_DESC_NUM 1024
116#define CMD_CRQ_DESC_NUM 1024
117
115enum { 118enum {
116 NO_ARMED = 0x0, 119 NO_ARMED = 0x0,
117 REG_NXT_CEQE = 0x2, 120 REG_NXT_CEQE = 0x2,
@@ -203,6 +206,10 @@ enum hns_roce_opcode_type {
203 HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, 206 HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
204 HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, 207 HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
205 HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, 208 HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
209 HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
210 HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
211 HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
212 HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
206 HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, 213 HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
207}; 214};
208 215
@@ -447,8 +454,8 @@ struct hns_roce_v2_qp_context {
447#define V2_QPC_BYTE_24_TC_S 8 454#define V2_QPC_BYTE_24_TC_S 8
448#define V2_QPC_BYTE_24_TC_M GENMASK(15, 8) 455#define V2_QPC_BYTE_24_TC_M GENMASK(15, 8)
449 456
450#define V2_QPC_BYTE_24_VLAN_IDX_S 16 457#define V2_QPC_BYTE_24_VLAN_ID_S 16
451#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16) 458#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16)
452 459
453#define V2_QPC_BYTE_24_MTU_S 28 460#define V2_QPC_BYTE_24_MTU_S 28
454#define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28) 461#define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28)
@@ -768,7 +775,7 @@ struct hns_roce_v2_cqe {
768 __le32 byte_4; 775 __le32 byte_4;
769 union { 776 union {
770 __le32 rkey; 777 __le32 rkey;
771 __be32 immtdata; 778 __le32 immtdata;
772 }; 779 };
773 __le32 byte_12; 780 __le32 byte_12;
774 __le32 byte_16; 781 __le32 byte_16;
@@ -926,7 +933,7 @@ struct hns_roce_v2_cq_db {
926struct hns_roce_v2_ud_send_wqe { 933struct hns_roce_v2_ud_send_wqe {
927 __le32 byte_4; 934 __le32 byte_4;
928 __le32 msg_len; 935 __le32 msg_len;
929 __be32 immtdata; 936 __le32 immtdata;
930 __le32 byte_16; 937 __le32 byte_16;
931 __le32 byte_20; 938 __le32 byte_20;
932 __le32 byte_24; 939 __le32 byte_24;
@@ -1012,7 +1019,7 @@ struct hns_roce_v2_rc_send_wqe {
1012 __le32 msg_len; 1019 __le32 msg_len;
1013 union { 1020 union {
1014 __le32 inv_key; 1021 __le32 inv_key;
1015 __be32 immtdata; 1022 __le32 immtdata;
1016 }; 1023 };
1017 __le32 byte_16; 1024 __le32 byte_16;
1018 __le32 byte_20; 1025 __le32 byte_20;
@@ -1061,6 +1068,40 @@ struct hns_roce_query_version {
1061 __le32 rsv[5]; 1068 __le32 rsv[5];
1062}; 1069};
1063 1070
1071struct hns_roce_cfg_llm_a {
1072 __le32 base_addr_l;
1073 __le32 base_addr_h;
1074 __le32 depth_pgsz_init_en;
1075 __le32 head_ba_l;
1076 __le32 head_ba_h_nxtptr;
1077 __le32 head_ptr;
1078};
1079
1080#define CFG_LLM_QUE_DEPTH_S 0
1081#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0)
1082
1083#define CFG_LLM_QUE_PGSZ_S 16
1084#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16)
1085
1086#define CFG_LLM_INIT_EN_S 20
1087#define CFG_LLM_INIT_EN_M GENMASK(20, 20)
1088
1089#define CFG_LLM_HEAD_PTR_S 0
1090#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0)
1091
1092struct hns_roce_cfg_llm_b {
1093 __le32 tail_ba_l;
1094 __le32 tail_ba_h;
1095 __le32 tail_ptr;
1096 __le32 rsv[3];
1097};
1098
1099#define CFG_LLM_TAIL_BA_H_S 0
1100#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0)
1101
1102#define CFG_LLM_TAIL_PTR_S 0
1103#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0)
1104
1064struct hns_roce_cfg_global_param { 1105struct hns_roce_cfg_global_param {
1065 __le32 time_cfg_udp_port; 1106 __le32 time_cfg_udp_port;
1066 __le32 rsv[5]; 1107 __le32 rsv[5];
@@ -1072,7 +1113,7 @@ struct hns_roce_cfg_global_param {
1072#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16 1113#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16
1073#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16) 1114#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16)
1074 1115
1075struct hns_roce_pf_res { 1116struct hns_roce_pf_res_a {
1076 __le32 rsv; 1117 __le32 rsv;
1077 __le32 qpc_bt_idx_num; 1118 __le32 qpc_bt_idx_num;
1078 __le32 srqc_bt_idx_num; 1119 __le32 srqc_bt_idx_num;
@@ -1111,6 +1152,32 @@ struct hns_roce_pf_res {
1111#define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16 1152#define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16
1112#define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16) 1153#define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16)
1113 1154
1155struct hns_roce_pf_res_b {
1156 __le32 rsv0;
1157 __le32 smac_idx_num;
1158 __le32 sgid_idx_num;
1159 __le32 qid_idx_sl_num;
1160 __le32 rsv[2];
1161};
1162
1163#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
1164#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0)
1165
1166#define PF_RES_DATA_1_PF_SMAC_NUM_S 8
1167#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8)
1168
1169#define PF_RES_DATA_2_PF_SGID_IDX_S 0
1170#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0)
1171
1172#define PF_RES_DATA_2_PF_SGID_NUM_S 8
1173#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8)
1174
1175#define PF_RES_DATA_3_PF_QID_IDX_S 0
1176#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0)
1177
1178#define PF_RES_DATA_3_PF_SL_NUM_S 16
1179#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16)
1180
1114struct hns_roce_vf_res_a { 1181struct hns_roce_vf_res_a {
1115 __le32 vf_id; 1182 __le32 vf_id;
1116 __le32 vf_qpc_bt_idx_num; 1183 __le32 vf_qpc_bt_idx_num;
@@ -1179,13 +1246,6 @@ struct hns_roce_vf_res_b {
1179#define VF_RES_B_DATA_3_VF_SL_NUM_S 16 1246#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
1180#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) 1247#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
1181 1248
1182/* Reg field definition */
1183#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0
1184#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0)
1185
1186#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0
1187#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0)
1188
1189struct hns_roce_cfg_bt_attr { 1249struct hns_roce_cfg_bt_attr {
1190 __le32 vf_qpc_cfg; 1250 __le32 vf_qpc_cfg;
1191 __le32 vf_srqc_cfg; 1251 __le32 vf_srqc_cfg;
@@ -1230,6 +1290,32 @@ struct hns_roce_cfg_bt_attr {
1230#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8 1290#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8
1231#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8) 1291#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8)
1232 1292
1293struct hns_roce_cfg_sgid_tb {
1294 __le32 table_idx_rsv;
1295 __le32 vf_sgid_l;
1296 __le32 vf_sgid_ml;
1297 __le32 vf_sgid_mh;
1298 __le32 vf_sgid_h;
1299 __le32 vf_sgid_type_rsv;
1300};
1301#define CFG_SGID_TB_TABLE_IDX_S 0
1302#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
1303
1304#define CFG_SGID_TB_VF_SGID_TYPE_S 0
1305#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
1306
1307struct hns_roce_cfg_smac_tb {
1308 __le32 tb_idx_rsv;
1309 __le32 vf_smac_l;
1310 __le32 vf_smac_h_rsv;
1311 __le32 rsv[3];
1312};
1313#define CFG_SMAC_TB_IDX_S 0
1314#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
1315
1316#define CFG_SMAC_TB_VF_SMAC_H_S 0
1317#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
1318
1233struct hns_roce_cmq_desc { 1319struct hns_roce_cmq_desc {
1234 __le16 opcode; 1320 __le16 opcode;
1235 __le16 flag; 1321 __le16 flag;
@@ -1276,8 +1362,32 @@ struct hns_roce_v2_cmq {
1276 u16 last_status; 1362 u16 last_status;
1277}; 1363};
1278 1364
1365enum hns_roce_link_table_type {
1366 TSQ_LINK_TABLE,
1367 TPQ_LINK_TABLE,
1368};
1369
1370struct hns_roce_link_table {
1371 struct hns_roce_buf_list table;
1372 struct hns_roce_buf_list *pg_list;
1373 u32 npages;
1374 u32 pg_sz;
1375};
1376
1377struct hns_roce_link_table_entry {
1378 u32 blk_ba0;
1379 u32 blk_ba1_nxt_ptr;
1380};
1381#define HNS_ROCE_LINK_TABLE_BA1_S 0
1382#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0)
1383
1384#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20
1385#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
1386
1279struct hns_roce_v2_priv { 1387struct hns_roce_v2_priv {
1280 struct hns_roce_v2_cmq cmq; 1388 struct hns_roce_v2_cmq cmq;
1389 struct hns_roce_link_table tsq;
1390 struct hns_roce_link_table tpq;
1281}; 1391};
1282 1392
1283struct hns_roce_eq_context { 1393struct hns_roce_eq_context {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 21b901cfa2d6..c5cae9a38c04 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
74 return hr_dev->hw->set_mac(hr_dev, phy_port, addr); 74 return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
75} 75}
76 76
77static int hns_roce_add_gid(const union ib_gid *gid, 77static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
78 const struct ib_gid_attr *attr, void **context)
79{ 78{
80 struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); 79 struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
81 u8 port = attr->port_num - 1; 80 u8 port = attr->port_num - 1;
@@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid,
87 86
88 spin_lock_irqsave(&hr_dev->iboe.lock, flags); 87 spin_lock_irqsave(&hr_dev->iboe.lock, flags);
89 88
90 ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, 89 ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
91 (union ib_gid *)gid, attr);
92 90
93 spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); 91 spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
94 92
@@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
208 props->max_qp_wr = hr_dev->caps.max_wqes; 206 props->max_qp_wr = hr_dev->caps.max_wqes;
209 props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | 207 props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
210 IB_DEVICE_RC_RNR_NAK_GEN; 208 IB_DEVICE_RC_RNR_NAK_GEN;
211 props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg); 209 props->max_send_sge = hr_dev->caps.max_sq_sg;
210 props->max_recv_sge = hr_dev->caps.max_rq_sg;
212 props->max_sge_rd = 1; 211 props->max_sge_rd = 1;
213 props->max_cq = hr_dev->caps.num_cqs; 212 props->max_cq = hr_dev->caps.num_cqs;
214 props->max_cqe = hr_dev->caps.max_cqes; 213 props->max_cqe = hr_dev->caps.max_cqes;
@@ -535,6 +534,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
535 (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | 534 (1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
536 (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); 535 (1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
537 536
537 ib_dev->uverbs_ex_cmd_mask |=
538 (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
539
538 /* HCA||device||port */ 540 /* HCA||device||port */
539 ib_dev->modify_device = hns_roce_modify_device; 541 ib_dev->modify_device = hns_roce_modify_device;
540 ib_dev->query_device = hns_roce_query_device; 542 ib_dev->query_device = hns_roce_query_device;
@@ -887,8 +889,7 @@ error_failed_cmd_init:
887 889
888error_failed_cmq_init: 890error_failed_cmq_init:
889 if (hr_dev->hw->reset) { 891 if (hr_dev->hw->reset) {
890 ret = hr_dev->hw->reset(hr_dev, false); 892 if (hr_dev->hw->reset(hr_dev, false))
891 if (ret)
892 dev_err(dev, "Dereset RoCE engine failed!\n"); 893 dev_err(dev, "Dereset RoCE engine failed!\n");
893 } 894 }
894 895
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index b9f2c871ff9a..e11c149da04d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -37,7 +37,7 @@
37 37
38static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) 38static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
39{ 39{
40 return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); 40 return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0;
41} 41}
42 42
43static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) 43static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index baaf906f7c2e..efb7e961ca65 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -115,7 +115,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
115{ 115{
116 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 116 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
117 117
118 return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); 118 return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
119 base) ?
120 -ENOMEM :
121 0;
119} 122}
120 123
121enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) 124enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
@@ -489,6 +492,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
489 return 0; 492 return 0;
490} 493}
491 494
495static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
496{
497 if (attr->qp_type == IB_QPT_XRC_TGT)
498 return 0;
499
500 return 1;
501}
502
492static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) 503static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
493{ 504{
494 if (attr->qp_type == IB_QPT_XRC_INI || 505 if (attr->qp_type == IB_QPT_XRC_INI ||
@@ -613,6 +624,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
613 goto err_mtt; 624 goto err_mtt;
614 } 625 }
615 626
627 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
628 (udata->inlen >= sizeof(ucmd)) &&
629 (udata->outlen >= sizeof(resp)) &&
630 hns_roce_qp_has_sq(init_attr)) {
631 ret = hns_roce_db_map_user(
632 to_hr_ucontext(ib_pd->uobject->context),
633 ucmd.sdb_addr, &hr_qp->sdb);
634 if (ret) {
635 dev_err(dev, "sq record doorbell map failed!\n");
636 goto err_mtt;
637 }
638
639 /* indicate kernel supports sq record db */
640 resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
641 hr_qp->sdb_en = 1;
642 }
643
616 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && 644 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
617 (udata->outlen >= sizeof(resp)) && 645 (udata->outlen >= sizeof(resp)) &&
618 hns_roce_qp_has_rq(init_attr)) { 646 hns_roce_qp_has_rq(init_attr)) {
@@ -621,7 +649,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
621 ucmd.db_addr, &hr_qp->rdb); 649 ucmd.db_addr, &hr_qp->rdb);
622 if (ret) { 650 if (ret) {
623 dev_err(dev, "rq record doorbell map failed!\n"); 651 dev_err(dev, "rq record doorbell map failed!\n");
624 goto err_mtt; 652 goto err_sq_dbmap;
625 } 653 }
626 } 654 }
627 } else { 655 } else {
@@ -734,7 +762,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
734 if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && 762 if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
735 (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { 763 (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
736 764
737 /* indicate kernel supports record db */ 765 /* indicate kernel supports rq record db */
738 resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; 766 resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
739 ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); 767 ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
740 if (ret) 768 if (ret)
@@ -770,6 +798,16 @@ err_wrid:
770 kfree(hr_qp->rq.wrid); 798 kfree(hr_qp->rq.wrid);
771 } 799 }
772 800
801err_sq_dbmap:
802 if (ib_pd->uobject)
803 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
804 (udata->inlen >= sizeof(ucmd)) &&
805 (udata->outlen >= sizeof(resp)) &&
806 hns_roce_qp_has_sq(init_attr))
807 hns_roce_db_unmap_user(
808 to_hr_ucontext(ib_pd->uobject->context),
809 &hr_qp->sdb);
810
773err_mtt: 811err_mtt:
774 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); 812 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
775 813
@@ -903,6 +941,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
903 new_state = attr_mask & IB_QP_STATE ? 941 new_state = attr_mask & IB_QP_STATE ?
904 attr->qp_state : cur_state; 942 attr->qp_state : cur_state;
905 943
944 if (ibqp->uobject &&
945 (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
946 if (hr_qp->sdb_en == 1) {
947 hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
948 hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
949 } else {
950 dev_warn(dev, "flush cqe is not supported in userspace!\n");
951 goto out;
952 }
953 }
954
906 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 955 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
907 IB_LINK_LAYER_ETHERNET)) { 956 IB_LINK_LAYER_ETHERNET)) {
908 dev_err(dev, "ib_modify_qp_is_ok failed\n"); 957 dev_err(dev, "ib_modify_qp_is_ok failed\n");
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index 2962979c06e9..d867ef1ac72a 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -1,6 +1,7 @@
1config INFINIBAND_I40IW 1config INFINIBAND_I40IW
2 tristate "Intel(R) Ethernet X722 iWARP Driver" 2 tristate "Intel(R) Ethernet X722 iWARP Driver"
3 depends on INET && I40E 3 depends on INET && I40E
4 depends on IPV6 || !IPV6
4 depends on PCI 5 depends on PCI
5 select GENERIC_ALLOCATOR 6 select GENERIC_ALLOCATOR
6 ---help--- 7 ---help---
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 7b2655128b9f..423818a7d333 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -57,6 +57,7 @@
57#include <net/addrconf.h> 57#include <net/addrconf.h>
58#include <net/ip6_route.h> 58#include <net/ip6_route.h>
59#include <net/ip_fib.h> 59#include <net/ip_fib.h>
60#include <net/secure_seq.h>
60#include <net/tcp.h> 61#include <net/tcp.h>
61#include <asm/checksum.h> 62#include <asm/checksum.h>
62 63
@@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
2164 struct i40iw_cm_listener *listener) 2165 struct i40iw_cm_listener *listener)
2165{ 2166{
2166 struct i40iw_cm_node *cm_node; 2167 struct i40iw_cm_node *cm_node;
2167 struct timespec ts;
2168 int oldarpindex; 2168 int oldarpindex;
2169 int arpindex; 2169 int arpindex;
2170 struct net_device *netdev = iwdev->netdev; 2170 struct net_device *netdev = iwdev->netdev;
@@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
2214 cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE; 2214 cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
2215 cm_node->tcp_cntxt.rcv_wnd = 2215 cm_node->tcp_cntxt.rcv_wnd =
2216 I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE; 2216 I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
2217 ts = current_kernel_time(); 2217 if (cm_node->ipv4) {
2218 cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec; 2218 cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
2219 cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) : 2219 htonl(cm_node->rem_addr[0]),
2220 (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6); 2220 htons(cm_node->loc_port),
2221 htons(cm_node->rem_port));
2222 cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4;
2223 } else if (IS_ENABLED(CONFIG_IPV6)) {
2224 __be32 loc[4] = {
2225 htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
2226 htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
2227 };
2228 __be32 rem[4] = {
2229 htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
2230 htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
2231 };
2232 cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
2233 htons(cm_node->loc_port),
2234 htons(cm_node->rem_port));
2235 cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6;
2236 }
2221 2237
2222 cm_node->iwdev = iwdev; 2238 cm_node->iwdev = iwdev;
2223 cm_node->dev = &iwdev->sc_dev; 2239 cm_node->dev = &iwdev->sc_dev;
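
The hunk above replaces the old nanosecond-clock initial sequence number with the kernel's secure ISN helpers from <net/secure_seq.h>, keyed on the connection 4-tuple. A minimal kernel-style sketch of the IPv4 call pattern, assuming the addresses and ports are already in network byte order:

        #include <linux/types.h>
        #include <net/secure_seq.h>

        static u32 pick_isn_v4(__be32 saddr, __be32 daddr,
                               __be16 sport, __be16 dport)
        {
                /* Hashed, per-boot-keyed ISN; IPv6 peers would use secure_tcpv6_seq() */
                return secure_tcp_seq(saddr, daddr, sport, dport);
        }
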
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 2836c5420d60..55a1fbf0e670 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
435} 435}
436 436
437/** 437/**
438 * i40iw_manage_apbvt - add or delete tcp port 438 * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt
439 * @iwdev: iwarp device 439 * @iwdev: iwarp device
440 * @accel_local_port: port for apbvt 440 * @accel_local_port: port for apbvt
441 * @add_port: add or delete port 441 * @add_port: add or delete port
442 */ 442 */
443int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) 443static enum i40iw_status_code
444i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev,
445 u16 accel_local_port,
446 bool add_port)
444{ 447{
445 struct i40iw_apbvt_info *info; 448 struct i40iw_apbvt_info *info;
446 struct i40iw_cqp_request *cqp_request; 449 struct i40iw_cqp_request *cqp_request;
447 struct cqp_commands_info *cqp_info; 450 struct cqp_commands_info *cqp_info;
448 unsigned long flags; 451 enum i40iw_status_code status;
449 struct i40iw_cm_core *cm_core = &iwdev->cm_core;
450 enum i40iw_status_code status = 0;
451 bool in_use;
452
453 /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
454 * protect against race where add APBVT CQP can race ahead of the delete
455 * APBVT for same port.
456 */
457 spin_lock_irqsave(&cm_core->apbvt_lock, flags);
458
459 if (!add_port) {
460 in_use = i40iw_port_in_use(cm_core, accel_local_port);
461 if (in_use)
462 goto exit;
463 clear_bit(accel_local_port, cm_core->ports_in_use);
464 } else {
465 in_use = test_and_set_bit(accel_local_port,
466 cm_core->ports_in_use);
467 spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
468 if (in_use)
469 return 0;
470 }
471 452
472 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); 453 cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
473 if (!cqp_request) { 454 if (!cqp_request)
474 status = -ENOMEM; 455 return I40IW_ERR_NO_MEMORY;
475 goto exit;
476 }
477 456
478 cqp_info = &cqp_request->info; 457 cqp_info = &cqp_request->info;
479 info = &cqp_info->in.u.manage_apbvt_entry.info; 458 info = &cqp_info->in.u.manage_apbvt_entry.info;
@@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
489 status = i40iw_handle_cqp_op(iwdev, cqp_request); 468 status = i40iw_handle_cqp_op(iwdev, cqp_request);
490 if (status) 469 if (status)
491 i40iw_pr_err("CQP-OP Manage APBVT entry fail"); 470 i40iw_pr_err("CQP-OP Manage APBVT entry fail");
492exit:
493 if (!add_port)
494 spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
495 471
496 return status; 472 return status;
497} 473}
498 474
499/** 475/**
476 * i40iw_manage_apbvt - add or delete tcp port
477 * @iwdev: iwarp device
478 * @accel_local_port: port for apbvt
479 * @add_port: add or delete port
480 */
481enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev,
482 u16 accel_local_port,
483 bool add_port)
484{
485 struct i40iw_cm_core *cm_core = &iwdev->cm_core;
486 enum i40iw_status_code status;
487 unsigned long flags;
488 bool in_use;
489
490 /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
491 * protect against race where add APBVT CQP can race ahead of the delete
492 * APBVT for same port.
493 */
494 if (add_port) {
495 spin_lock_irqsave(&cm_core->apbvt_lock, flags);
496 in_use = __test_and_set_bit(accel_local_port,
497 cm_core->ports_in_use);
498 spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
499 if (in_use)
500 return 0;
501 return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
502 true);
503 } else {
504 spin_lock_irqsave(&cm_core->apbvt_lock, flags);
505 in_use = i40iw_port_in_use(cm_core, accel_local_port);
506 if (in_use) {
507 spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
508 return 0;
509 }
510 __clear_bit(accel_local_port, cm_core->ports_in_use);
511 status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
512 false);
513 spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
514 return status;
515 }
516}
517
518/**
500 * i40iw_manage_arp_cache - manage hw arp cache 519 * i40iw_manage_arp_cache - manage hw arp cache
501 * @iwdev: iwarp device 520 * @iwdev: iwarp device
502 * @mac_addr: mac address ptr 521 * @mac_addr: mac address ptr
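
The reworked i40iw_manage_apbvt() above tracks accelerated ports in a bitmap under apbvt_lock so an add and a delete for the same port cannot race: the add path flips the bit and only programs the hardware for the first user, while the delete path keeps the lock held across the non-waiting CQP delete. A generic kernel-style sketch of that add/remove discipline, simplified and with illustrative names:

        #include <linux/bitmap.h>
        #include <linux/spinlock.h>
        #include <linux/types.h>

        #define MAX_PORTS 65536

        struct port_table {
                spinlock_t lock;
                DECLARE_BITMAP(in_use, MAX_PORTS);
        };

        /* Returns true if the caller should issue the hardware "add port" command. */
        static bool port_table_add(struct port_table *pt, u16 port)
        {
                unsigned long flags;
                bool was_set;

                spin_lock_irqsave(&pt->lock, flags);
                was_set = __test_and_set_bit(port, pt->in_use);
                spin_unlock_irqrestore(&pt->lock, flags);

                return !was_set;        /* first user programs the hardware */
        }

        /* Returns true if the caller should issue the "delete port" command;
         * the real driver issues the delete before dropping the lock. */
        static bool port_table_del(struct port_table *pt, u16 port)
        {
                unsigned long flags;
                bool was_set;

                spin_lock_irqsave(&pt->lock, flags);
                was_set = __test_and_clear_bit(port, pt->in_use);
                spin_unlock_irqrestore(&pt->lock, flags);

                return was_set;
        }
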
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 68679ad4c6da..e2e6c74a7452 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
71 props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE; 71 props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
72 props->max_qp = iwdev->max_qp - iwdev->used_qps; 72 props->max_qp = iwdev->max_qp - iwdev->used_qps;
73 props->max_qp_wr = I40IW_MAX_QP_WRS; 73 props->max_qp_wr = I40IW_MAX_QP_WRS;
74 props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT; 74 props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
75 props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
75 props->max_cq = iwdev->max_cq - iwdev->used_cqs; 76 props->max_cq = iwdev->max_cq - iwdev->used_cqs;
76 props->max_cqe = iwdev->max_cqe; 77 props->max_cqe = iwdev->max_cqe;
77 props->max_mr = iwdev->max_mr - iwdev->used_mrs; 78 props->max_mr = iwdev->max_mr - iwdev->used_mrs;
@@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
1409 struct vm_area_struct *vma; 1410 struct vm_area_struct *vma;
1410 struct hstate *h; 1411 struct hstate *h;
1411 1412
1413 down_read(&current->mm->mmap_sem);
1412 vma = find_vma(current->mm, addr); 1414 vma = find_vma(current->mm, addr);
1413 if (vma && is_vm_hugetlb_page(vma)) { 1415 if (vma && is_vm_hugetlb_page(vma)) {
1414 h = hstate_vma(vma); 1416 h = hstate_vma(vma);
@@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
1417 iwmr->page_msk = huge_page_mask(h); 1419 iwmr->page_msk = huge_page_mask(h);
1418 } 1420 }
1419 } 1421 }
1422 up_read(&current->mm->mmap_sem);
1420} 1423}
1421 1424
1422/** 1425/**
@@ -2198,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in
2198 * @bad_wr: return of bad wr if err 2201 * @bad_wr: return of bad wr if err
2199 */ 2202 */
2200static int i40iw_post_send(struct ib_qp *ibqp, 2203static int i40iw_post_send(struct ib_qp *ibqp,
2201 struct ib_send_wr *ib_wr, 2204 const struct ib_send_wr *ib_wr,
2202 struct ib_send_wr **bad_wr) 2205 const struct ib_send_wr **bad_wr)
2203{ 2206{
2204 struct i40iw_qp *iwqp; 2207 struct i40iw_qp *iwqp;
2205 struct i40iw_qp_uk *ukqp; 2208 struct i40iw_qp_uk *ukqp;
@@ -2374,9 +2377,8 @@ out:
2374 * @ib_wr: work request for receive 2377 * @ib_wr: work request for receive
2375 * @bad_wr: bad wr caused an error 2378 * @bad_wr: bad wr caused an error
2376 */ 2379 */
2377static int i40iw_post_recv(struct ib_qp *ibqp, 2380static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
2378 struct ib_recv_wr *ib_wr, 2381 const struct ib_recv_wr **bad_wr)
2379 struct ib_recv_wr **bad_wr)
2380{ 2382{
2381 struct i40iw_qp *iwqp; 2383 struct i40iw_qp *iwqp;
2382 struct i40iw_qp_uk *ukqp; 2384 struct i40iw_qp_uk *ukqp;
@@ -2701,21 +2703,6 @@ static int i40iw_query_gid(struct ib_device *ibdev,
2701} 2703}
2702 2704
2703/** 2705/**
2704 * i40iw_modify_port Modify port properties
2705 * @ibdev: device pointer from stack
2706 * @port: port number
2707 * @port_modify_mask: mask for port modifications
2708 * @props: port properties
2709 */
2710static int i40iw_modify_port(struct ib_device *ibdev,
2711 u8 port,
2712 int port_modify_mask,
2713 struct ib_port_modify *props)
2714{
2715 return -ENOSYS;
2716}
2717
2718/**
2719 * i40iw_query_pkey - Query partition key 2706 * i40iw_query_pkey - Query partition key
2720 * @ibdev: device pointer from stack 2707 * @ibdev: device pointer from stack
2721 * @port: port number 2708 * @port: port number
@@ -2732,28 +2719,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
2732} 2719}
2733 2720
2734/** 2721/**
2735 * i40iw_create_ah - create address handle
2736 * @ibpd: ptr of pd
2737 * @ah_attr: address handle attributes
2738 */
2739static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
2740 struct rdma_ah_attr *attr,
2741 struct ib_udata *udata)
2742
2743{
2744 return ERR_PTR(-ENOSYS);
2745}
2746
2747/**
2748 * i40iw_destroy_ah - Destroy address handle
2749 * @ah: pointer to address handle
2750 */
2751static int i40iw_destroy_ah(struct ib_ah *ah)
2752{
2753 return -ENOSYS;
2754}
2755
2756/**
2757 * i40iw_get_vector_affinity - report IRQ affinity mask 2722 * i40iw_get_vector_affinity - report IRQ affinity mask
2758 * @ibdev: IB device 2723 * @ibdev: IB device
2759 * @comp_vector: completion vector index 2724 * @comp_vector: completion vector index
@@ -2820,7 +2785,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2820 iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; 2785 iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
2821 iwibdev->ibdev.dev.parent = &pcidev->dev; 2786 iwibdev->ibdev.dev.parent = &pcidev->dev;
2822 iwibdev->ibdev.query_port = i40iw_query_port; 2787 iwibdev->ibdev.query_port = i40iw_query_port;
2823 iwibdev->ibdev.modify_port = i40iw_modify_port;
2824 iwibdev->ibdev.query_pkey = i40iw_query_pkey; 2788 iwibdev->ibdev.query_pkey = i40iw_query_pkey;
2825 iwibdev->ibdev.query_gid = i40iw_query_gid; 2789 iwibdev->ibdev.query_gid = i40iw_query_gid;
2826 iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; 2790 iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
@@ -2840,8 +2804,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2840 iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; 2804 iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
2841 iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; 2805 iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
2842 iwibdev->ibdev.query_device = i40iw_query_device; 2806 iwibdev->ibdev.query_device = i40iw_query_device;
2843 iwibdev->ibdev.create_ah = i40iw_create_ah;
2844 iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
2845 iwibdev->ibdev.drain_sq = i40iw_drain_sq; 2807 iwibdev->ibdev.drain_sq = i40iw_drain_sq;
2846 iwibdev->ibdev.drain_rq = i40iw_drain_rq; 2808 iwibdev->ibdev.drain_rq = i40iw_drain_rq;
2847 iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; 2809 iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 9345d5b546d1..e9e3a6f390db 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
82 struct mlx4_ib_ah *ah) 82 struct mlx4_ib_ah *ah)
83{ 83{
84 struct mlx4_ib_dev *ibdev = to_mdev(pd->device); 84 struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
85 const struct ib_gid_attr *gid_attr;
85 struct mlx4_dev *dev = ibdev->dev; 86 struct mlx4_dev *dev = ibdev->dev;
86 int is_mcast = 0; 87 int is_mcast = 0;
87 struct in6_addr in6; 88 struct in6_addr in6;
88 u16 vlan_tag = 0xffff; 89 u16 vlan_tag = 0xffff;
89 union ib_gid sgid;
90 struct ib_gid_attr gid_attr;
91 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 90 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
92 int ret; 91 int ret;
93 92
@@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
96 is_mcast = 1; 95 is_mcast = 1;
97 96
98 memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN); 97 memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
99 ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
100 grh->sgid_index, &sgid, &gid_attr);
101 if (ret)
102 return ERR_PTR(ret);
103 eth_zero_addr(ah->av.eth.s_mac); 98 eth_zero_addr(ah->av.eth.s_mac);
104 if (is_vlan_dev(gid_attr.ndev)) 99
105 vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); 100 /*
106 memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN); 101 * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave
107 dev_put(gid_attr.ndev); 102 * and we are directly creating an AV for a slave's gid_index.
103 */
104 gid_attr = ah_attr->grh.sgid_attr;
105 if (gid_attr) {
106 if (is_vlan_dev(gid_attr->ndev))
107 vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
108 memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
109 ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr);
110 if (ret < 0)
111 return ERR_PTR(ret);
112 ah->av.eth.gid_index = ret;
113 } else {
114 /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */
115 ah->av.eth.gid_index = ah_attr->grh.sgid_index;
116 }
117
108 if (vlan_tag < 0x1000) 118 if (vlan_tag < 0x1000)
109 vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13; 119 vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
110 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | 120 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
111 (rdma_ah_get_port_num(ah_attr) << 24)); 121 (rdma_ah_get_port_num(ah_attr) << 24));
112 ret = mlx4_ib_gid_index_to_real_index(ibdev,
113 rdma_ah_get_port_num(ah_attr),
114 grh->sgid_index);
115 if (ret < 0)
116 return ERR_PTR(ret);
117 ah->av.eth.gid_index = ret;
118 ah->av.eth.vlan = cpu_to_be16(vlan_tag); 122 ah->av.eth.vlan = cpu_to_be16(vlan_tag);
119 ah->av.eth.hop_limit = grh->hop_limit; 123 ah->av.eth.hop_limit = grh->hop_limit;
120 if (rdma_ah_get_static_rate(ah_attr)) { 124 if (rdma_ah_get_static_rate(ah_attr)) {
@@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
173 return create_ib_ah(pd, ah_attr, ah); /* never fails */ 177 return create_ib_ah(pd, ah_attr, ah); /* never fails */
174} 178}
175 179
180/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. */
181struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
182 struct rdma_ah_attr *ah_attr,
183 int slave_sgid_index, u8 *s_mac,
184 u16 vlan_tag)
185{
186 struct rdma_ah_attr slave_attr = *ah_attr;
187 struct mlx4_ib_ah *mah;
188 struct ib_ah *ah;
189
190 slave_attr.grh.sgid_attr = NULL;
191 slave_attr.grh.sgid_index = slave_sgid_index;
192 ah = mlx4_ib_create_ah(pd, &slave_attr, NULL);
193 if (IS_ERR(ah))
194 return ah;
195
196 ah->device = pd->device;
197 ah->pd = pd;
198 ah->type = ah_attr->type;
199 mah = to_mah(ah);
200
201 /* get rid of force-loopback bit */
202 mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
203
204 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
205 memcpy(mah->av.eth.s_mac, s_mac, 6);
206
207 if (vlan_tag < 0x1000)
208 vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
209 mah->av.eth.vlan = cpu_to_be16(vlan_tag);
210
211 return ah;
212}
213
176int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) 214int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
177{ 215{
178 struct mlx4_ib_ah *ah = to_mah(ibah); 216 struct mlx4_ib_ah *ah = to_mah(ibah);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 90a3e2642c2e..e5466d786bb1 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
506{ 506{
507 struct ib_sge list; 507 struct ib_sge list;
508 struct ib_ud_wr wr; 508 struct ib_ud_wr wr;
509 struct ib_send_wr *bad_wr; 509 const struct ib_send_wr *bad_wr;
510 struct mlx4_ib_demux_pv_ctx *tun_ctx; 510 struct mlx4_ib_demux_pv_ctx *tun_ctx;
511 struct mlx4_ib_demux_pv_qp *tun_qp; 511 struct mlx4_ib_demux_pv_qp *tun_qp;
512 struct mlx4_rcv_tunnel_mad *tun_mad; 512 struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1310 int index) 1310 int index)
1311{ 1311{
1312 struct ib_sge sg_list; 1312 struct ib_sge sg_list;
1313 struct ib_recv_wr recv_wr, *bad_recv_wr; 1313 struct ib_recv_wr recv_wr;
1314 const struct ib_recv_wr *bad_recv_wr;
1314 int size; 1315 int size;
1315 1316
1316 size = (tun_qp->qp->qp_type == IB_QPT_UD) ? 1317 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
@@ -1361,19 +1362,16 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1361{ 1362{
1362 struct ib_sge list; 1363 struct ib_sge list;
1363 struct ib_ud_wr wr; 1364 struct ib_ud_wr wr;
1364 struct ib_send_wr *bad_wr; 1365 const struct ib_send_wr *bad_wr;
1365 struct mlx4_ib_demux_pv_ctx *sqp_ctx; 1366 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1366 struct mlx4_ib_demux_pv_qp *sqp; 1367 struct mlx4_ib_demux_pv_qp *sqp;
1367 struct mlx4_mad_snd_buf *sqp_mad; 1368 struct mlx4_mad_snd_buf *sqp_mad;
1368 struct ib_ah *ah; 1369 struct ib_ah *ah;
1369 struct ib_qp *send_qp = NULL; 1370 struct ib_qp *send_qp = NULL;
1370 struct ib_global_route *grh;
1371 unsigned wire_tx_ix = 0; 1371 unsigned wire_tx_ix = 0;
1372 int ret = 0; 1372 int ret = 0;
1373 u16 wire_pkey_ix; 1373 u16 wire_pkey_ix;
1374 int src_qpnum; 1374 int src_qpnum;
1375 u8 sgid_index;
1376
1377 1375
1378 sqp_ctx = dev->sriov.sqps[port-1]; 1376 sqp_ctx = dev->sriov.sqps[port-1];
1379 1377
@@ -1394,16 +1392,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1394 send_qp = sqp->qp; 1392 send_qp = sqp->qp;
1395 1393
1396 /* create ah */ 1394 /* create ah */
1397 grh = rdma_ah_retrieve_grh(attr); 1395 ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr,
1398 sgid_index = grh->sgid_index; 1396 rdma_ah_retrieve_grh(attr)->sgid_index,
1399 grh->sgid_index = 0; 1397 s_mac, vlan_id);
1400 ah = rdma_create_ah(sqp_ctx->pd, attr);
1401 if (IS_ERR(ah)) 1398 if (IS_ERR(ah))
1402 return -ENOMEM; 1399 return -ENOMEM;
1403 grh->sgid_index = sgid_index;
1404 to_mah(ah)->av.ib.gid_index = sgid_index;
1405 /* get rid of force-loopback bit */
1406 to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1407 spin_lock(&sqp->tx_lock); 1400 spin_lock(&sqp->tx_lock);
1408 if (sqp->tx_ix_head - sqp->tx_ix_tail >= 1401 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1409 (MLX4_NUM_TUNNEL_BUFS - 1)) 1402 (MLX4_NUM_TUNNEL_BUFS - 1))
@@ -1445,12 +1438,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1445 wr.wr.num_sge = 1; 1438 wr.wr.num_sge = 1;
1446 wr.wr.opcode = IB_WR_SEND; 1439 wr.wr.opcode = IB_WR_SEND;
1447 wr.wr.send_flags = IB_SEND_SIGNALED; 1440 wr.wr.send_flags = IB_SEND_SIGNALED;
1448 if (s_mac)
1449 memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
1450 if (vlan_id < 0x1000)
1451 vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
1452 to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
1453
1454 1441
1455 ret = ib_post_send(send_qp, &wr.wr, &bad_wr); 1442 ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
1456 if (!ret) 1443 if (!ret)
@@ -1461,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1461 spin_unlock(&sqp->tx_lock); 1448 spin_unlock(&sqp->tx_lock);
1462 sqp->tx_ring[wire_tx_ix].ah = NULL; 1449 sqp->tx_ring[wire_tx_ix].ah = NULL;
1463out: 1450out:
1464 rdma_destroy_ah(ah); 1451 mlx4_ib_destroy_ah(ah);
1465 return ret; 1452 return ret;
1466} 1453}
1467 1454
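The mad.c hunks above convert the bad_wr pointers to const struct ib_send_wr * / const struct ib_recv_wr *, so a provider can only report which WR failed rather than modify the caller's list. A minimal caller-side sketch of the resulting convention (the helper name and SGE values are illustrative, not taken from the driver):

#include <rdma/ib_verbs.h>

/* Illustrative only: post one signaled SEND using the const-qualified bad_wr. */
static int example_post_one_send(struct ib_qp *qp, u64 dma_addr, u32 len, u32 lkey)
{
	struct ib_sge sge = {
		.addr   = dma_addr,	/* assumed DMA-mapped buffer */
		.length = len,
		.lkey   = lkey,
	};
	struct ib_send_wr wr = {
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};
	const struct ib_send_wr *bad_wr;	/* provider only reads through this */

	return ib_post_send(qp, &wr, &bad_wr);
}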
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4ec519afc45b..ca0f1ee26091 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
246 return mlx4_ib_update_gids_v1(gids, ibdev, port_num); 246 return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
247} 247}
248 248
249static int mlx4_ib_add_gid(const union ib_gid *gid, 249static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
250 const struct ib_gid_attr *attr,
251 void **context)
252{ 250{
253 struct mlx4_ib_dev *ibdev = to_mdev(attr->device); 251 struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
254 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 252 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
@@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
271 port_gid_table = &iboe->gids[attr->port_num - 1]; 269 port_gid_table = &iboe->gids[attr->port_num - 1];
272 spin_lock_bh(&iboe->lock); 270 spin_lock_bh(&iboe->lock);
273 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { 271 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
274 if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) && 272 if (!memcmp(&port_gid_table->gids[i].gid,
275 (port_gid_table->gids[i].gid_type == attr->gid_type)) { 273 &attr->gid, sizeof(attr->gid)) &&
274 port_gid_table->gids[i].gid_type == attr->gid_type) {
276 found = i; 275 found = i;
277 break; 276 break;
278 } 277 }
@@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
289 ret = -ENOMEM; 288 ret = -ENOMEM;
290 } else { 289 } else {
291 *context = port_gid_table->gids[free].ctx; 290 *context = port_gid_table->gids[free].ctx;
292 memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); 291 memcpy(&port_gid_table->gids[free].gid,
292 &attr->gid, sizeof(attr->gid));
293 port_gid_table->gids[free].gid_type = attr->gid_type; 293 port_gid_table->gids[free].gid_type = attr->gid_type;
294 port_gid_table->gids[free].ctx->real_index = free; 294 port_gid_table->gids[free].ctx->real_index = free;
295 port_gid_table->gids[free].ctx->refcount = 1; 295 port_gid_table->gids[free].ctx->refcount = 1;
@@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
380} 380}
381 381
382int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, 382int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
383 u8 port_num, int index) 383 const struct ib_gid_attr *attr)
384{ 384{
385 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 385 struct mlx4_ib_iboe *iboe = &ibdev->iboe;
386 struct gid_cache_context *ctx = NULL; 386 struct gid_cache_context *ctx = NULL;
387 union ib_gid gid;
388 struct mlx4_port_gid_table *port_gid_table; 387 struct mlx4_port_gid_table *port_gid_table;
389 int real_index = -EINVAL; 388 int real_index = -EINVAL;
390 int i; 389 int i;
391 int ret;
392 unsigned long flags; 390 unsigned long flags;
393 struct ib_gid_attr attr; 391 u8 port_num = attr->port_num;
394 392
395 if (port_num > MLX4_MAX_PORTS) 393 if (port_num > MLX4_MAX_PORTS)
396 return -EINVAL; 394 return -EINVAL;
@@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
399 port_num = 1; 397 port_num = 1;
400 398
401 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) 399 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
402 return index; 400 return attr->index;
403
404 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
405 if (ret)
406 return ret;
407
408 if (attr.ndev)
409 dev_put(attr.ndev);
410 401
411 spin_lock_irqsave(&iboe->lock, flags); 402 spin_lock_irqsave(&iboe->lock, flags);
412 port_gid_table = &iboe->gids[port_num - 1]; 403 port_gid_table = &iboe->gids[port_num - 1];
413 404
414 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) 405 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
415 if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) && 406 if (!memcmp(&port_gid_table->gids[i].gid,
416 attr.gid_type == port_gid_table->gids[i].gid_type) { 407 &attr->gid, sizeof(attr->gid)) &&
408 attr->gid_type == port_gid_table->gids[i].gid_type) {
417 ctx = port_gid_table->gids[i].ctx; 409 ctx = port_gid_table->gids[i].ctx;
418 break; 410 break;
419 } 411 }
@@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
525 props->page_size_cap = dev->dev->caps.page_size_cap; 517 props->page_size_cap = dev->dev->caps.page_size_cap;
526 props->max_qp = dev->dev->quotas.qp; 518 props->max_qp = dev->dev->quotas.qp;
527 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; 519 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
528 props->max_sge = min(dev->dev->caps.max_sq_sg, 520 props->max_send_sge = dev->dev->caps.max_sq_sg;
529 dev->dev->caps.max_rq_sg); 521 props->max_recv_sge = dev->dev->caps.max_rq_sg;
530 props->max_sge_rd = MLX4_MAX_SGE_RD; 522 props->max_sge_rd = MLX4_MAX_SGE_RD;
531 props->max_cq = dev->dev->quotas.cq; 523 props->max_cq = dev->dev->quotas.cq;
532 props->max_cqe = dev->dev->caps.max_cqes; 524 props->max_cqe = dev->dev->caps.max_cqes;
@@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
770 IB_WIDTH_4X : IB_WIDTH_1X; 762 IB_WIDTH_4X : IB_WIDTH_1X;
771 props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? 763 props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
772 IB_SPEED_FDR : IB_SPEED_QDR; 764 IB_SPEED_FDR : IB_SPEED_QDR;
773 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; 765 props->port_cap_flags = IB_PORT_CM_SUP;
766 props->ip_gids = true;
774 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; 767 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
775 props->max_msg_sz = mdev->dev->caps.max_msg_sz; 768 props->max_msg_sz = mdev->dev->caps.max_msg_sz;
776 props->pkey_tbl_len = 1; 769 props->pkey_tbl_len = 1;
@@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2709 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; 2702 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
2710 ibdev->ib_dev.query_qp = mlx4_ib_query_qp; 2703 ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
2711 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; 2704 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
2705 ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq;
2706 ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq;
2712 ibdev->ib_dev.post_send = mlx4_ib_post_send; 2707 ibdev->ib_dev.post_send = mlx4_ib_post_send;
2713 ibdev->ib_dev.post_recv = mlx4_ib_post_recv; 2708 ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
2714 ibdev->ib_dev.create_cq = mlx4_ib_create_cq; 2709 ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
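The mlx4_ib_query_device() hunk above splits the old max_sge (the min of SQ and RQ limits) into separate max_send_sge and max_recv_sge, matching hardware whose limits are asymmetric. A consumer-side sketch of how a ULP can now size each direction independently (the helper and the cap of 4 are illustrative assumptions):

#include <rdma/ib_verbs.h>

/* Illustrative only: derive per-direction SGE limits from the split attributes. */
static void example_pick_sge_limits(struct ib_device *ibdev,
				    struct ib_qp_init_attr *init_attr)
{
	init_attr->cap.max_send_sge = min_t(u32, 4, ibdev->attrs.max_send_sge);
	init_attr->cap.max_recv_sge = min_t(u32, 4, ibdev->attrs.max_recv_sge);
}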
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7b1429917aba..e10dccc7958f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -322,7 +322,6 @@ struct mlx4_ib_qp {
322 u32 doorbell_qpn; 322 u32 doorbell_qpn;
323 __be32 sq_signal_bits; 323 __be32 sq_signal_bits;
324 unsigned sq_next_wqe; 324 unsigned sq_next_wqe;
325 int sq_max_wqes_per_wr;
326 int sq_spare_wqes; 325 int sq_spare_wqes;
327 struct mlx4_ib_wq sq; 326 struct mlx4_ib_wq sq;
328 327
@@ -760,6 +759,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
760 759
761struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, 760struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
762 struct ib_udata *udata); 761 struct ib_udata *udata);
762struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
763 struct rdma_ah_attr *ah_attr,
764 int slave_sgid_index, u8 *s_mac,
765 u16 vlan_tag);
763int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); 766int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
764int mlx4_ib_destroy_ah(struct ib_ah *ah); 767int mlx4_ib_destroy_ah(struct ib_ah *ah);
765 768
@@ -771,21 +774,23 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
771int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); 774int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
772int mlx4_ib_destroy_srq(struct ib_srq *srq); 775int mlx4_ib_destroy_srq(struct ib_srq *srq);
773void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); 776void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
774int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 777int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
775 struct ib_recv_wr **bad_wr); 778 const struct ib_recv_wr **bad_wr);
776 779
777struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 780struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
778 struct ib_qp_init_attr *init_attr, 781 struct ib_qp_init_attr *init_attr,
779 struct ib_udata *udata); 782 struct ib_udata *udata);
780int mlx4_ib_destroy_qp(struct ib_qp *qp); 783int mlx4_ib_destroy_qp(struct ib_qp *qp);
784void mlx4_ib_drain_sq(struct ib_qp *qp);
785void mlx4_ib_drain_rq(struct ib_qp *qp);
781int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 786int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
782 int attr_mask, struct ib_udata *udata); 787 int attr_mask, struct ib_udata *udata);
783int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, 788int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
784 struct ib_qp_init_attr *qp_init_attr); 789 struct ib_qp_init_attr *qp_init_attr);
785int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 790int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
786 struct ib_send_wr **bad_wr); 791 const struct ib_send_wr **bad_wr);
787int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 792int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
788 struct ib_recv_wr **bad_wr); 793 const struct ib_recv_wr **bad_wr);
789 794
790int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, 795int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
791 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, 796 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
@@ -900,7 +905,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
900 int mr_access_flags, struct ib_pd *pd, 905 int mr_access_flags, struct ib_pd *pd,
901 struct ib_udata *udata); 906 struct ib_udata *udata);
902int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, 907int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
903 u8 port_num, int index); 908 const struct ib_gid_attr *attr);
904 909
905void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, 910void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
906 int port); 911 int port);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 3b8045fd23ed..6dd3cd2c2f80 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -204,91 +204,26 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
204 204
205/* 205/*
206 * Stamp a SQ WQE so that it is invalid if prefetched by marking the 206 * Stamp a SQ WQE so that it is invalid if prefetched by marking the
207 * first four bytes of every 64 byte chunk with 207 * first four bytes of every 64 byte chunk with 0xffffffff, except for
208 * 0x7FFFFFF | (invalid_ownership_value << 31). 208 * the very first chunk of the WQE.
209 *
210 * When the max work request size is less than or equal to the WQE
211 * basic block size, as an optimization, we can stamp all WQEs with
212 * 0xffffffff, and skip the very first chunk of each WQE.
213 */ 209 */
214static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) 210static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
215{ 211{
216 __be32 *wqe; 212 __be32 *wqe;
217 int i; 213 int i;
218 int s; 214 int s;
219 int ind;
220 void *buf; 215 void *buf;
221 __be32 stamp;
222 struct mlx4_wqe_ctrl_seg *ctrl; 216 struct mlx4_wqe_ctrl_seg *ctrl;
223 217
224 if (qp->sq_max_wqes_per_wr > 1) { 218 buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
225 s = roundup(size, 1U << qp->sq.wqe_shift); 219 ctrl = (struct mlx4_wqe_ctrl_seg *)buf;
226 for (i = 0; i < s; i += 64) { 220 s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
227 ind = (i >> qp->sq.wqe_shift) + n; 221 for (i = 64; i < s; i += 64) {
228 stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) : 222 wqe = buf + i;
229 cpu_to_be32(0xffffffff); 223 *wqe = cpu_to_be32(0xffffffff);
230 buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
231 wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
232 *wqe = stamp;
233 }
234 } else {
235 ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
236 s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
237 for (i = 64; i < s; i += 64) {
238 wqe = buf + i;
239 *wqe = cpu_to_be32(0xffffffff);
240 }
241 } 224 }
242} 225}
243 226
244static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
245{
246 struct mlx4_wqe_ctrl_seg *ctrl;
247 struct mlx4_wqe_inline_seg *inl;
248 void *wqe;
249 int s;
250
251 ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
252 s = sizeof(struct mlx4_wqe_ctrl_seg);
253
254 if (qp->ibqp.qp_type == IB_QPT_UD) {
255 struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
256 struct mlx4_av *av = (struct mlx4_av *)dgram->av;
257 memset(dgram, 0, sizeof *dgram);
258 av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
259 s += sizeof(struct mlx4_wqe_datagram_seg);
260 }
261
262 /* Pad the remainder of the WQE with an inline data segment. */
263 if (size > s) {
264 inl = wqe + s;
265 inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
266 }
267 ctrl->srcrb_flags = 0;
268 ctrl->qpn_vlan.fence_size = size / 16;
269 /*
270 * Make sure descriptor is fully written before setting ownership bit
271 * (because HW can start executing as soon as we do).
272 */
273 wmb();
274
275 ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
276 (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
277
278 stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
279}
280
281/* Post NOP WQE to prevent wrap-around in the middle of WR */
282static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
283{
284 unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
285 if (unlikely(s < qp->sq_max_wqes_per_wr)) {
286 post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
287 ind += s;
288 }
289 return ind;
290}
291
292static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) 227static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
293{ 228{
294 struct ib_event event; 229 struct ib_event event;
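The rewritten stamp_send_wqe() above no longer needs a size argument: it reads the WQE length back out of the control segment, where fence_size is stored in 16-byte units, so s = (fence_size & 0x3f) << 4 is the size in bytes. As a worked example (values illustrative): fence_size = 0x08 gives s = 128 and the loop stamps only the chunk at offset 64, while a minimal 64-byte WQE (fence_size = 0x04) gives s = 64, so the loop body never runs and the very first chunk is left untouched, as the updated comment requires.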
@@ -433,8 +368,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
433} 368}
434 369
435static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 370static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
436 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp, 371 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
437 bool shrink_wqe)
438{ 372{
439 int s; 373 int s;
440 374
@@ -461,70 +395,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
461 if (s > dev->dev->caps.max_sq_desc_sz) 395 if (s > dev->dev->caps.max_sq_desc_sz)
462 return -EINVAL; 396 return -EINVAL;
463 397
398 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
399
464 /* 400 /*
465 * Hermon supports shrinking WQEs, such that a single work 401 * We need to leave 2 KB + 1 WR of headroom in the SQ to
466 * request can include multiple units of 1 << wqe_shift. This 402 * allow HW to prefetch.
467 * way, work requests can differ in size, and do not have to
468 * be a power of 2 in size, saving memory and speeding up send
469 * WR posting. Unfortunately, if we do this then the
470 * wqe_index field in CQEs can't be used to look up the WR ID
471 * anymore, so we do this only if selective signaling is off.
472 *
473 * Further, on 32-bit platforms, we can't use vmap() to make
474 * the QP buffer virtually contiguous. Thus we have to use
475 * constant-sized WRs to make sure a WR is always fully within
476 * a single page-sized chunk.
477 *
478 * Finally, we use NOP work requests to pad the end of the
479 * work queue, to avoid wrap-around in the middle of WR. We
480 * set NEC bit to avoid getting completions with error for
481 * these NOP WRs, but since NEC is only supported starting
482 * with firmware 2.2.232, we use constant-sized WRs for older
483 * firmware.
484 *
485 * And, since MLX QPs only support SEND, we use constant-sized
486 * WRs in this case.
487 *
488 * We look for the smallest value of wqe_shift such that the
489 * resulting number of wqes does not exceed device
490 * capabilities.
491 *
492 * We set WQE size to at least 64 bytes, this way stamping
493 * invalidates each WQE.
494 */ 403 */
495 if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && 404 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
496 qp->sq_signal_bits && BITS_PER_LONG == 64 && 405 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr +
497 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && 406 qp->sq_spare_wqes);
498 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | 407
499 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) 408 qp->sq.max_gs =
500 qp->sq.wqe_shift = ilog2(64); 409 (min(dev->dev->caps.max_sq_desc_sz,
501 else 410 (1 << qp->sq.wqe_shift)) -
502 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); 411 send_wqe_overhead(type, qp->flags)) /
503
504 for (;;) {
505 qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
506
507 /*
508 * We need to leave 2 KB + 1 WR of headroom in the SQ to
509 * allow HW to prefetch.
510 */
511 qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
512 qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
513 qp->sq_max_wqes_per_wr +
514 qp->sq_spare_wqes);
515
516 if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
517 break;
518
519 if (qp->sq_max_wqes_per_wr <= 1)
520 return -EINVAL;
521
522 ++qp->sq.wqe_shift;
523 }
524
525 qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
526 (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
527 send_wqe_overhead(type, qp->flags)) /
528 sizeof (struct mlx4_wqe_data_seg); 412 sizeof (struct mlx4_wqe_data_seg);
529 413
530 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + 414 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
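With per-WR shrinking removed, the headroom computation above collapses to a fixed formula. A worked example with illustrative numbers, for wqe_shift = 6 (64-byte WQEs) and max_send_wr = 256:

	qp->sq_spare_wqes = (2048 >> 6) + 1 = 33;
	qp->sq.wqe_cnt    = roundup_pow_of_two(256 + 33) = 512;

and the max_send_wr reported back to the caller is wqe_cnt - sq_spare_wqes = 479, so the 2 KB prefetch window plus one spare WR always stays unposted.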
@@ -538,7 +422,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
538 } 422 }
539 423
540 cap->max_send_wr = qp->sq.max_post = 424 cap->max_send_wr = qp->sq.max_post =
541 (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr; 425 qp->sq.wqe_cnt - qp->sq_spare_wqes;
542 cap->max_send_sge = min(qp->sq.max_gs, 426 cap->max_send_sge = min(qp->sq.max_gs,
543 min(dev->dev->caps.max_sq_sg, 427 min(dev->dev->caps.max_sq_sg,
544 dev->dev->caps.max_rq_sg)); 428 dev->dev->caps.max_rq_sg));
@@ -977,7 +861,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
977{ 861{
978 int qpn; 862 int qpn;
979 int err; 863 int err;
980 struct ib_qp_cap backup_cap;
981 struct mlx4_ib_sqp *sqp = NULL; 864 struct mlx4_ib_sqp *sqp = NULL;
982 struct mlx4_ib_qp *qp; 865 struct mlx4_ib_qp *qp;
983 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; 866 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -1178,9 +1061,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
1178 goto err; 1061 goto err;
1179 } 1062 }
1180 1063
1181 memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap)); 1064 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
1182 err = set_kernel_sq_size(dev, &init_attr->cap,
1183 qp_type, qp, true);
1184 if (err) 1065 if (err)
1185 goto err; 1066 goto err;
1186 1067
@@ -1192,20 +1073,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
1192 *qp->db.db = 0; 1073 *qp->db.db = 0;
1193 } 1074 }
1194 1075
1195 if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size, 1076 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2,
1196 &qp->buf)) { 1077 &qp->buf)) {
1197 memcpy(&init_attr->cap, &backup_cap, 1078 err = -ENOMEM;
1198 sizeof(backup_cap)); 1079 goto err_db;
1199 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
1200 qp, false);
1201 if (err)
1202 goto err_db;
1203
1204 if (mlx4_buf_alloc(dev->dev, qp->buf_size,
1205 PAGE_SIZE * 2, &qp->buf)) {
1206 err = -ENOMEM;
1207 goto err_db;
1208 }
1209 } 1080 }
1210 1081
1211 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, 1082 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
@@ -1859,8 +1730,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
1859 if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) { 1730 if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
1860 const struct ib_global_route *grh = rdma_ah_read_grh(ah); 1731 const struct ib_global_route *grh = rdma_ah_read_grh(ah);
1861 int real_sgid_index = 1732 int real_sgid_index =
1862 mlx4_ib_gid_index_to_real_index(dev, port, 1733 mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr);
1863 grh->sgid_index);
1864 1734
1865 if (real_sgid_index < 0) 1735 if (real_sgid_index < 0)
1866 return real_sgid_index; 1736 return real_sgid_index;
@@ -2176,6 +2046,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
2176{ 2046{
2177 struct ib_uobject *ibuobject; 2047 struct ib_uobject *ibuobject;
2178 struct ib_srq *ibsrq; 2048 struct ib_srq *ibsrq;
2049 const struct ib_gid_attr *gid_attr = NULL;
2179 struct ib_rwq_ind_table *rwq_ind_tbl; 2050 struct ib_rwq_ind_table *rwq_ind_tbl;
2180 enum ib_qp_type qp_type; 2051 enum ib_qp_type qp_type;
2181 struct mlx4_ib_dev *dev; 2052 struct mlx4_ib_dev *dev;
@@ -2356,29 +2227,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
2356 if (attr_mask & IB_QP_AV) { 2227 if (attr_mask & IB_QP_AV) {
2357 u8 port_num = mlx4_is_bonded(dev->dev) ? 1 : 2228 u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
2358 attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 2229 attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2359 union ib_gid gid;
2360 struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
2361 u16 vlan = 0xffff; 2230 u16 vlan = 0xffff;
2362 u8 smac[ETH_ALEN]; 2231 u8 smac[ETH_ALEN];
2363 int status = 0;
2364 int is_eth = 2232 int is_eth =
2365 rdma_cap_eth_ah(&dev->ib_dev, port_num) && 2233 rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
2366 rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; 2234 rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
2367 2235
2368 if (is_eth) { 2236 if (is_eth) {
2369 int index = 2237 gid_attr = attr->ah_attr.grh.sgid_attr;
2370 rdma_ah_read_grh(&attr->ah_attr)->sgid_index; 2238 vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
2371 2239 memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN);
2372 status = ib_get_cached_gid(&dev->ib_dev, port_num,
2373 index, &gid, &gid_attr);
2374 if (!status) {
2375 vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
2376 memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
2377 dev_put(gid_attr.ndev);
2378 }
2379 } 2240 }
2380 if (status)
2381 goto out;
2382 2241
2383 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, 2242 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
2384 port_num, vlan, smac)) 2243 port_num, vlan, smac))
@@ -2389,7 +2248,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
2389 2248
2390 if (is_eth && 2249 if (is_eth &&
2391 (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) { 2250 (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
2392 u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type); 2251 u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type);
2393 2252
2394 if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) { 2253 if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
2395 err = -EINVAL; 2254 err = -EINVAL;
@@ -2594,11 +2453,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
2594 for (i = 0; i < qp->sq.wqe_cnt; ++i) { 2453 for (i = 0; i < qp->sq.wqe_cnt; ++i) {
2595 ctrl = get_send_wqe(qp, i); 2454 ctrl = get_send_wqe(qp, i);
2596 ctrl->owner_opcode = cpu_to_be32(1 << 31); 2455 ctrl->owner_opcode = cpu_to_be32(1 << 31);
2597 if (qp->sq_max_wqes_per_wr == 1) 2456 ctrl->qpn_vlan.fence_size =
2598 ctrl->qpn_vlan.fence_size = 2457 1 << (qp->sq.wqe_shift - 4);
2599 1 << (qp->sq.wqe_shift - 4); 2458 stamp_send_wqe(qp, i);
2600
2601 stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
2602 } 2459 }
2603 } 2460 }
2604 2461
@@ -2937,7 +2794,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
2937} 2794}
2938 2795
2939static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, 2796static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2940 struct ib_ud_wr *wr, 2797 const struct ib_ud_wr *wr,
2941 void *wqe, unsigned *mlx_seg_len) 2798 void *wqe, unsigned *mlx_seg_len)
2942{ 2799{
2943 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); 2800 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
@@ -3085,7 +2942,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
3085} 2942}
3086 2943
3087#define MLX4_ROCEV2_QP1_SPORT 0xC000 2944#define MLX4_ROCEV2_QP1_SPORT 0xC000
3088static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, 2945static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
3089 void *wqe, unsigned *mlx_seg_len) 2946 void *wqe, unsigned *mlx_seg_len)
3090{ 2947{
3091 struct ib_device *ib_dev = sqp->qp.ibqp.device; 2948 struct ib_device *ib_dev = sqp->qp.ibqp.device;
@@ -3181,10 +3038,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
3181 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. 3038 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
3182 guid_cache[ah->av.ib.gid_index]; 3039 guid_cache[ah->av.ib.gid_index];
3183 } else { 3040 } else {
3184 ib_get_cached_gid(ib_dev, 3041 sqp->ud_header.grh.source_gid =
3185 be32_to_cpu(ah->av.ib.port_pd) >> 24, 3042 ah->ibah.sgid_attr->gid;
3186 ah->av.ib.gid_index,
3187 &sqp->ud_header.grh.source_gid, NULL);
3188 } 3043 }
3189 } 3044 }
3190 memcpy(sqp->ud_header.grh.destination_gid.raw, 3045 memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -3369,7 +3224,7 @@ static __be32 convert_access(int acc)
3369} 3224}
3370 3225
3371static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg, 3226static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
3372 struct ib_reg_wr *wr) 3227 const struct ib_reg_wr *wr)
3373{ 3228{
3374 struct mlx4_ib_mr *mr = to_mmr(wr->mr); 3229 struct mlx4_ib_mr *mr = to_mmr(wr->mr);
3375 3230
@@ -3399,7 +3254,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
3399} 3254}
3400 3255
3401static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, 3256static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
3402 struct ib_atomic_wr *wr) 3257 const struct ib_atomic_wr *wr)
3403{ 3258{
3404 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 3259 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
3405 aseg->swap_add = cpu_to_be64(wr->swap); 3260 aseg->swap_add = cpu_to_be64(wr->swap);
@@ -3415,7 +3270,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
3415} 3270}
3416 3271
3417static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, 3272static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
3418 struct ib_atomic_wr *wr) 3273 const struct ib_atomic_wr *wr)
3419{ 3274{
3420 aseg->swap_add = cpu_to_be64(wr->swap); 3275 aseg->swap_add = cpu_to_be64(wr->swap);
3421 aseg->swap_add_mask = cpu_to_be64(wr->swap_mask); 3276 aseg->swap_add_mask = cpu_to_be64(wr->swap_mask);
@@ -3424,7 +3279,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
3424} 3279}
3425 3280
3426static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, 3281static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
3427 struct ib_ud_wr *wr) 3282 const struct ib_ud_wr *wr)
3428{ 3283{
3429 memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av)); 3284 memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
3430 dseg->dqpn = cpu_to_be32(wr->remote_qpn); 3285 dseg->dqpn = cpu_to_be32(wr->remote_qpn);
@@ -3435,7 +3290,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
3435 3290
3436static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, 3291static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
3437 struct mlx4_wqe_datagram_seg *dseg, 3292 struct mlx4_wqe_datagram_seg *dseg,
3438 struct ib_ud_wr *wr, 3293 const struct ib_ud_wr *wr,
3439 enum mlx4_ib_qp_type qpt) 3294 enum mlx4_ib_qp_type qpt)
3440{ 3295{
3441 union mlx4_ext_av *av = &to_mah(wr->ah)->av; 3296 union mlx4_ext_av *av = &to_mah(wr->ah)->av;
@@ -3457,7 +3312,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
3457 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); 3312 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
3458} 3313}
3459 3314
3460static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) 3315static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe,
3316 unsigned *mlx_seg_len)
3461{ 3317{
3462 struct mlx4_wqe_inline_seg *inl = wqe; 3318 struct mlx4_wqe_inline_seg *inl = wqe;
3463 struct mlx4_ib_tunnel_header hdr; 3319 struct mlx4_ib_tunnel_header hdr;
@@ -3540,9 +3396,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
3540 dseg->addr = cpu_to_be64(sg->addr); 3396 dseg->addr = cpu_to_be64(sg->addr);
3541} 3397}
3542 3398
3543static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr, 3399static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe,
3544 struct mlx4_ib_qp *qp, unsigned *lso_seg_len, 3400 const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp,
3545 __be32 *lso_hdr_sz, __be32 *blh) 3401 unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh)
3546{ 3402{
3547 unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16); 3403 unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
3548 3404
@@ -3560,7 +3416,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
3560 return 0; 3416 return 0;
3561} 3417}
3562 3418
3563static __be32 send_ieth(struct ib_send_wr *wr) 3419static __be32 send_ieth(const struct ib_send_wr *wr)
3564{ 3420{
3565 switch (wr->opcode) { 3421 switch (wr->opcode) {
3566 case IB_WR_SEND_WITH_IMM: 3422 case IB_WR_SEND_WITH_IMM:
@@ -3582,8 +3438,8 @@ static void add_zero_len_inline(void *wqe)
3582 inl->byte_count = cpu_to_be32(1 << 31); 3438 inl->byte_count = cpu_to_be32(1 << 31);
3583} 3439}
3584 3440
3585int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3441static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3586 struct ib_send_wr **bad_wr) 3442 const struct ib_send_wr **bad_wr, bool drain)
3587{ 3443{
3588 struct mlx4_ib_qp *qp = to_mqp(ibqp); 3444 struct mlx4_ib_qp *qp = to_mqp(ibqp);
3589 void *wqe; 3445 void *wqe;
@@ -3593,7 +3449,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3593 int nreq; 3449 int nreq;
3594 int err = 0; 3450 int err = 0;
3595 unsigned ind; 3451 unsigned ind;
3596 int uninitialized_var(stamp);
3597 int uninitialized_var(size); 3452 int uninitialized_var(size);
3598 unsigned uninitialized_var(seglen); 3453 unsigned uninitialized_var(seglen);
3599 __be32 dummy; 3454 __be32 dummy;
@@ -3623,7 +3478,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3623 } 3478 }
3624 3479
3625 spin_lock_irqsave(&qp->sq.lock, flags); 3480 spin_lock_irqsave(&qp->sq.lock, flags);
3626 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 3481 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
3482 !drain) {
3627 err = -EIO; 3483 err = -EIO;
3628 *bad_wr = wr; 3484 *bad_wr = wr;
3629 nreq = 0; 3485 nreq = 0;
@@ -3865,22 +3721,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3865 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | 3721 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
3866 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh; 3722 (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
3867 3723
3868 stamp = ind + qp->sq_spare_wqes;
3869 ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
3870
3871 /* 3724 /*
3872 * We can improve latency by not stamping the last 3725 * We can improve latency by not stamping the last
3873 * send queue WQE until after ringing the doorbell, so 3726 * send queue WQE until after ringing the doorbell, so
3874 * only stamp here if there are still more WQEs to post. 3727 * only stamp here if there are still more WQEs to post.
3875 *
3876 * Same optimization applies to padding with NOP wqe
3877 * in case of WQE shrinking (used to prevent wrap-around
3878 * in the middle of WR).
3879 */ 3728 */
3880 if (wr->next) { 3729 if (wr->next)
3881 stamp_send_wqe(qp, stamp, size * 16); 3730 stamp_send_wqe(qp, ind + qp->sq_spare_wqes);
3882 ind = pad_wraparound(qp, ind); 3731 ind++;
3883 }
3884 } 3732 }
3885 3733
3886out: 3734out:
@@ -3902,9 +3750,8 @@ out:
3902 */ 3750 */
3903 mmiowb(); 3751 mmiowb();
3904 3752
3905 stamp_send_wqe(qp, stamp, size * 16); 3753 stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1);
3906 3754
3907 ind = pad_wraparound(qp, ind);
3908 qp->sq_next_wqe = ind; 3755 qp->sq_next_wqe = ind;
3909 } 3756 }
3910 3757
@@ -3913,8 +3760,14 @@ out:
3913 return err; 3760 return err;
3914} 3761}
3915 3762
3916int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3763int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3917 struct ib_recv_wr **bad_wr) 3764 const struct ib_send_wr **bad_wr)
3765{
3766 return _mlx4_ib_post_send(ibqp, wr, bad_wr, false);
3767}
3768
3769static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3770 const struct ib_recv_wr **bad_wr, bool drain)
3918{ 3771{
3919 struct mlx4_ib_qp *qp = to_mqp(ibqp); 3772 struct mlx4_ib_qp *qp = to_mqp(ibqp);
3920 struct mlx4_wqe_data_seg *scat; 3773 struct mlx4_wqe_data_seg *scat;
@@ -3929,7 +3782,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
3929 max_gs = qp->rq.max_gs; 3782 max_gs = qp->rq.max_gs;
3930 spin_lock_irqsave(&qp->rq.lock, flags); 3783 spin_lock_irqsave(&qp->rq.lock, flags);
3931 3784
3932 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 3785 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
3786 !drain) {
3933 err = -EIO; 3787 err = -EIO;
3934 *bad_wr = wr; 3788 *bad_wr = wr;
3935 nreq = 0; 3789 nreq = 0;
@@ -4000,6 +3854,12 @@ out:
4000 return err; 3854 return err;
4001} 3855}
4002 3856
3857int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3858 const struct ib_recv_wr **bad_wr)
3859{
3860 return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false);
3861}
3862
4003static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state) 3863static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
4004{ 3864{
4005 switch (mlx4_state) { 3865 switch (mlx4_state) {
@@ -4047,9 +3907,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
4047 u8 port_num = path->sched_queue & 0x40 ? 2 : 1; 3907 u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
4048 3908
4049 memset(ah_attr, 0, sizeof(*ah_attr)); 3909 memset(ah_attr, 0, sizeof(*ah_attr));
4050 ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
4051 if (port_num == 0 || port_num > dev->caps.num_ports) 3910 if (port_num == 0 || port_num > dev->caps.num_ports)
4052 return; 3911 return;
3912 ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
4053 3913
4054 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) 3914 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
4055 rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) | 3915 rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
@@ -4465,3 +4325,132 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
4465 kfree(ib_rwq_ind_tbl); 4325 kfree(ib_rwq_ind_tbl);
4466 return 0; 4326 return 0;
4467} 4327}
4328
4329struct mlx4_ib_drain_cqe {
4330 struct ib_cqe cqe;
4331 struct completion done;
4332};
4333
4334static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
4335{
4336 struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
4337 struct mlx4_ib_drain_cqe,
4338 cqe);
4339
4340 complete(&cqe->done);
4341}
4342
4343/* This function returns only once the drained WR has completed */
4344static void handle_drain_completion(struct ib_cq *cq,
4345 struct mlx4_ib_drain_cqe *sdrain,
4346 struct mlx4_ib_dev *dev)
4347{
4348 struct mlx4_dev *mdev = dev->dev;
4349
4350 if (cq->poll_ctx == IB_POLL_DIRECT) {
4351 while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
4352 ib_process_cq_direct(cq, -1);
4353 return;
4354 }
4355
4356 if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) {
4357 struct mlx4_ib_cq *mcq = to_mcq(cq);
4358 bool triggered = false;
4359 unsigned long flags;
4360
4361 spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
4362 /* Make sure that the CQ handler won't run if it wasn't run yet */
4363 if (!mcq->mcq.reset_notify_added)
4364 mcq->mcq.reset_notify_added = 1;
4365 else
4366 triggered = true;
4367 spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
4368
4369 if (triggered) {
4370 /* Wait for any scheduled/running task to be ended */
4371 switch (cq->poll_ctx) {
4372 case IB_POLL_SOFTIRQ:
4373 irq_poll_disable(&cq->iop);
4374 irq_poll_enable(&cq->iop);
4375 break;
4376 case IB_POLL_WORKQUEUE:
4377 cancel_work_sync(&cq->work);
4378 break;
4379 default:
4380 WARN_ON_ONCE(1);
4381 }
4382 }
4383
4384 /* Run the CQ handler - this makes sure that the drain WR will
4385 * be processed if it wasn't processed yet.
4386 */
4387 mcq->mcq.comp(&mcq->mcq);
4388 }
4389
4390 wait_for_completion(&sdrain->done);
4391}
4392
4393void mlx4_ib_drain_sq(struct ib_qp *qp)
4394{
4395 struct ib_cq *cq = qp->send_cq;
4396 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
4397 struct mlx4_ib_drain_cqe sdrain;
4398 const struct ib_send_wr *bad_swr;
4399 struct ib_rdma_wr swr = {
4400 .wr = {
4401 .next = NULL,
4402 { .wr_cqe = &sdrain.cqe, },
4403 .opcode = IB_WR_RDMA_WRITE,
4404 },
4405 };
4406 int ret;
4407 struct mlx4_ib_dev *dev = to_mdev(qp->device);
4408 struct mlx4_dev *mdev = dev->dev;
4409
4410 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
4411 if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
4412 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
4413 return;
4414 }
4415
4416 sdrain.cqe.done = mlx4_ib_drain_qp_done;
4417 init_completion(&sdrain.done);
4418
4419 ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true);
4420 if (ret) {
4421 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
4422 return;
4423 }
4424
4425 handle_drain_completion(cq, &sdrain, dev);
4426}
4427
4428void mlx4_ib_drain_rq(struct ib_qp *qp)
4429{
4430 struct ib_cq *cq = qp->recv_cq;
4431 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
4432 struct mlx4_ib_drain_cqe rdrain;
4433 struct ib_recv_wr rwr = {};
4434 const struct ib_recv_wr *bad_rwr;
4435 int ret;
4436 struct mlx4_ib_dev *dev = to_mdev(qp->device);
4437 struct mlx4_dev *mdev = dev->dev;
4438
4439 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
4440 if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
4441 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
4442 return;
4443 }
4444
4445 rwr.wr_cqe = &rdrain.cqe;
4446 rdrain.cqe.done = mlx4_ib_drain_qp_done;
4447 init_completion(&rdrain.done);
4448
4449 ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true);
4450 if (ret) {
4451 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
4452 return;
4453 }
4454
4455 handle_drain_completion(cq, &rdrain, dev);
4456}
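The drain helpers added above post a final marker WR through _mlx4_ib_post_send()/_mlx4_ib_post_recv() with drain = true, so they work even when the device is in internal-error state and ordinary posts return -EIO. A ULP-side sketch of the intended use, assuming the standard core entry point (the teardown helper name is illustrative):

#include <rdma/ib_verbs.h>

/* Illustrative only: flush outstanding work before destroying a QP.
 * ib_drain_qp() calls the drain_sq/drain_rq hooks registered in main.c
 * when the driver provides them, otherwise the generic fallback is used.
 */
static void example_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* returns only once both queues are empty */
	ib_destroy_qp(qp);
}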
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index ebee56cbc0e2..3731b31c3653 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
307 spin_unlock(&srq->lock); 307 spin_unlock(&srq->lock);
308} 308}
309 309
310int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 310int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
311 struct ib_recv_wr **bad_wr) 311 const struct ib_recv_wr **bad_wr)
312{ 312{
313 struct mlx4_ib_srq *srq = to_msrq(ibsrq); 313 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
314 struct mlx4_wqe_srq_next_seg *next; 314 struct mlx4_wqe_srq_next_seg *next;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index d42b922bede8..b8e4b15e2674 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,3 +3,5 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o 3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o 4mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
5mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o 5mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
6mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
7mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index e6bde32a83f3..ffd03bf1a71e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
37 struct rdma_ah_attr *ah_attr) 37 struct rdma_ah_attr *ah_attr)
38{ 38{
39 enum ib_gid_type gid_type; 39 enum ib_gid_type gid_type;
40 int err;
41 40
42 if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { 41 if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
43 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 42 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
53 ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); 52 ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
54 53
55 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { 54 if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
56 err = mlx5_get_roce_gid_type(dev, ah_attr->port_num, 55 gid_type = ah_attr->grh.sgid_attr->gid_type;
57 ah_attr->grh.sgid_index,
58 &gid_type);
59 if (err)
60 return ERR_PTR(err);
61 56
62 memcpy(ah->av.rmac, ah_attr->roce.dmac, 57 memcpy(ah->av.rmac, ah_attr->roce.dmac,
63 sizeof(ah_attr->roce.dmac)); 58 sizeof(ah_attr->roce.dmac));
64 ah->av.udp_sport = 59 ah->av.udp_sport =
65 mlx5_get_roce_udp_sport(dev, 60 mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
66 rdma_ah_get_port_num(ah_attr),
67 rdma_ah_read_grh(ah_attr)->sgid_index);
68 ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1; 61 ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
69 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 62 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
70#define MLX5_ECN_ENABLED BIT(1) 63#define MLX5_ECN_ENABLED BIT(1)
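create_ib_ah() above now reads the GID type straight from the sgid_attr pinned in the AH attribute instead of re-querying the RoCE GID cache, which removes the only failure path. A minimal sketch of the same pattern (the helper name is an assumption):

#include <rdma/ib_verbs.h>

/* Illustrative only: with sgid_attr attached to the AH attribute, GID-type
 * checks become a plain dereference rather than a cache lookup that can fail.
 */
static bool example_is_rocev2_ah(const struct rdma_ah_attr *ah_attr)
{
	return ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
	       ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
}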
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ccc0b5d06a7d..c84fef9a8a08 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
185 185
186 return err; 186 return err;
187} 187}
188
189int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
190{
191 u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
192 int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
193
194 MLX5_SET(ppcnt_reg, in, local_port, 1);
195
196 MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
197 return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
198 0, 0);
199}
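The new mlx5_cmd_query_ext_ppcnt_counters() fills a whole ppcnt_reg layout, so callers must hand in an out buffer of MLX5_ST_SZ_BYTES(ppcnt_reg). A hypothetical caller sketch (the function below is not part of the patch):

#include <linux/slab.h>
#include <linux/mlx5/driver.h>
#include "cmd.h"

/* Hypothetical caller: allocate the PPCNT output buffer and query it. */
static int example_read_ext_ppcnt(struct mlx5_core_dev *mdev)
{
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	void *out;
	int err;

	out = kzalloc(sz, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	err = mlx5_cmd_query_ext_ppcnt_counters(mdev, out);
	/* on success, individual counters can be read with MLX5_GET64(ppcnt_reg, out, ...) */
	kfree(out);
	return err;
}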
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 98ea4648c655..88cbb1c41703 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
41int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey); 41int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
42int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, 42int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
43 void *out, int out_size); 43 void *out, int out_size);
44int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
44int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, 45int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
45 void *in, int in_size); 46 void *in, int in_size);
46int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, 47int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 985fa2637390..7e4e358a4fd8 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
359 int ret; 359 int ret;
360 char lbuf[11]; 360 char lbuf[11];
361 361
362 if (*pos)
363 return 0;
364
365 ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var); 362 ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
366 if (ret) 363 if (ret)
367 return ret; 364 return ret;
@@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
370 if (ret < 0) 367 if (ret < 0)
371 return ret; 368 return ret;
372 369
373 if (copy_to_user(buf, lbuf, ret)) 370 return simple_read_from_buffer(buf, count, pos, lbuf, ret);
374 return -EFAULT;
375
376 *pos += ret;
377 return ret;
378} 371}
379 372
380static const struct file_operations dbg_cc_fops = { 373static const struct file_operations dbg_cc_fops = {
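The get_param() hunk above replaces the open-coded copy_to_user() and *pos bookkeeping with simple_read_from_buffer(), which also makes the early "if (*pos) return 0" short-circuit unnecessary since the helper bounds-checks the offset itself. A minimal standalone sketch of the same pattern (names and the value printed are illustrative):

#include <linux/fs.h>
#include <linux/kernel.h>

/* Illustrative only: simple_read_from_buffer() honours count and *ppos,
 * copies to userspace, advances *ppos and returns the bytes copied.
 */
static ssize_t example_debugfs_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *ppos)
{
	char lbuf[16];
	int len = scnprintf(lbuf, sizeof(lbuf), "%u\n", 42u);

	return simple_read_from_buffer(buf, count, ppos, lbuf, len);
}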
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index ad39d64b8108..088205d7f1a1 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -1184,7 +1184,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
1184 int err; 1184 int err;
1185 1185
1186 if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) 1186 if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
1187 return -ENOSYS; 1187 return -EOPNOTSUPP;
1188 1188
1189 if (cq_period > MLX5_MAX_CQ_PERIOD) 1189 if (cq_period > MLX5_MAX_CQ_PERIOD)
1190 return -EINVAL; 1190 return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
new file mode 100644
index 000000000000..ac116d63e466
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -0,0 +1,1119 @@
1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
4 */
5
6#include <rdma/ib_user_verbs.h>
7#include <rdma/ib_verbs.h>
8#include <rdma/uverbs_types.h>
9#include <rdma/uverbs_ioctl.h>
10#include <rdma/mlx5_user_ioctl_cmds.h>
11#include <rdma/ib_umem.h>
12#include <linux/mlx5/driver.h>
13#include <linux/mlx5/fs.h>
14#include "mlx5_ib.h"
15
16#define UVERBS_MODULE_NAME mlx5_ib
17#include <rdma/uverbs_named_ioctl.h>
18
19#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
20struct devx_obj {
21 struct mlx5_core_dev *mdev;
22 u32 obj_id;
23 u32 dinlen; /* destroy inbox length */
24 u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
25};
26
27struct devx_umem {
28 struct mlx5_core_dev *mdev;
29 struct ib_umem *umem;
30 u32 page_offset;
31 int page_shift;
32 int ncont;
33 u32 dinlen;
34 u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
35};
36
37struct devx_umem_reg_cmd {
38 void *in;
39 u32 inlen;
40 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
41};
42
43static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
44{
45 return to_mucontext(ib_uverbs_get_ucontext(file));
46}
47
48int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
49{
50 u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
51 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
52 u64 general_obj_types;
53 void *hdr;
54 int err;
55
56 hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
57
58 general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
59 if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
60 !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
61 return -EINVAL;
62
63 if (!capable(CAP_NET_RAW))
64 return -EPERM;
65
66 MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
67 MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
68
69 err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
70 if (err)
71 return err;
72
73 context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
74 return 0;
75}
76
77void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
78 struct mlx5_ib_ucontext *context)
79{
80 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
81 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
82
83 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
84 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
85 MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
86
87 mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
88}
89
90bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
91{
92 struct devx_obj *devx_obj = obj;
93 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
94
95 switch (opcode) {
96 case MLX5_CMD_OP_DESTROY_TIR:
97 *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
98 *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
99 obj_id);
100 return true;
101
102 case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
103 *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
104 *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
105 table_id);
106 return true;
107 default:
108 return false;
109 }
110}
111
112static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
113{
114 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
115 u32 obj_id;
116
117 switch (opcode) {
118 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
119 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
120 obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
121 break;
122 case MLX5_CMD_OP_QUERY_MKEY:
123 obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
124 break;
125 case MLX5_CMD_OP_QUERY_CQ:
126 obj_id = MLX5_GET(query_cq_in, in, cqn);
127 break;
128 case MLX5_CMD_OP_MODIFY_CQ:
129 obj_id = MLX5_GET(modify_cq_in, in, cqn);
130 break;
131 case MLX5_CMD_OP_QUERY_SQ:
132 obj_id = MLX5_GET(query_sq_in, in, sqn);
133 break;
134 case MLX5_CMD_OP_MODIFY_SQ:
135 obj_id = MLX5_GET(modify_sq_in, in, sqn);
136 break;
137 case MLX5_CMD_OP_QUERY_RQ:
138 obj_id = MLX5_GET(query_rq_in, in, rqn);
139 break;
140 case MLX5_CMD_OP_MODIFY_RQ:
141 obj_id = MLX5_GET(modify_rq_in, in, rqn);
142 break;
143 case MLX5_CMD_OP_QUERY_RMP:
144 obj_id = MLX5_GET(query_rmp_in, in, rmpn);
145 break;
146 case MLX5_CMD_OP_MODIFY_RMP:
147 obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
148 break;
149 case MLX5_CMD_OP_QUERY_RQT:
150 obj_id = MLX5_GET(query_rqt_in, in, rqtn);
151 break;
152 case MLX5_CMD_OP_MODIFY_RQT:
153 obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
154 break;
155 case MLX5_CMD_OP_QUERY_TIR:
156 obj_id = MLX5_GET(query_tir_in, in, tirn);
157 break;
158 case MLX5_CMD_OP_MODIFY_TIR:
159 obj_id = MLX5_GET(modify_tir_in, in, tirn);
160 break;
161 case MLX5_CMD_OP_QUERY_TIS:
162 obj_id = MLX5_GET(query_tis_in, in, tisn);
163 break;
164 case MLX5_CMD_OP_MODIFY_TIS:
165 obj_id = MLX5_GET(modify_tis_in, in, tisn);
166 break;
167 case MLX5_CMD_OP_QUERY_FLOW_TABLE:
168 obj_id = MLX5_GET(query_flow_table_in, in, table_id);
169 break;
170 case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
171 obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
172 break;
173 case MLX5_CMD_OP_QUERY_FLOW_GROUP:
174 obj_id = MLX5_GET(query_flow_group_in, in, group_id);
175 break;
176 case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
177 obj_id = MLX5_GET(query_fte_in, in, flow_index);
178 break;
179 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
180 obj_id = MLX5_GET(set_fte_in, in, flow_index);
181 break;
182 case MLX5_CMD_OP_QUERY_Q_COUNTER:
183 obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
184 break;
185 case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
186 obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
187 break;
188 case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
189 obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
190 break;
191 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
192 obj_id = MLX5_GET(query_scheduling_element_in, in,
193 scheduling_element_id);
194 break;
195 case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
196 obj_id = MLX5_GET(modify_scheduling_element_in, in,
197 scheduling_element_id);
198 break;
199 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
200 obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
201 break;
202 case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
203 obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
204 break;
205 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
206 obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
207 break;
208 case MLX5_CMD_OP_QUERY_QP:
209 obj_id = MLX5_GET(query_qp_in, in, qpn);
210 break;
211 case MLX5_CMD_OP_RST2INIT_QP:
212 obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
213 break;
214 case MLX5_CMD_OP_INIT2RTR_QP:
215 obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
216 break;
217 case MLX5_CMD_OP_RTR2RTS_QP:
218 obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
219 break;
220 case MLX5_CMD_OP_RTS2RTS_QP:
221 obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
222 break;
223 case MLX5_CMD_OP_SQERR2RTS_QP:
224 obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
225 break;
226 case MLX5_CMD_OP_2ERR_QP:
227 obj_id = MLX5_GET(qp_2err_in, in, qpn);
228 break;
229 case MLX5_CMD_OP_2RST_QP:
230 obj_id = MLX5_GET(qp_2rst_in, in, qpn);
231 break;
232 case MLX5_CMD_OP_QUERY_DCT:
233 obj_id = MLX5_GET(query_dct_in, in, dctn);
234 break;
235 case MLX5_CMD_OP_QUERY_XRQ:
236 obj_id = MLX5_GET(query_xrq_in, in, xrqn);
237 break;
238 case MLX5_CMD_OP_QUERY_XRC_SRQ:
239 obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
240 break;
241 case MLX5_CMD_OP_ARM_XRC_SRQ:
242 obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
243 break;
244 case MLX5_CMD_OP_QUERY_SRQ:
245 obj_id = MLX5_GET(query_srq_in, in, srqn);
246 break;
247 case MLX5_CMD_OP_ARM_RQ:
248 obj_id = MLX5_GET(arm_rq_in, in, srq_number);
249 break;
250 case MLX5_CMD_OP_DRAIN_DCT:
251 case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
252 obj_id = MLX5_GET(drain_dct_in, in, dctn);
253 break;
254 case MLX5_CMD_OP_ARM_XRQ:
255 obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
256 break;
257 default:
258 return false;
259 }
260
261 if (obj_id == obj->obj_id)
262 return true;
263
264 return false;
265}
266
267static bool devx_is_obj_create_cmd(const void *in)
268{
269 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
270
271 switch (opcode) {
272 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
273 case MLX5_CMD_OP_CREATE_MKEY:
274 case MLX5_CMD_OP_CREATE_CQ:
275 case MLX5_CMD_OP_ALLOC_PD:
276 case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
277 case MLX5_CMD_OP_CREATE_RMP:
278 case MLX5_CMD_OP_CREATE_SQ:
279 case MLX5_CMD_OP_CREATE_RQ:
280 case MLX5_CMD_OP_CREATE_RQT:
281 case MLX5_CMD_OP_CREATE_TIR:
282 case MLX5_CMD_OP_CREATE_TIS:
283 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
284 case MLX5_CMD_OP_CREATE_FLOW_TABLE:
285 case MLX5_CMD_OP_CREATE_FLOW_GROUP:
286 case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
287 case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
288 case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
289 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
290 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
291 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
292 case MLX5_CMD_OP_CREATE_QP:
293 case MLX5_CMD_OP_CREATE_SRQ:
294 case MLX5_CMD_OP_CREATE_XRC_SRQ:
295 case MLX5_CMD_OP_CREATE_DCT:
296 case MLX5_CMD_OP_CREATE_XRQ:
297 case MLX5_CMD_OP_ATTACH_TO_MCG:
298 case MLX5_CMD_OP_ALLOC_XRCD:
299 return true;
300 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
301 {
302 u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
303 if (op_mod == 0)
304 return true;
305 return false;
306 }
307 default:
308 return false;
309 }
310}
311
312static bool devx_is_obj_modify_cmd(const void *in)
313{
314 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
315
316 switch (opcode) {
317 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
318 case MLX5_CMD_OP_MODIFY_CQ:
319 case MLX5_CMD_OP_MODIFY_RMP:
320 case MLX5_CMD_OP_MODIFY_SQ:
321 case MLX5_CMD_OP_MODIFY_RQ:
322 case MLX5_CMD_OP_MODIFY_RQT:
323 case MLX5_CMD_OP_MODIFY_TIR:
324 case MLX5_CMD_OP_MODIFY_TIS:
325 case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
326 case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
327 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
328 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
329 case MLX5_CMD_OP_RST2INIT_QP:
330 case MLX5_CMD_OP_INIT2RTR_QP:
331 case MLX5_CMD_OP_RTR2RTS_QP:
332 case MLX5_CMD_OP_RTS2RTS_QP:
333 case MLX5_CMD_OP_SQERR2RTS_QP:
334 case MLX5_CMD_OP_2ERR_QP:
335 case MLX5_CMD_OP_2RST_QP:
336 case MLX5_CMD_OP_ARM_XRC_SRQ:
337 case MLX5_CMD_OP_ARM_RQ:
338 case MLX5_CMD_OP_DRAIN_DCT:
339 case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
340 case MLX5_CMD_OP_ARM_XRQ:
341 return true;
342 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
343 {
344 u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
345
346 if (op_mod == 1)
347 return true;
348 return false;
349 }
350 default:
351 return false;
352 }
353}
354
355static bool devx_is_obj_query_cmd(const void *in)
356{
357 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
358
359 switch (opcode) {
360 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
361 case MLX5_CMD_OP_QUERY_MKEY:
362 case MLX5_CMD_OP_QUERY_CQ:
363 case MLX5_CMD_OP_QUERY_RMP:
364 case MLX5_CMD_OP_QUERY_SQ:
365 case MLX5_CMD_OP_QUERY_RQ:
366 case MLX5_CMD_OP_QUERY_RQT:
367 case MLX5_CMD_OP_QUERY_TIR:
368 case MLX5_CMD_OP_QUERY_TIS:
369 case MLX5_CMD_OP_QUERY_Q_COUNTER:
370 case MLX5_CMD_OP_QUERY_FLOW_TABLE:
371 case MLX5_CMD_OP_QUERY_FLOW_GROUP:
372 case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
373 case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
374 case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
375 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
376 case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
377 case MLX5_CMD_OP_QUERY_QP:
378 case MLX5_CMD_OP_QUERY_SRQ:
379 case MLX5_CMD_OP_QUERY_XRC_SRQ:
380 case MLX5_CMD_OP_QUERY_DCT:
381 case MLX5_CMD_OP_QUERY_XRQ:
382 return true;
383 default:
384 return false;
385 }
386}
387
388static bool devx_is_general_cmd(void *in)
389{
390 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
391
392 switch (opcode) {
393 case MLX5_CMD_OP_QUERY_HCA_CAP:
394 case MLX5_CMD_OP_QUERY_VPORT_STATE:
395 case MLX5_CMD_OP_QUERY_ADAPTER:
396 case MLX5_CMD_OP_QUERY_ISSI:
397 case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
398 case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
399 case MLX5_CMD_OP_QUERY_VNIC_ENV:
400 case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
401 case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
402 case MLX5_CMD_OP_NOP:
403 case MLX5_CMD_OP_QUERY_CONG_STATUS:
404 case MLX5_CMD_OP_QUERY_CONG_PARAMS:
405 case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
406 return true;
407 default:
408 return false;
409 }
410}
411
412static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
413 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
414{
415 struct mlx5_ib_ucontext *c;
416 struct mlx5_ib_dev *dev;
417 int user_vector;
418 int dev_eqn;
419 unsigned int irqn;
420 int err;
421
422 if (uverbs_copy_from(&user_vector, attrs,
423 MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
424 return -EFAULT;
425
426 c = devx_ufile2uctx(file);
427 if (IS_ERR(c))
428 return PTR_ERR(c);
429 dev = to_mdev(c->ibucontext.device);
430
431 err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
432 if (err < 0)
433 return err;
434
435 if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
436 &dev_eqn, sizeof(dev_eqn)))
437 return -EFAULT;
438
439 return 0;
440}
441
442/*
443 * Security note:
444 * The hardware protection mechanism works like this: each device object
445 * that is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called
446 * uar_page in the device specification manual) upon its creation. Then,
447 * upon doorbell, the hardware fetches the context of the object for which
448 * the doorbell was rung and validates that the UAR through which the
449 * doorbell was rung matches the UAR ID of the object. If they do not
450 * match, the doorbell is silently ignored by the hardware. Of course, the
451 * user cannot ring a doorbell on a UAR that was not mapped to it.
452 * Now in devx, since the devx kernel code does not manipulate the QP/SQ/CQ
453 * command mailboxes (except for tagging them with the UID), we expose the
454 * user's own UAR ID to it, so it can embed the ID in these objects in the
455 * expected specification format. The only thing a user can do is hurt
456 * itself by creating a QP/SQ/CQ with a UAR ID other than its own; other
457 * users may then ring a doorbell on its objects.
458 * The consequence is that another user can schedule the buggy user's QP/SQ
459 * for execution (simply insert it into the hardware schedule queue or arm
460 * its CQ for event generation); no further harm is expected.
461 */
462static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
463 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
464{
465 struct mlx5_ib_ucontext *c;
466 struct mlx5_ib_dev *dev;
467 u32 user_idx;
468 s32 dev_idx;
469
470 c = devx_ufile2uctx(file);
471 if (IS_ERR(c))
472 return PTR_ERR(c);
473 dev = to_mdev(c->ibucontext.device);
474
475 if (uverbs_copy_from(&user_idx, attrs,
476 MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
477 return -EFAULT;
478
479 dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
480 if (dev_idx < 0)
481 return dev_idx;
482
483 if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
484 &dev_idx, sizeof(dev_idx)))
485 return -EFAULT;
486
487 return 0;
488}
489
490static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
491 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
492{
493 struct mlx5_ib_ucontext *c;
494 struct mlx5_ib_dev *dev;
495 void *cmd_in = uverbs_attr_get_alloced_ptr(
496 attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
497 int cmd_out_len = uverbs_attr_get_len(attrs,
498 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
499 void *cmd_out;
500 int err;
501
502 c = devx_ufile2uctx(file);
503 if (IS_ERR(c))
504 return PTR_ERR(c);
505 dev = to_mdev(c->ibucontext.device);
506
507 if (!c->devx_uid)
508 return -EPERM;
509
510	/* Only a whitelist of general HCA commands is allowed for this method. */
511 if (!devx_is_general_cmd(cmd_in))
512 return -EINVAL;
513
514 cmd_out = uverbs_zalloc(attrs, cmd_out_len);
515 if (IS_ERR(cmd_out))
516 return PTR_ERR(cmd_out);
517
518 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
519 err = mlx5_cmd_exec(dev->mdev, cmd_in,
520 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
521 cmd_out, cmd_out_len);
522 if (err)
523 return err;
524
525 return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
526 cmd_out_len);
527}
528
529static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
530 u32 *dinlen,
531 u32 *obj_id)
532{
533 u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
534 u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
535
536 *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
537 *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
538
539 MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
540 MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
541
542 switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
543 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
544 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
545 MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
546 break;
547
548 case MLX5_CMD_OP_CREATE_MKEY:
549 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
550 break;
551 case MLX5_CMD_OP_CREATE_CQ:
552 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
553 break;
554 case MLX5_CMD_OP_ALLOC_PD:
555 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
556 break;
557 case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
558 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
559 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
560 break;
561 case MLX5_CMD_OP_CREATE_RMP:
562 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
563 break;
564 case MLX5_CMD_OP_CREATE_SQ:
565 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
566 break;
567 case MLX5_CMD_OP_CREATE_RQ:
568 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
569 break;
570 case MLX5_CMD_OP_CREATE_RQT:
571 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
572 break;
573 case MLX5_CMD_OP_CREATE_TIR:
574 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
575 break;
576 case MLX5_CMD_OP_CREATE_TIS:
577 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
578 break;
579 case MLX5_CMD_OP_ALLOC_Q_COUNTER:
580 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
581 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
582 break;
583 case MLX5_CMD_OP_CREATE_FLOW_TABLE:
584 *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
585 *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
586 MLX5_SET(destroy_flow_table_in, din, other_vport,
587 MLX5_GET(create_flow_table_in, in, other_vport));
588 MLX5_SET(destroy_flow_table_in, din, vport_number,
589 MLX5_GET(create_flow_table_in, in, vport_number));
590 MLX5_SET(destroy_flow_table_in, din, table_type,
591 MLX5_GET(create_flow_table_in, in, table_type));
592 MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
593 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
594 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
595 break;
596 case MLX5_CMD_OP_CREATE_FLOW_GROUP:
597 *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
598 *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
599 MLX5_SET(destroy_flow_group_in, din, other_vport,
600 MLX5_GET(create_flow_group_in, in, other_vport));
601 MLX5_SET(destroy_flow_group_in, din, vport_number,
602 MLX5_GET(create_flow_group_in, in, vport_number));
603 MLX5_SET(destroy_flow_group_in, din, table_type,
604 MLX5_GET(create_flow_group_in, in, table_type));
605 MLX5_SET(destroy_flow_group_in, din, table_id,
606 MLX5_GET(create_flow_group_in, in, table_id));
607 MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
608 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
609 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
610 break;
611 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
612 *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
613 *obj_id = MLX5_GET(set_fte_in, in, flow_index);
614 MLX5_SET(delete_fte_in, din, other_vport,
615 MLX5_GET(set_fte_in, in, other_vport));
616 MLX5_SET(delete_fte_in, din, vport_number,
617 MLX5_GET(set_fte_in, in, vport_number));
618 MLX5_SET(delete_fte_in, din, table_type,
619 MLX5_GET(set_fte_in, in, table_type));
620 MLX5_SET(delete_fte_in, din, table_id,
621 MLX5_GET(set_fte_in, in, table_id));
622 MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
623 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
624 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
625 break;
626 case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
627 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
628 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
629 break;
630 case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
631 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
632 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
633 break;
634 case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
635 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
636 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
637 break;
638 case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
639 *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
640 *obj_id = MLX5_GET(create_scheduling_element_out, out,
641 scheduling_element_id);
642 MLX5_SET(destroy_scheduling_element_in, din,
643 scheduling_hierarchy,
644 MLX5_GET(create_scheduling_element_in, in,
645 scheduling_hierarchy));
646 MLX5_SET(destroy_scheduling_element_in, din,
647 scheduling_element_id, *obj_id);
648 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
649 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
650 break;
651 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
652 *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
653 *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
654 MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
655 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
656 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
657 break;
658 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
659 *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
660 *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
661 MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
662 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
663 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
664 break;
665 case MLX5_CMD_OP_CREATE_QP:
666 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
667 break;
668 case MLX5_CMD_OP_CREATE_SRQ:
669 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
670 break;
671 case MLX5_CMD_OP_CREATE_XRC_SRQ:
672 MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
673 MLX5_CMD_OP_DESTROY_XRC_SRQ);
674 break;
675 case MLX5_CMD_OP_CREATE_DCT:
676 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
677 break;
678 case MLX5_CMD_OP_CREATE_XRQ:
679 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
680 break;
681 case MLX5_CMD_OP_ATTACH_TO_MCG:
682 *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
683 MLX5_SET(detach_from_mcg_in, din, qpn,
684 MLX5_GET(attach_to_mcg_in, in, qpn));
685 memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
686 MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
687 MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
688 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
689 break;
690 case MLX5_CMD_OP_ALLOC_XRCD:
691 MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
692 break;
693 default:
694		/* The opcode must match one of the entries in devx_is_obj_create_cmd() */
695 WARN_ON(true);
696 break;
697 }
698}
699
700static int devx_obj_cleanup(struct ib_uobject *uobject,
701 enum rdma_remove_reason why)
702{
703 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
704 struct devx_obj *obj = uobject->object;
705 int ret;
706
707 ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
708 if (ib_is_destroy_retryable(ret, why, uobject))
709 return ret;
710
711 kfree(obj);
712 return ret;
713}
714
715static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
716 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
717{
718 void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
719 int cmd_out_len = uverbs_attr_get_len(attrs,
720 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
721 void *cmd_out;
722 struct ib_uobject *uobj = uverbs_attr_get_uobject(
723 attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
724 struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
725 struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
726 struct devx_obj *obj;
727 int err;
728
729 if (!c->devx_uid)
730 return -EPERM;
731
732 if (!devx_is_obj_create_cmd(cmd_in))
733 return -EINVAL;
734
735 cmd_out = uverbs_zalloc(attrs, cmd_out_len);
736 if (IS_ERR(cmd_out))
737 return PTR_ERR(cmd_out);
738
739 obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
740 if (!obj)
741 return -ENOMEM;
742
743 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
744 err = mlx5_cmd_exec(dev->mdev, cmd_in,
745 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
746 cmd_out, cmd_out_len);
747 if (err)
748 goto obj_free;
749
750 uobj->object = obj;
751 obj->mdev = dev->mdev;
752 devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
753 WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
754
755 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
756 if (err)
757 goto obj_free;
758
759 return 0;
760
761obj_free:
762 kfree(obj);
763 return err;
764}
765
766static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
767 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
768{
769 void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
770 int cmd_out_len = uverbs_attr_get_len(attrs,
771 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
772 struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
773 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
774 struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
775 struct devx_obj *obj = uobj->object;
776 void *cmd_out;
777 int err;
778
779 if (!c->devx_uid)
780 return -EPERM;
781
782 if (!devx_is_obj_modify_cmd(cmd_in))
783 return -EINVAL;
784
785 if (!devx_is_valid_obj_id(obj, cmd_in))
786 return -EINVAL;
787
788 cmd_out = uverbs_zalloc(attrs, cmd_out_len);
789 if (IS_ERR(cmd_out))
790 return PTR_ERR(cmd_out);
791
792 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
793 err = mlx5_cmd_exec(obj->mdev, cmd_in,
794 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
795 cmd_out, cmd_out_len);
796 if (err)
797 return err;
798
799 return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
800 cmd_out, cmd_out_len);
801}
802
803static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
804 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
805{
806 void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
807 int cmd_out_len = uverbs_attr_get_len(attrs,
808 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
809 struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
810 MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
811 struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
812 struct devx_obj *obj = uobj->object;
813 void *cmd_out;
814 int err;
815
816 if (!c->devx_uid)
817 return -EPERM;
818
819 if (!devx_is_obj_query_cmd(cmd_in))
820 return -EINVAL;
821
822 if (!devx_is_valid_obj_id(obj, cmd_in))
823 return -EINVAL;
824
825 cmd_out = uverbs_zalloc(attrs, cmd_out_len);
826 if (IS_ERR(cmd_out))
827 return PTR_ERR(cmd_out);
828
829 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
830 err = mlx5_cmd_exec(obj->mdev, cmd_in,
831 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
832 cmd_out, cmd_out_len);
833 if (err)
834 return err;
835
836 return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
837 cmd_out, cmd_out_len);
838}
839
840static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
841 struct uverbs_attr_bundle *attrs,
842 struct devx_umem *obj)
843{
844 u64 addr;
845 size_t size;
846 u32 access;
847 int npages;
848 int err;
849 u32 page_mask;
850
851 if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
852 uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
853 return -EFAULT;
854
855 err = uverbs_get_flags32(&access, attrs,
856 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
857 IB_ACCESS_SUPPORTED);
858 if (err)
859 return err;
860
861 err = ib_check_mr_access(access);
862 if (err)
863 return err;
864
865 obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
866 if (IS_ERR(obj->umem))
867 return PTR_ERR(obj->umem);
868
869 mlx5_ib_cont_pages(obj->umem, obj->umem->address,
870 MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
871 &obj->page_shift, &obj->ncont, NULL);
872
873 if (!npages) {
874 ib_umem_release(obj->umem);
875 return -EINVAL;
876 }
877
878 page_mask = (1 << obj->page_shift) - 1;
879 obj->page_offset = obj->umem->address & page_mask;
880
881 return 0;
882}
883
884static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
885 struct devx_umem *obj,
886 struct devx_umem_reg_cmd *cmd)
887{
888 cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
889 (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
890 cmd->in = uverbs_zalloc(attrs, cmd->inlen);
891 return PTR_ERR_OR_ZERO(cmd->in);
892}
893
894static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
895 struct devx_umem *obj,
896 struct devx_umem_reg_cmd *cmd)
897{
898 void *umem;
899 __be64 *mtt;
900
901 umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
902 mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
903
904 MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
905 MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
906 MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
907 MLX5_SET(umem, umem, log_page_size, obj->page_shift -
908 MLX5_ADAPTER_PAGE_SHIFT);
909 MLX5_SET(umem, umem, page_offset, obj->page_offset);
910 mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
911 (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
912 MLX5_IB_MTT_READ);
913}
914
915static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
916 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
917{
918 struct devx_umem_reg_cmd cmd;
919 struct devx_umem *obj;
920 struct ib_uobject *uobj = uverbs_attr_get_uobject(
921 attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
922 u32 obj_id;
923 struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
924 struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
925 int err;
926
927 if (!c->devx_uid)
928 return -EPERM;
929
930 obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
931 if (!obj)
932 return -ENOMEM;
933
934 err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
935 if (err)
936 goto err_obj_free;
937
938 err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
939 if (err)
940 goto err_umem_release;
941
942 devx_umem_reg_cmd_build(dev, obj, &cmd);
943
944 MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
945 err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
946 sizeof(cmd.out));
947 if (err)
948 goto err_umem_release;
949
950 obj->mdev = dev->mdev;
951 uobj->object = obj;
952 devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
953 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
954 if (err)
955 goto err_umem_destroy;
956
957 return 0;
958
959err_umem_destroy:
960 mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
961err_umem_release:
962 ib_umem_release(obj->umem);
963err_obj_free:
964 kfree(obj);
965 return err;
966}
967
968static int devx_umem_cleanup(struct ib_uobject *uobject,
969 enum rdma_remove_reason why)
970{
971 struct devx_umem *obj = uobject->object;
972 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
973 int err;
974
975 err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
976 if (ib_is_destroy_retryable(err, why, uobject))
977 return err;
978
979 ib_umem_release(obj->umem);
980 kfree(obj);
981 return 0;
982}
983
984DECLARE_UVERBS_NAMED_METHOD(
985 MLX5_IB_METHOD_DEVX_UMEM_REG,
986 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
987 MLX5_IB_OBJECT_DEVX_UMEM,
988 UVERBS_ACCESS_NEW,
989 UA_MANDATORY),
990 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
991 UVERBS_ATTR_TYPE(u64),
992 UA_MANDATORY),
993 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
994 UVERBS_ATTR_TYPE(u64),
995 UA_MANDATORY),
996 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
997 enum ib_access_flags),
998 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
999 UVERBS_ATTR_TYPE(u32),
1000 UA_MANDATORY));
1001
1002DECLARE_UVERBS_NAMED_METHOD_DESTROY(
1003 MLX5_IB_METHOD_DEVX_UMEM_DEREG,
1004 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
1005 MLX5_IB_OBJECT_DEVX_UMEM,
1006 UVERBS_ACCESS_DESTROY,
1007 UA_MANDATORY));
1008
1009DECLARE_UVERBS_NAMED_METHOD(
1010 MLX5_IB_METHOD_DEVX_QUERY_EQN,
1011 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
1012 UVERBS_ATTR_TYPE(u32),
1013 UA_MANDATORY),
1014 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
1015 UVERBS_ATTR_TYPE(u32),
1016 UA_MANDATORY));
1017
1018DECLARE_UVERBS_NAMED_METHOD(
1019 MLX5_IB_METHOD_DEVX_QUERY_UAR,
1020 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
1021 UVERBS_ATTR_TYPE(u32),
1022 UA_MANDATORY),
1023 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
1024 UVERBS_ATTR_TYPE(u32),
1025 UA_MANDATORY));
1026
1027DECLARE_UVERBS_NAMED_METHOD(
1028 MLX5_IB_METHOD_DEVX_OTHER,
1029 UVERBS_ATTR_PTR_IN(
1030 MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
1031 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
1032 UA_MANDATORY,
1033 UA_ALLOC_AND_COPY),
1034 UVERBS_ATTR_PTR_OUT(
1035 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
1036 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
1037 UA_MANDATORY));
1038
1039DECLARE_UVERBS_NAMED_METHOD(
1040 MLX5_IB_METHOD_DEVX_OBJ_CREATE,
1041 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
1042 MLX5_IB_OBJECT_DEVX_OBJ,
1043 UVERBS_ACCESS_NEW,
1044 UA_MANDATORY),
1045 UVERBS_ATTR_PTR_IN(
1046 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
1047 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
1048 UA_MANDATORY,
1049 UA_ALLOC_AND_COPY),
1050 UVERBS_ATTR_PTR_OUT(
1051 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
1052 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
1053 UA_MANDATORY));
1054
1055DECLARE_UVERBS_NAMED_METHOD_DESTROY(
1056 MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
1057 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
1058 MLX5_IB_OBJECT_DEVX_OBJ,
1059 UVERBS_ACCESS_DESTROY,
1060 UA_MANDATORY));
1061
1062DECLARE_UVERBS_NAMED_METHOD(
1063 MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
1064 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
1065 MLX5_IB_OBJECT_DEVX_OBJ,
1066 UVERBS_ACCESS_WRITE,
1067 UA_MANDATORY),
1068 UVERBS_ATTR_PTR_IN(
1069 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
1070 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
1071 UA_MANDATORY,
1072 UA_ALLOC_AND_COPY),
1073 UVERBS_ATTR_PTR_OUT(
1074 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1075 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
1076 UA_MANDATORY));
1077
1078DECLARE_UVERBS_NAMED_METHOD(
1079 MLX5_IB_METHOD_DEVX_OBJ_QUERY,
1080 UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
1081 MLX5_IB_OBJECT_DEVX_OBJ,
1082 UVERBS_ACCESS_READ,
1083 UA_MANDATORY),
1084 UVERBS_ATTR_PTR_IN(
1085 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
1086 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
1087 UA_MANDATORY,
1088 UA_ALLOC_AND_COPY),
1089 UVERBS_ATTR_PTR_OUT(
1090 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1091 UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
1092 UA_MANDATORY));
1093
1094DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
1095 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
1096 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
1097 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
1098
1099DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
1100 UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
1101 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
1102 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
1103 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
1104 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
1105
1106DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
1107 UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
1108 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
1109 &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
1110
1111DECLARE_UVERBS_OBJECT_TREE(devx_objects,
1112 &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
1113 &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
1114 &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
1115
1116const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
1117{
1118 return &devx_objects;
1119}
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
new file mode 100644
index 000000000000..1a29f47f836e
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -0,0 +1,252 @@
1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
4 */
5
6#include <rdma/ib_user_verbs.h>
7#include <rdma/ib_verbs.h>
8#include <rdma/uverbs_types.h>
9#include <rdma/uverbs_ioctl.h>
10#include <rdma/mlx5_user_ioctl_cmds.h>
11#include <rdma/ib_umem.h>
12#include <linux/mlx5/driver.h>
13#include <linux/mlx5/fs.h>
14#include "mlx5_ib.h"
15
16#define UVERBS_MODULE_NAME mlx5_ib
17#include <rdma/uverbs_named_ioctl.h>
18
19static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
20 [MLX5_IB_FLOW_TYPE_NORMAL] = {
21 .type = UVERBS_ATTR_TYPE_PTR_IN,
22 .u.ptr = {
23 .len = sizeof(u16), /* data is priority */
24 .min_len = sizeof(u16),
25 }
26 },
27 [MLX5_IB_FLOW_TYPE_SNIFFER] = {
28 .type = UVERBS_ATTR_TYPE_PTR_IN,
29 UVERBS_ATTR_NO_DATA(),
30 },
31 [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
32 .type = UVERBS_ATTR_TYPE_PTR_IN,
33 UVERBS_ATTR_NO_DATA(),
34 },
35 [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
36 .type = UVERBS_ATTR_TYPE_PTR_IN,
37 UVERBS_ATTR_NO_DATA(),
38 },
39};
40
41static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
42 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
43{
44 struct mlx5_ib_flow_handler *flow_handler;
45 struct mlx5_ib_flow_matcher *fs_matcher;
46 void *devx_obj;
47 int dest_id, dest_type;
48 void *cmd_in;
49 int inlen;
50 bool dest_devx, dest_qp;
51 struct ib_qp *qp = NULL;
52 struct ib_uobject *uobj =
53 uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
54 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
55
56 if (!capable(CAP_NET_RAW))
57 return -EPERM;
58
59 dest_devx =
60 uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
61 dest_qp = uverbs_attr_is_valid(attrs,
62 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
63
64 if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
65 return -EINVAL;
66
67 if (dest_devx) {
68 devx_obj = uverbs_attr_get_obj(
69 attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
70 if (IS_ERR(devx_obj))
71 return PTR_ERR(devx_obj);
72
73 /* Verify that the given DEVX object is a flow
74 * steering destination.
75 */
76 if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
77 return -EINVAL;
78 } else {
79 struct mlx5_ib_qp *mqp;
80
81 qp = uverbs_attr_get_obj(attrs,
82 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
83 if (IS_ERR(qp))
84 return PTR_ERR(qp);
85
86 if (qp->qp_type != IB_QPT_RAW_PACKET)
87 return -EINVAL;
88
89 mqp = to_mqp(qp);
90 if (mqp->flags & MLX5_IB_QP_RSS)
91 dest_id = mqp->rss_qp.tirn;
92 else
93 dest_id = mqp->raw_packet_qp.rq.tirn;
94 dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
95 }
96
97 if (dev->rep)
98 return -ENOTSUPP;
99
100 cmd_in = uverbs_attr_get_alloced_ptr(
101 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
102 inlen = uverbs_attr_get_len(attrs,
103 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
104 fs_matcher = uverbs_attr_get_obj(attrs,
105 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
106 flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
107 dest_id, dest_type);
108 if (IS_ERR(flow_handler))
109 return PTR_ERR(flow_handler);
110
111 ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
112
113 return 0;
114}
115
116static int flow_matcher_cleanup(struct ib_uobject *uobject,
117 enum rdma_remove_reason why)
118{
119 struct mlx5_ib_flow_matcher *obj = uobject->object;
120 int ret;
121
122 ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
123 if (ret)
124 return ret;
125
126 kfree(obj);
127 return 0;
128}
129
130static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
131 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
132{
133 struct ib_uobject *uobj = uverbs_attr_get_uobject(
134 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
135 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
136 struct mlx5_ib_flow_matcher *obj;
137 int err;
138
139 obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
140 if (!obj)
141 return -ENOMEM;
142
143 obj->mask_len = uverbs_attr_get_len(
144 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
145 err = uverbs_copy_from(&obj->matcher_mask,
146 attrs,
147 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
148 if (err)
149 goto end;
150
151 obj->flow_type = uverbs_attr_get_enum_id(
152 attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
153
154 if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
155 err = uverbs_copy_from(&obj->priority,
156 attrs,
157 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
158 if (err)
159 goto end;
160 }
161
162 err = uverbs_copy_from(&obj->match_criteria_enable,
163 attrs,
164 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
165 if (err)
166 goto end;
167
168 uobj->object = obj;
169 obj->mdev = dev->mdev;
170 atomic_set(&obj->usecnt, 0);
171 return 0;
172
173end:
174 kfree(obj);
175 return err;
176}
177
178DECLARE_UVERBS_NAMED_METHOD(
179 MLX5_IB_METHOD_CREATE_FLOW,
180 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
181 UVERBS_OBJECT_FLOW,
182 UVERBS_ACCESS_NEW,
183 UA_MANDATORY),
184 UVERBS_ATTR_PTR_IN(
185 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
186 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
187 UA_MANDATORY,
188 UA_ALLOC_AND_COPY),
189 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
190 MLX5_IB_OBJECT_FLOW_MATCHER,
191 UVERBS_ACCESS_READ,
192 UA_MANDATORY),
193 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
194 UVERBS_OBJECT_QP,
195 UVERBS_ACCESS_READ),
196 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
197 MLX5_IB_OBJECT_DEVX_OBJ,
198 UVERBS_ACCESS_READ));
199
200DECLARE_UVERBS_NAMED_METHOD_DESTROY(
201 MLX5_IB_METHOD_DESTROY_FLOW,
202 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
203 UVERBS_OBJECT_FLOW,
204 UVERBS_ACCESS_DESTROY,
205 UA_MANDATORY));
206
207ADD_UVERBS_METHODS(mlx5_ib_fs,
208 UVERBS_OBJECT_FLOW,
209 &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
210 &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
211
212DECLARE_UVERBS_NAMED_METHOD(
213 MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
214 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
215 MLX5_IB_OBJECT_FLOW_MATCHER,
216 UVERBS_ACCESS_NEW,
217 UA_MANDATORY),
218 UVERBS_ATTR_PTR_IN(
219 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
220 UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
221 UA_MANDATORY),
222 UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
223 mlx5_ib_flow_type,
224 UA_MANDATORY),
225 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
226 UVERBS_ATTR_TYPE(u8),
227 UA_MANDATORY));
228
229DECLARE_UVERBS_NAMED_METHOD_DESTROY(
230 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
231 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
232 MLX5_IB_OBJECT_FLOW_MATCHER,
233 UVERBS_ACCESS_DESTROY,
234 UA_MANDATORY));
235
236DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
237 UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
238 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
239 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
240
241DECLARE_UVERBS_OBJECT_TREE(flow_objects,
242 &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
243
244int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
245{
246 int i = 0;
247
248 root[i++] = &flow_objects;
249 root[i++] = &mlx5_ib_fs;
250
251 return i;
252}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 79e6309460dc..4950df3f71b6 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
477 return gsi->tx_qps[qp_index]; 477 return gsi->tx_qps[qp_index];
478} 478}
479 479
480int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, 480int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
481 struct ib_send_wr **bad_wr) 481 const struct ib_send_wr **bad_wr)
482{ 482{
483 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 483 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
484 struct ib_qp *tx_qp; 484 struct ib_qp *tx_qp;
@@ -522,8 +522,8 @@ err:
522 return ret; 522 return ret;
523} 523}
524 524
525int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, 525int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
526 struct ib_recv_wr **bad_wr) 526 const struct ib_recv_wr **bad_wr)
527{ 527{
528 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); 528 struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
529 529
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b3ba9a222550..c414f3809e5c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
419 translate_eth_proto_oper(eth_prot_oper, &props->active_speed, 419 translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
420 &props->active_width); 420 &props->active_width);
421 421
422 props->port_cap_flags |= IB_PORT_CM_SUP; 422 props->port_cap_flags |= IB_PORT_CM_SUP;
423 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; 423 props->ip_gids = true;
424 424
425 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, 425 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
426 roce_address_table_size); 426 roce_address_table_size);
@@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
510 vlan_id, port_num); 510 vlan_id, port_num);
511} 511}
512 512
513static int mlx5_ib_add_gid(const union ib_gid *gid, 513static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
514 const struct ib_gid_attr *attr,
515 __always_unused void **context) 514 __always_unused void **context)
516{ 515{
517 return set_roce_addr(to_mdev(attr->device), attr->port_num, 516 return set_roce_addr(to_mdev(attr->device), attr->port_num,
518 attr->index, gid, attr); 517 attr->index, &attr->gid, attr);
519} 518}
520 519
521static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, 520static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
@@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
525 attr->index, NULL, NULL); 524 attr->index, NULL, NULL);
526} 525}
527 526
528__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 527__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
529 int index) 528 const struct ib_gid_attr *attr)
530{ 529{
531 struct ib_gid_attr attr; 530 if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
532 union ib_gid gid;
533
534 if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
535 return 0;
536
537 dev_put(attr.ndev);
538
539 if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
540 return 0; 531 return 0;
541 532
542 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); 533 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
543} 534}
544 535
545int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
546 int index, enum ib_gid_type *gid_type)
547{
548 struct ib_gid_attr attr;
549 union ib_gid gid;
550 int ret;
551
552 ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
553 if (ret)
554 return ret;
555
556 dev_put(attr.ndev);
557
558 *gid_type = attr.gid_type;
559
560 return 0;
561}
562
563static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 536static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
564{ 537{
565 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) 538 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
915 max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - 888 max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
916 sizeof(struct mlx5_wqe_raddr_seg)) / 889 sizeof(struct mlx5_wqe_raddr_seg)) /
917 sizeof(struct mlx5_wqe_data_seg); 890 sizeof(struct mlx5_wqe_data_seg);
918 props->max_sge = min(max_rq_sg, max_sq_sg); 891 props->max_send_sge = max_sq_sg;
892 props->max_recv_sge = max_rq_sg;
919 props->max_sge_rd = MLX5_MAX_SGE_RD; 893 props->max_sge_rd = MLX5_MAX_SGE_RD;
920 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 894 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
921 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 895 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
@@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
1246 props->qkey_viol_cntr = rep->qkey_violation_counter; 1220 props->qkey_viol_cntr = rep->qkey_violation_counter;
1247 props->subnet_timeout = rep->subnet_timeout; 1221 props->subnet_timeout = rep->subnet_timeout;
1248 props->init_type_reply = rep->init_type_reply; 1222 props->init_type_reply = rep->init_type_reply;
1249 props->grh_required = rep->grh_required;
1250 1223
1251 err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); 1224 err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
1252 if (err) 1225 if (err)
@@ -1585,31 +1558,26 @@ error:
1585 return err; 1558 return err;
1586} 1559}
1587 1560
1588static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 1561static void deallocate_uars(struct mlx5_ib_dev *dev,
1562 struct mlx5_ib_ucontext *context)
1589{ 1563{
1590 struct mlx5_bfreg_info *bfregi; 1564 struct mlx5_bfreg_info *bfregi;
1591 int err;
1592 int i; 1565 int i;
1593 1566
1594 bfregi = &context->bfregi; 1567 bfregi = &context->bfregi;
1595 for (i = 0; i < bfregi->num_sys_pages; i++) { 1568 for (i = 0; i < bfregi->num_sys_pages; i++)
1596 if (i < bfregi->num_static_sys_pages || 1569 if (i < bfregi->num_static_sys_pages ||
1597 bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) { 1570 bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
1598 err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); 1571 mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
1599 if (err) {
1600 mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
1601 return err;
1602 }
1603 }
1604 }
1605
1606 return 0;
1607} 1572}
1608 1573
1609static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) 1574static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
1610{ 1575{
1611 int err; 1576 int err;
1612 1577
1578 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1579 return 0;
1580
1613 err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); 1581 err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
1614 if (err) 1582 if (err)
1615 return err; 1583 return err;
@@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
1631 1599
1632static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) 1600static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
1633{ 1601{
1602 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1603 return;
1604
1634 mlx5_core_dealloc_transport_domain(dev->mdev, tdn); 1605 mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
1635 1606
1636 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || 1607 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
@@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1660 int err; 1631 int err;
1661 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, 1632 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
1662 max_cqe_version); 1633 max_cqe_version);
1634 u32 dump_fill_mkey;
1663 bool lib_uar_4k; 1635 bool lib_uar_4k;
1664 1636
1665 if (!dev->ib_active) 1637 if (!dev->ib_active)
@@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1676 if (err) 1648 if (err)
1677 return ERR_PTR(err); 1649 return ERR_PTR(err);
1678 1650
1679 if (req.flags) 1651 if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
1680 return ERR_PTR(-EINVAL); 1652 return ERR_PTR(-EOPNOTSUPP);
1681 1653
1682 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) 1654 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
1683 return ERR_PTR(-EOPNOTSUPP); 1655 return ERR_PTR(-EOPNOTSUPP);
@@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1755 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 1727 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
1756#endif 1728#endif
1757 1729
1758 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { 1730 err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
1759 err = mlx5_ib_alloc_transport_domain(dev, &context->tdn); 1731 if (err)
1732 goto out_uars;
1733
1734 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
1735		/* Block DEVX on Infiniband due to SELinux */
1736 if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
1737 err = -EPERM;
1738 goto out_td;
1739 }
1740
1741 err = mlx5_ib_devx_create(dev, context);
1760 if (err) 1742 if (err)
1761 goto out_uars; 1743 goto out_td;
1744 }
1745
1746 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1747 err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
1748 if (err)
1749 goto out_mdev;
1762 } 1750 }
1763 1751
1764 INIT_LIST_HEAD(&context->vma_private_list); 1752 INIT_LIST_HEAD(&context->vma_private_list);
@@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1819 resp.response_length += sizeof(resp.num_dyn_bfregs); 1807 resp.response_length += sizeof(resp.num_dyn_bfregs);
1820 } 1808 }
1821 1809
1810 if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
1811 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1812 resp.dump_fill_mkey = dump_fill_mkey;
1813 resp.comp_mask |=
1814 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
1815 }
1816 resp.response_length += sizeof(resp.dump_fill_mkey);
1817 }
1818
1822 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1819 err = ib_copy_to_udata(udata, &resp, resp.response_length);
1823 if (err) 1820 if (err)
1824 goto out_td; 1821 goto out_mdev;
1825 1822
1826 bfregi->ver = ver; 1823 bfregi->ver = ver;
1827 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; 1824 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
@@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1831 1828
1832 return &context->ibucontext; 1829 return &context->ibucontext;
1833 1830
1831out_mdev:
1832 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
1833 mlx5_ib_devx_destroy(dev, context);
1834out_td: 1834out_td:
1835 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1835 mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1836 mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1837 1836
1838out_uars: 1837out_uars:
1839 deallocate_uars(dev, context); 1838 deallocate_uars(dev, context);
@@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1856 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1855 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1857 struct mlx5_bfreg_info *bfregi; 1856 struct mlx5_bfreg_info *bfregi;
1858 1857
1858 if (context->devx_uid)
1859 mlx5_ib_devx_destroy(dev, context);
1860
1859 bfregi = &context->bfregi; 1861 bfregi = &context->bfregi;
1860 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1862 mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1861 mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1862 1863
1863 deallocate_uars(dev, context); 1864 deallocate_uars(dev, context);
1864 kfree(bfregi->sys_pages); 1865 kfree(bfregi->sys_pages);
@@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
2040 struct mlx5_bfreg_info *bfregi = &context->bfregi; 2041 struct mlx5_bfreg_info *bfregi = &context->bfregi;
2041 int err; 2042 int err;
2042 unsigned long idx; 2043 unsigned long idx;
2043 phys_addr_t pfn, pa; 2044 phys_addr_t pfn;
2044 pgprot_t prot; 2045 pgprot_t prot;
2045 u32 bfreg_dyn_idx = 0; 2046 u32 bfreg_dyn_idx = 0;
2046 u32 uar_index; 2047 u32 uar_index;
@@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
2131 goto err; 2132 goto err;
2132 } 2133 }
2133 2134
2134 pa = pfn << PAGE_SHIFT;
2135
2136 err = mlx5_ib_set_vma_data(vma, context); 2135 err = mlx5_ib_set_vma_data(vma, context);
2137 if (err) 2136 if (err)
2138 goto err; 2137 goto err;
@@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
2699 IPPROTO_GRE); 2698 IPPROTO_GRE);
2700 2699
2701 MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, 2700 MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
2702 0xffff); 2701 ntohs(ib_spec->gre.mask.protocol));
2703 MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, 2702 MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
2704 ntohs(ib_spec->gre.val.protocol)); 2703 ntohs(ib_spec->gre.val.protocol));
2705 2704
@@ -2979,11 +2978,11 @@ static void counters_clear_description(struct ib_counters *counters)
2979 2978
2980static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 2979static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
2981{ 2980{
2982 struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
2983 struct mlx5_ib_flow_handler *handler = container_of(flow_id, 2981 struct mlx5_ib_flow_handler *handler = container_of(flow_id,
2984 struct mlx5_ib_flow_handler, 2982 struct mlx5_ib_flow_handler,
2985 ibflow); 2983 ibflow);
2986 struct mlx5_ib_flow_handler *iter, *tmp; 2984 struct mlx5_ib_flow_handler *iter, *tmp;
2985 struct mlx5_ib_dev *dev = handler->dev;
2987 2986
2988 mutex_lock(&dev->flow_db->lock); 2987 mutex_lock(&dev->flow_db->lock);
2989 2988
@@ -3001,6 +3000,8 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
3001 counters_clear_description(handler->ibcounters); 3000 counters_clear_description(handler->ibcounters);
3002 3001
3003 mutex_unlock(&dev->flow_db->lock); 3002 mutex_unlock(&dev->flow_db->lock);
3003 if (handler->flow_matcher)
3004 atomic_dec(&handler->flow_matcher->usecnt);
3004 kfree(handler); 3005 kfree(handler);
3005 3006
3006 return 0; 3007 return 0;
@@ -3021,6 +3022,26 @@ enum flow_table_type {
3021 3022
3022#define MLX5_FS_MAX_TYPES 6 3023#define MLX5_FS_MAX_TYPES 6
3023#define MLX5_FS_MAX_ENTRIES BIT(16) 3024#define MLX5_FS_MAX_ENTRIES BIT(16)
3025
3026static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
3027 struct mlx5_ib_flow_prio *prio,
3028 int priority,
3029 int num_entries, int num_groups)
3030{
3031 struct mlx5_flow_table *ft;
3032
3033 ft = mlx5_create_auto_grouped_flow_table(ns, priority,
3034 num_entries,
3035 num_groups,
3036 0, 0);
3037 if (IS_ERR(ft))
3038 return ERR_CAST(ft);
3039
3040 prio->flow_table = ft;
3041 prio->refcount = 0;
3042 return prio;
3043}
3044
3024static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 3045static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3025 struct ib_flow_attr *flow_attr, 3046 struct ib_flow_attr *flow_attr,
3026 enum flow_table_type ft_type) 3047 enum flow_table_type ft_type)
@@ -3033,7 +3054,6 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3033 int num_entries; 3054 int num_entries;
3034 int num_groups; 3055 int num_groups;
3035 int priority; 3056 int priority;
3036 int err = 0;
3037 3057
3038 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3058 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3039 log_max_ft_size)); 3059 log_max_ft_size));
@@ -3083,21 +3103,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3083 return ERR_PTR(-ENOMEM); 3103 return ERR_PTR(-ENOMEM);
3084 3104
3085 ft = prio->flow_table; 3105 ft = prio->flow_table;
3086 if (!ft) { 3106 if (!ft)
3087 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 3107 return _get_prio(ns, prio, priority, num_entries, num_groups);
3088 num_entries,
3089 num_groups,
3090 0, 0);
3091
3092 if (!IS_ERR(ft)) {
3093 prio->refcount = 0;
3094 prio->flow_table = ft;
3095 } else {
3096 err = PTR_ERR(ft);
3097 }
3098 }
3099 3108
3100 return err ? ERR_PTR(err) : prio; 3109 return prio;
3101} 3110}
3102 3111
3103static void set_underlay_qp(struct mlx5_ib_dev *dev, 3112static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -3356,6 +3365,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
3356 3365
3357 ft_prio->refcount++; 3366 ft_prio->refcount++;
3358 handler->prio = ft_prio; 3367 handler->prio = ft_prio;
3368 handler->dev = dev;
3359 3369
3360 ft_prio->flow_table = ft; 3370 ft_prio->flow_table = ft;
3361free: 3371free:
@@ -3648,6 +3658,189 @@ free_ucmd:
3648 return ERR_PTR(err); 3658 return ERR_PTR(err);
3649} 3659}
3650 3660
3661static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
3662 int priority, bool mcast)
3663{
3664 int max_table_size;
3665 struct mlx5_flow_namespace *ns = NULL;
3666 struct mlx5_ib_flow_prio *prio;
3667
3668 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3669 log_max_ft_size));
3670 if (max_table_size < MLX5_FS_MAX_ENTRIES)
3671 return ERR_PTR(-ENOMEM);
3672
3673 if (mcast)
3674 priority = MLX5_IB_FLOW_MCAST_PRIO;
3675 else
3676 priority = ib_prio_to_core_prio(priority, false);
3677
3678 ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
3679 if (!ns)
3680 return ERR_PTR(-ENOTSUPP);
3681
3682 prio = &dev->flow_db->prios[priority];
3683
3684 if (prio->flow_table)
3685 return prio;
3686
3687 return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
3688 MLX5_FS_MAX_TYPES);
3689}
3690
3691static struct mlx5_ib_flow_handler *
3692_create_raw_flow_rule(struct mlx5_ib_dev *dev,
3693 struct mlx5_ib_flow_prio *ft_prio,
3694 struct mlx5_flow_destination *dst,
3695 struct mlx5_ib_flow_matcher *fs_matcher,
3696 void *cmd_in, int inlen)
3697{
3698 struct mlx5_ib_flow_handler *handler;
3699 struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
3700 struct mlx5_flow_spec *spec;
3701 struct mlx5_flow_table *ft = ft_prio->flow_table;
3702 int err = 0;
3703
3704 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
3705 handler = kzalloc(sizeof(*handler), GFP_KERNEL);
3706 if (!handler || !spec) {
3707 err = -ENOMEM;
3708 goto free;
3709 }
3710
3711 INIT_LIST_HEAD(&handler->list);
3712
3713 memcpy(spec->match_value, cmd_in, inlen);
3714 memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
3715 fs_matcher->mask_len);
3716 spec->match_criteria_enable = fs_matcher->match_criteria_enable;
3717
3718 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3719 handler->rule = mlx5_add_flow_rules(ft, spec,
3720 &flow_act, dst, 1);
3721
3722 if (IS_ERR(handler->rule)) {
3723 err = PTR_ERR(handler->rule);
3724 goto free;
3725 }
3726
3727 ft_prio->refcount++;
3728 handler->prio = ft_prio;
3729 handler->dev = dev;
3730 ft_prio->flow_table = ft;
3731
3732free:
3733 if (err)
3734 kfree(handler);
3735 kvfree(spec);
3736 return err ? ERR_PTR(err) : handler;
3737}
3738
3739static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
3740 void *match_v)
3741{
3742 void *match_c;
3743 void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
3744 void *dmac, *dmac_mask;
3745 void *ipv4, *ipv4_mask;
3746
3747 if (!(fs_matcher->match_criteria_enable &
3748 (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
3749 return false;
3750
3751 match_c = fs_matcher->matcher_mask.match_params;
3752 match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
3753 outer_headers);
3754 match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
3755 outer_headers);
3756
3757 dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
3758 dmac_47_16);
3759 dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
3760 dmac_47_16);
3761
3762 if (is_multicast_ether_addr(dmac) &&
3763 is_multicast_ether_addr(dmac_mask))
3764 return true;
3765
3766 ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
3767 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
3768
3769 ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
3770 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
3771
3772 if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
3773 ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
3774 return true;
3775
3776 return false;
3777}
3778
3779struct mlx5_ib_flow_handler *
3780mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
3781 struct mlx5_ib_flow_matcher *fs_matcher,
3782 void *cmd_in, int inlen, int dest_id,
3783 int dest_type)
3784{
3785 struct mlx5_flow_destination *dst;
3786 struct mlx5_ib_flow_prio *ft_prio;
3787 int priority = fs_matcher->priority;
3788 struct mlx5_ib_flow_handler *handler;
3789 bool mcast;
3790 int err;
3791
3792 if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
3793 return ERR_PTR(-EOPNOTSUPP);
3794
3795 if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
3796 return ERR_PTR(-ENOMEM);
3797
3798 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
3799 if (!dst)
3800 return ERR_PTR(-ENOMEM);
3801
3802 mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
3803 mutex_lock(&dev->flow_db->lock);
3804
3805 ft_prio = _get_flow_table(dev, priority, mcast);
3806 if (IS_ERR(ft_prio)) {
3807 err = PTR_ERR(ft_prio);
3808 goto unlock;
3809 }
3810
3811 if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
3812 dst->type = dest_type;
3813 dst->tir_num = dest_id;
3814 } else {
3815 dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
3816 dst->ft_num = dest_id;
3817 }
3818
3819 handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
3820 inlen);
3821
3822 if (IS_ERR(handler)) {
3823 err = PTR_ERR(handler);
3824 goto destroy_ft;
3825 }
3826
3827 mutex_unlock(&dev->flow_db->lock);
3828 atomic_inc(&fs_matcher->usecnt);
3829 handler->flow_matcher = fs_matcher;
3830
3831 kfree(dst);
3832
3833 return handler;
3834
3835destroy_ft:
3836 put_flow_table(dev, ft_prio, false);
3837unlock:
3838 mutex_unlock(&dev->flow_db->lock);
3839 kfree(dst);
3840
3841 return ERR_PTR(err);
3842}
3843
3651static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags) 3844static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
3652{ 3845{
3653 u32 flags = 0; 3846 u32 flags = 0;
@@ -3672,12 +3865,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device,
3672 u64 flags; 3865 u64 flags;
3673 int err = 0; 3866 int err = 0;
3674 3867
3675	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
3676				MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
3677		return ERR_PTR(-EFAULT);
3678
3679	if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
3680		return ERR_PTR(-EOPNOTSUPP);
3868	err = uverbs_get_flags64(
3869		&action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
3870		((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
3871	if (err)
3872		return ERR_PTR(err);
3681 3873
3682 flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags); 3874 flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
3683 3875
@@ -4466,7 +4658,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
4466 cancel_work_sync(&devr->ports[port].pkey_change_work); 4658 cancel_work_sync(&devr->ports[port].pkey_change_work);
4467} 4659}
4468 4660
4469static u32 get_core_cap_flags(struct ib_device *ibdev)
4661static u32 get_core_cap_flags(struct ib_device *ibdev,
4662 struct mlx5_hca_vport_context *rep)
4470{ 4663{
4471 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4664 struct mlx5_ib_dev *dev = to_mdev(ibdev);
4472 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); 4665 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
@@ -4475,11 +4668,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
4475 bool raw_support = !mlx5_core_mp_enabled(dev->mdev); 4668 bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
4476 u32 ret = 0; 4669 u32 ret = 0;
4477 4670
4671 if (rep->grh_required)
4672 ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
4673
4478 if (ll == IB_LINK_LAYER_INFINIBAND) 4674 if (ll == IB_LINK_LAYER_INFINIBAND)
4479		return RDMA_CORE_PORT_IBA_IB;
4675		return ret | RDMA_CORE_PORT_IBA_IB;
4480 4676
4481 if (raw_support) 4677 if (raw_support)
4482		ret = RDMA_CORE_PORT_RAW_PACKET;
4678		ret |= RDMA_CORE_PORT_RAW_PACKET;
4483 4679
4484 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 4680 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
4485 return ret; 4681 return ret;
@@ -4502,17 +4698,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
4502 struct ib_port_attr attr; 4698 struct ib_port_attr attr;
4503 struct mlx5_ib_dev *dev = to_mdev(ibdev); 4699 struct mlx5_ib_dev *dev = to_mdev(ibdev);
4504 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); 4700 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
4701 struct mlx5_hca_vport_context rep = {0};
4505 int err; 4702 int err;
4506 4703
4507 immutable->core_cap_flags = get_core_cap_flags(ibdev);
4508
4509 err = ib_query_port(ibdev, port_num, &attr); 4704 err = ib_query_port(ibdev, port_num, &attr);
4510 if (err) 4705 if (err)
4511 return err; 4706 return err;
4512 4707
4708 if (ll == IB_LINK_LAYER_INFINIBAND) {
4709 err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
4710 &rep);
4711 if (err)
4712 return err;
4713 }
4714
4513 immutable->pkey_tbl_len = attr.pkey_tbl_len; 4715 immutable->pkey_tbl_len = attr.pkey_tbl_len;
4514 immutable->gid_tbl_len = attr.gid_tbl_len; 4716 immutable->gid_tbl_len = attr.gid_tbl_len;
4515	immutable->core_cap_flags = get_core_cap_flags(ibdev);
4717	immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
4516 if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) 4718 if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
4517 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 4719 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
4518 4720
@@ -4610,7 +4812,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
4610 } 4812 }
4611} 4813}
4612 4814
4613static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
4815static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
4614{ 4816{
4615 int err; 4817 int err;
4616 4818
@@ -4689,12 +4891,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
4689 INIT_Q_COUNTER(req_cqe_flush_error), 4891 INIT_Q_COUNTER(req_cqe_flush_error),
4690}; 4892};
4691 4893
4894#define INIT_EXT_PPCNT_COUNTER(_name) \
4895 { .name = #_name, .offset = \
4896 MLX5_BYTE_OFF(ppcnt_reg, \
4897 counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
4898
4899static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
4900 INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
4901};
4902
4692static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) 4903static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
4693{ 4904{
4694 int i; 4905 int i;
4695 4906
4696 for (i = 0; i < dev->num_ports; i++) { 4907 for (i = 0; i < dev->num_ports; i++) {
4697		if (dev->port[i].cnts.set_id)
4908		if (dev->port[i].cnts.set_id_valid)
4698 mlx5_core_dealloc_q_counter(dev->mdev, 4909 mlx5_core_dealloc_q_counter(dev->mdev,
4699 dev->port[i].cnts.set_id); 4910 dev->port[i].cnts.set_id);
4700 kfree(dev->port[i].cnts.names); 4911 kfree(dev->port[i].cnts.names);
@@ -4724,7 +4935,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
4724 cnts->num_cong_counters = ARRAY_SIZE(cong_cnts); 4935 cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
4725 num_counters += ARRAY_SIZE(cong_cnts); 4936 num_counters += ARRAY_SIZE(cong_cnts);
4726 } 4937 }
4727
4938	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
4939 cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
4940 num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
4941 }
4728 cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); 4942 cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
4729 if (!cnts->names) 4943 if (!cnts->names)
4730 return -ENOMEM; 4944 return -ENOMEM;
@@ -4781,6 +4995,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
4781 offsets[j] = cong_cnts[i].offset; 4995 offsets[j] = cong_cnts[i].offset;
4782 } 4996 }
4783 } 4997 }
4998
4999 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
5000 for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
5001 names[j] = ext_ppcnt_cnts[i].name;
5002 offsets[j] = ext_ppcnt_cnts[i].offset;
5003 }
5004 }
4784} 5005}
4785 5006
4786static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) 5007static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
@@ -4826,7 +5047,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
4826 5047
4827 return rdma_alloc_hw_stats_struct(port->cnts.names, 5048 return rdma_alloc_hw_stats_struct(port->cnts.names,
4828 port->cnts.num_q_counters + 5049 port->cnts.num_q_counters +
4829					  port->cnts.num_cong_counters,
5050					  port->cnts.num_cong_counters +
5051 port->cnts.num_ext_ppcnt_counters,
4830 RDMA_HW_STATS_DEFAULT_LIFESPAN); 5052 RDMA_HW_STATS_DEFAULT_LIFESPAN);
4831} 5053}
4832 5054
@@ -4859,6 +5081,34 @@ free:
4859 return ret; 5081 return ret;
4860} 5082}
4861 5083
5084static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
5085 struct mlx5_ib_port *port,
5086 struct rdma_hw_stats *stats)
5087{
5088 int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
5089 int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
5090 int ret, i;
5091 void *out;
5092
5093 out = kvzalloc(sz, GFP_KERNEL);
5094 if (!out)
5095 return -ENOMEM;
5096
5097 ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
5098 if (ret)
5099 goto free;
5100
5101 for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
5102 stats->value[i + offset] =
5103 be64_to_cpup((__be64 *)(out +
5104 port->cnts.offsets[i + offset]));
5105 }
5106
5107free:
5108 kvfree(out);
5109 return ret;
5110}
5111
4862static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 5112static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
4863 struct rdma_hw_stats *stats, 5113 struct rdma_hw_stats *stats,
4864 u8 port_num, int index) 5114 u8 port_num, int index)
@@ -4872,13 +5122,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
4872 if (!stats) 5122 if (!stats)
4873 return -EINVAL; 5123 return -EINVAL;
4874 5124
4875	num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
5125	num_counters = port->cnts.num_q_counters +
5126 port->cnts.num_cong_counters +
5127 port->cnts.num_ext_ppcnt_counters;
4876 5128
4877 /* q_counters are per IB device, query the master mdev */ 5129 /* q_counters are per IB device, query the master mdev */
4878 ret = mlx5_ib_query_q_counters(dev->mdev, port, stats); 5130 ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
4879 if (ret) 5131 if (ret)
4880 return ret; 5132 return ret;
4881 5133
5134 if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
5135 ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
5136 if (ret)
5137 return ret;
5138 }
5139
4882 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { 5140 if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
4883 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, 5141 mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
4884 &mdev_port_num); 5142 &mdev_port_num);
@@ -4905,11 +5163,6 @@ done:
4905 return num_counters; 5163 return num_counters;
4906} 5164}
4907 5165
4908static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
4909{
4910 return mlx5_rdma_netdev_free(netdev);
4911}
4912
4913static struct net_device* 5166static struct net_device*
4914mlx5_ib_alloc_rdma_netdev(struct ib_device *hca, 5167mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
4915 u8 port_num, 5168 u8 port_num,
@@ -4919,17 +5172,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
4919 void (*setup)(struct net_device *)) 5172 void (*setup)(struct net_device *))
4920{ 5173{
4921 struct net_device *netdev; 5174 struct net_device *netdev;
4922 struct rdma_netdev *rn;
4923 5175
4924 if (type != RDMA_NETDEV_IPOIB) 5176 if (type != RDMA_NETDEV_IPOIB)
4925 return ERR_PTR(-EOPNOTSUPP); 5177 return ERR_PTR(-EOPNOTSUPP);
4926 5178
4927 netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca, 5179 netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
4928 name, setup); 5180 name, setup);
4929 if (likely(!IS_ERR_OR_NULL(netdev))) {
4930 rn = netdev_priv(netdev);
4931 rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
4932 }
4933 return netdev; 5181 return netdev;
4934} 5182}
4935 5183
@@ -5127,8 +5375,8 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
5127 5375
5128 spin_lock(&ibdev->port[port_num].mp.mpi_lock); 5376 spin_lock(&ibdev->port[port_num].mp.mpi_lock);
5129 if (ibdev->port[port_num].mp.mpi) { 5377 if (ibdev->port[port_num].mp.mpi) {
5130		mlx5_ib_warn(ibdev, "port %d already affiliated.\n",
5131			     port_num + 1);
5378		mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
5379			    port_num + 1);
5132 spin_unlock(&ibdev->port[port_num].mp.mpi_lock); 5380 spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
5133 return false; 5381 return false;
5134 } 5382 }
@@ -5263,45 +5511,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
5263 mlx5_nic_vport_disable_roce(dev->mdev); 5511 mlx5_nic_vport_disable_roce(dev->mdev);
5264} 5512}
5265 5513
5266ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM, 5514ADD_UVERBS_ATTRIBUTES_SIMPLE(
5267 UVERBS_METHOD_DM_ALLOC, 5515 mlx5_ib_dm,
5268 &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, 5516 UVERBS_OBJECT_DM,
5269 UVERBS_ATTR_TYPE(u64), 5517 UVERBS_METHOD_DM_ALLOC,
5270 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)), 5518 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
5271 &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, 5519 UVERBS_ATTR_TYPE(u64),
5272 UVERBS_ATTR_TYPE(u16), 5520 UA_MANDATORY),
5273 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 5521 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
5274 5522 UVERBS_ATTR_TYPE(u16),
5275ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION, 5523 UA_MANDATORY));
5276 UVERBS_METHOD_FLOW_ACTION_ESP_CREATE, 5524
5277 &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, 5525ADD_UVERBS_ATTRIBUTES_SIMPLE(
5278 UVERBS_ATTR_TYPE(u64), 5526 mlx5_ib_flow_action,
5279 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY))); 5527 UVERBS_OBJECT_FLOW_ACTION,
5528 UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
5529 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
5530 enum mlx5_ib_uapi_flow_action_flags));
5280 5531
5281#define NUM_TREES 2
5282static int populate_specs_root(struct mlx5_ib_dev *dev) 5532static int populate_specs_root(struct mlx5_ib_dev *dev)
5283{ 5533{
5284 const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = { 5534 const struct uverbs_object_tree_def **trees = dev->driver_trees;
5285 uverbs_default_get_objects()}; 5535 size_t num_trees = 0;
5286 size_t num_trees = 1;
5287 5536
5288 if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE && 5537 if (mlx5_accel_ipsec_device_caps(dev->mdev) &
5289 !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) 5538 MLX5_ACCEL_IPSEC_CAP_DEVICE)
5290 default_root[num_trees++] = &mlx5_ib_flow_action; 5539 trees[num_trees++] = &mlx5_ib_flow_action;
5291 5540
5292 if (MLX5_CAP_DEV_MEM(dev->mdev, memic) && 5541 if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
5293 !WARN_ON(num_trees >= ARRAY_SIZE(default_root))) 5542 trees[num_trees++] = &mlx5_ib_dm;
5294 default_root[num_trees++] = &mlx5_ib_dm;
5295 5543
5296 dev->ib_dev.specs_root = 5544 if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
5297 uverbs_alloc_spec_tree(num_trees, default_root); 5545 MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
5546 trees[num_trees++] = mlx5_ib_get_devx_tree();
5298 5547
5299 return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root); 5548 num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
5300}
5301 5549
5302static void depopulate_specs_root(struct mlx5_ib_dev *dev) 5550 WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
5303{ 5551 trees[num_trees] = NULL;
5304 uverbs_free_spec_tree(dev->ib_dev.specs_root); 5552 dev->ib_dev.driver_specs = trees;
5553
5554 return 0;
5305} 5555}
5306 5556
5307static int mlx5_ib_read_counters(struct ib_counters *counters, 5557static int mlx5_ib_read_counters(struct ib_counters *counters,
@@ -5552,6 +5802,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
5552 dev->ib_dev.modify_qp = mlx5_ib_modify_qp; 5802 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
5553 dev->ib_dev.query_qp = mlx5_ib_query_qp; 5803 dev->ib_dev.query_qp = mlx5_ib_query_qp;
5554 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; 5804 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
5805 dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
5806 dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
5555 dev->ib_dev.post_send = mlx5_ib_post_send; 5807 dev->ib_dev.post_send = mlx5_ib_post_send;
5556 dev->ib_dev.post_recv = mlx5_ib_post_recv; 5808 dev->ib_dev.post_recv = mlx5_ib_post_recv;
5557 dev->ib_dev.create_cq = mlx5_ib_create_cq; 5809 dev->ib_dev.create_cq = mlx5_ib_create_cq;
@@ -5649,9 +5901,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
5649 return 0; 5901 return 0;
5650} 5902}
5651 5903
5652static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, 5904static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
5653 u8 port_num)
5654{ 5905{
5906 u8 port_num;
5655 int i; 5907 int i;
5656 5908
5657 for (i = 0; i < dev->num_ports; i++) { 5909 for (i = 0; i < dev->num_ports; i++) {
@@ -5674,6 +5926,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
5674 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 5926 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
5675 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 5927 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
5676 5928
5929 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5930
5677 return mlx5_add_netdev_notifier(dev, port_num); 5931 return mlx5_add_netdev_notifier(dev, port_num);
5678} 5932}
5679 5933
@@ -5690,14 +5944,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
5690 enum rdma_link_layer ll; 5944 enum rdma_link_layer ll;
5691 int port_type_cap; 5945 int port_type_cap;
5692 int err = 0; 5946 int err = 0;
5693 u8 port_num;
5694 5947
5695 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5696 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 5948 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
5697 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 5949 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
5698 5950
5699 if (ll == IB_LINK_LAYER_ETHERNET) 5951 if (ll == IB_LINK_LAYER_ETHERNET)
5700 err = mlx5_ib_stage_common_roce_init(dev, port_num); 5952 err = mlx5_ib_stage_common_roce_init(dev);
5701 5953
5702 return err; 5954 return err;
5703} 5955}
@@ -5712,19 +5964,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
5712 struct mlx5_core_dev *mdev = dev->mdev; 5964 struct mlx5_core_dev *mdev = dev->mdev;
5713 enum rdma_link_layer ll; 5965 enum rdma_link_layer ll;
5714 int port_type_cap; 5966 int port_type_cap;
5715 u8 port_num;
5716 int err; 5967 int err;
5717 5968
5718 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5719 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 5969 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
5720 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 5970 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
5721 5971
5722 if (ll == IB_LINK_LAYER_ETHERNET) { 5972 if (ll == IB_LINK_LAYER_ETHERNET) {
5723 err = mlx5_ib_stage_common_roce_init(dev, port_num); 5973 err = mlx5_ib_stage_common_roce_init(dev);
5724 if (err) 5974 if (err)
5725 return err; 5975 return err;
5726 5976
5727 err = mlx5_enable_eth(dev, port_num); 5977 err = mlx5_enable_eth(dev);
5728 if (err) 5978 if (err)
5729 goto cleanup; 5979 goto cleanup;
5730 } 5980 }
@@ -5741,9 +5991,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
5741 struct mlx5_core_dev *mdev = dev->mdev; 5991 struct mlx5_core_dev *mdev = dev->mdev;
5742 enum rdma_link_layer ll; 5992 enum rdma_link_layer ll;
5743 int port_type_cap; 5993 int port_type_cap;
5744 u8 port_num;
5745 5994
5746 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
5747 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 5995 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
5748 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 5996 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
5749 5997
@@ -5842,11 +6090,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
5842 return ib_register_device(&dev->ib_dev, NULL); 6090 return ib_register_device(&dev->ib_dev, NULL);
5843} 6091}
5844 6092
5845static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
5846{
5847 depopulate_specs_root(dev);
5848}
5849
5850void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) 6093void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
5851{ 6094{
5852 destroy_umrc_res(dev); 6095 destroy_umrc_res(dev);
@@ -5915,8 +6158,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
5915 ib_dealloc_device((struct ib_device *)dev); 6158 ib_dealloc_device((struct ib_device *)dev);
5916} 6159}
5917 6160
5918static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
5919
5920void *__mlx5_ib_add(struct mlx5_ib_dev *dev, 6161void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
5921 const struct mlx5_ib_profile *profile) 6162 const struct mlx5_ib_profile *profile)
5922{ 6163{
@@ -5983,7 +6224,7 @@ static const struct mlx5_ib_profile pf_profile = {
5983 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 6224 mlx5_ib_stage_pre_ib_reg_umr_cleanup),
5984 STAGE_CREATE(MLX5_IB_STAGE_SPECS, 6225 STAGE_CREATE(MLX5_IB_STAGE_SPECS,
5985 mlx5_ib_stage_populate_specs, 6226 mlx5_ib_stage_populate_specs,
5986 mlx5_ib_stage_depopulate_specs), 6227 NULL),
5987 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 6228 STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
5988 mlx5_ib_stage_ib_reg_init, 6229 mlx5_ib_stage_ib_reg_init,
5989 mlx5_ib_stage_ib_reg_cleanup), 6230 mlx5_ib_stage_ib_reg_cleanup),
@@ -6031,7 +6272,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
6031 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 6272 mlx5_ib_stage_pre_ib_reg_umr_cleanup),
6032 STAGE_CREATE(MLX5_IB_STAGE_SPECS, 6273 STAGE_CREATE(MLX5_IB_STAGE_SPECS,
6033 mlx5_ib_stage_populate_specs, 6274 mlx5_ib_stage_populate_specs,
6034 mlx5_ib_stage_depopulate_specs), 6275 NULL),
6035 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 6276 STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
6036 mlx5_ib_stage_ib_reg_init, 6277 mlx5_ib_stage_ib_reg_init,
6037 mlx5_ib_stage_ib_reg_cleanup), 6278 mlx5_ib_stage_ib_reg_cleanup),
@@ -6046,7 +6287,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
6046 mlx5_ib_stage_rep_reg_cleanup), 6287 mlx5_ib_stage_rep_reg_cleanup),
6047}; 6288};
6048 6289
6049static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) 6290static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
6050{ 6291{
6051 struct mlx5_ib_multiport_info *mpi; 6292 struct mlx5_ib_multiport_info *mpi;
6052 struct mlx5_ib_dev *dev; 6293 struct mlx5_ib_dev *dev;
@@ -6080,8 +6321,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
6080 if (!bound) { 6321 if (!bound) {
6081 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); 6322 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
6082 dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); 6323 dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
6083 } else {
6084 mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
6085 } 6324 }
6086 mutex_unlock(&mlx5_ib_multiport_mutex); 6325 mutex_unlock(&mlx5_ib_multiport_mutex);
6087 6326
@@ -6099,11 +6338,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
6099 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 6338 port_type_cap = MLX5_CAP_GEN(mdev, port_type);
6100 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 6339 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
6101 6340
6102 if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { 6341 if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
6103 u8 port_num = mlx5_core_native_port_num(mdev) - 1; 6342 return mlx5_ib_add_slave_port(mdev);
6104
6105 return mlx5_ib_add_slave_port(mdev, port_num);
6106 }
6107 6343
6108 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); 6344 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
6109 if (!dev) 6345 if (!dev)
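
The extended PPCNT counter hunks above pull big-endian 64-bit values out of a raw register dump at precomputed byte offsets (be64_to_cpup() over out + offsets[i]). Below is a minimal userspace sketch of that same pattern; the counter name, the offset table and the buffer layout are invented for illustration and are not the mlx5 PPCNT register layout.

/*
 * Userspace sketch of the counter-read pattern used by
 * mlx5_ib_query_ext_ppcnt_counters() above: walk a table of
 * {name, byte offset} descriptors and convert each big-endian
 * 64-bit value in the raw dump to host order. Layout is invented.
 */
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct counter_desc {
	const char *name;
	size_t offset;		/* byte offset of the big-endian value */
};

static const struct counter_desc descs[] = {
	{ "rx_icrc_encapsulated", 0x08 },	/* illustrative offset */
};

static void fill_stats(const void *out, uint64_t *stats, size_t base)
{
	for (size_t i = 0; i < sizeof(descs) / sizeof(descs[0]); i++) {
		uint64_t be;

		memcpy(&be, (const char *)out + descs[i].offset, sizeof(be));
		stats[base + i] = be64toh(be);	/* kernel: be64_to_cpup() */
	}
}

int main(void)
{
	unsigned char out[64] = { 0 };
	uint64_t stats[4] = { 0 };

	out[0x08 + 7] = 0x2a;	/* pretend the device reported 42 */
	fill_stats(out, stats, 0);
	printf("%s = %llu\n", descs[0].name, (unsigned long long)stats[0]);
	return 0;
}
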
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d89c8fe626f6..320d4dfe8c2f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -46,6 +46,7 @@
46#include <rdma/ib_user_verbs.h> 46#include <rdma/ib_user_verbs.h>
47#include <rdma/mlx5-abi.h> 47#include <rdma/mlx5-abi.h>
48#include <rdma/uverbs_ioctl.h> 48#include <rdma/uverbs_ioctl.h>
49#include <rdma/mlx5_user_ioctl_cmds.h>
49 50
50#define mlx5_ib_dbg(dev, format, arg...) \ 51#define mlx5_ib_dbg(dev, format, arg...) \
51pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 52pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -78,12 +79,6 @@ enum {
78 MLX5_REQ_SCAT_DATA64_CQE = 0x22, 79 MLX5_REQ_SCAT_DATA64_CQE = 0x22,
79}; 80};
80 81
81enum mlx5_ib_latency_class {
82 MLX5_IB_LATENCY_CLASS_LOW,
83 MLX5_IB_LATENCY_CLASS_MEDIUM,
84 MLX5_IB_LATENCY_CLASS_HIGH,
85};
86
87enum mlx5_ib_mad_ifc_flags { 82enum mlx5_ib_mad_ifc_flags {
88 MLX5_MAD_IFC_IGNORE_MKEY = 1, 83 MLX5_MAD_IFC_IGNORE_MKEY = 1,
89 MLX5_MAD_IFC_IGNORE_BKEY = 2, 84 MLX5_MAD_IFC_IGNORE_BKEY = 2,
@@ -143,6 +138,7 @@ struct mlx5_ib_ucontext {
143 138
144 u64 lib_caps; 139 u64 lib_caps;
145 DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); 140 DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
141 u16 devx_uid;
146}; 142};
147 143
148static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) 144static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -176,6 +172,18 @@ struct mlx5_ib_flow_handler {
176 struct mlx5_ib_flow_prio *prio; 172 struct mlx5_ib_flow_prio *prio;
177 struct mlx5_flow_handle *rule; 173 struct mlx5_flow_handle *rule;
178 struct ib_counters *ibcounters; 174 struct ib_counters *ibcounters;
175 struct mlx5_ib_dev *dev;
176 struct mlx5_ib_flow_matcher *flow_matcher;
177};
178
179struct mlx5_ib_flow_matcher {
180 struct mlx5_ib_match_params matcher_mask;
181 int mask_len;
182 enum mlx5_ib_flow_type flow_type;
183 u16 priority;
184 struct mlx5_core_dev *mdev;
185 atomic_t usecnt;
186 u8 match_criteria_enable;
179}; 187};
180 188
181struct mlx5_ib_flow_db { 189struct mlx5_ib_flow_db {
@@ -461,7 +469,7 @@ struct mlx5_umr_wr {
461 u32 mkey; 469 u32 mkey;
462}; 470};
463 471
464static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
472static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
465{ 473{
466 return container_of(wr, struct mlx5_umr_wr, wr); 474 return container_of(wr, struct mlx5_umr_wr, wr);
467} 475}
@@ -665,6 +673,7 @@ struct mlx5_ib_counters {
665 size_t *offsets; 673 size_t *offsets;
666 u32 num_q_counters; 674 u32 num_q_counters;
667 u32 num_cong_counters; 675 u32 num_cong_counters;
676 u32 num_ext_ppcnt_counters;
668 u16 set_id; 677 u16 set_id;
669 bool set_id_valid; 678 bool set_id_valid;
670}; 679};
@@ -851,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs)
851 860
852struct mlx5_ib_dev { 861struct mlx5_ib_dev {
853 struct ib_device ib_dev; 862 struct ib_device ib_dev;
863 const struct uverbs_object_tree_def *driver_trees[6];
854 struct mlx5_core_dev *mdev; 864 struct mlx5_core_dev *mdev;
855 struct mlx5_roce roce[MLX5_MAX_PORTS]; 865 struct mlx5_roce roce[MLX5_MAX_PORTS];
856 int num_ports; 866 int num_ports;
@@ -1004,8 +1014,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1004 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); 1014 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
1005int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); 1015int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
1006int mlx5_ib_destroy_srq(struct ib_srq *srq); 1016int mlx5_ib_destroy_srq(struct ib_srq *srq);
1007int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 1017int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1008 struct ib_recv_wr **bad_wr); 1018 const struct ib_recv_wr **bad_wr);
1009struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, 1019struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1010 struct ib_qp_init_attr *init_attr, 1020 struct ib_qp_init_attr *init_attr,
1011 struct ib_udata *udata); 1021 struct ib_udata *udata);
@@ -1014,10 +1024,12 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1014int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, 1024int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
1015 struct ib_qp_init_attr *qp_init_attr); 1025 struct ib_qp_init_attr *qp_init_attr);
1016int mlx5_ib_destroy_qp(struct ib_qp *qp); 1026int mlx5_ib_destroy_qp(struct ib_qp *qp);
1017int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1027void mlx5_ib_drain_sq(struct ib_qp *qp);
1018 struct ib_send_wr **bad_wr); 1028void mlx5_ib_drain_rq(struct ib_qp *qp);
1019int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 1029int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1020 struct ib_recv_wr **bad_wr); 1030 const struct ib_send_wr **bad_wr);
1031int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1032 const struct ib_recv_wr **bad_wr);
1021void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); 1033void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
1022int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, 1034int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
1023 void *buffer, u32 length, 1035 void *buffer, u32 length,
@@ -1183,10 +1195,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
1183int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, 1195int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
1184 u64 guid, int type); 1196 u64 guid, int type);
1185 1197
1186__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 1198__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
1187 int index); 1199 const struct ib_gid_attr *attr);
1188int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
1189 int index, enum ib_gid_type *gid_type);
1190 1200
1191void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); 1201void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
1192int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); 1202int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
@@ -1200,10 +1210,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1200int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, 1210int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
1201 int qp_attr_mask, 1211 int qp_attr_mask,
1202 struct ib_qp_init_attr *qp_init_attr); 1212 struct ib_qp_init_attr *qp_init_attr);
1203int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, 1213int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
1204 struct ib_send_wr **bad_wr); 1214 const struct ib_send_wr **bad_wr);
1205int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, 1215int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
1206 struct ib_recv_wr **bad_wr); 1216 const struct ib_recv_wr **bad_wr);
1207void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); 1217void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
1208 1218
1209int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); 1219int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
@@ -1217,6 +1227,36 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
1217void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, 1227void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
1218 u8 port_num); 1228 u8 port_num);
1219 1229
1230#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
1231int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
1232 struct mlx5_ib_ucontext *context);
1233void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
1234 struct mlx5_ib_ucontext *context);
1235const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
1236struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
1237 struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1238 void *cmd_in, int inlen, int dest_id, int dest_type);
1239bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
1240int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
1241#else
1242static inline int
1243mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
1244 struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
1245static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
1246 struct mlx5_ib_ucontext *context) {}
1247static inline const struct uverbs_object_tree_def *
1248mlx5_ib_get_devx_tree(void) { return NULL; }
1249static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
1250 int *dest_type)
1251{
1252 return false;
1253}
1254static inline int
1255mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
1256{
1257 return 0;
1258}
1259#endif
1220static inline void init_query_mad(struct ib_smp *mad) 1260static inline void init_query_mad(struct ib_smp *mad)
1221{ 1261{
1222 mad->base_version = 1; 1262 mad->base_version = 1;
@@ -1318,4 +1358,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
1318unsigned long mlx5_ib_get_xlt_emergency_page(void); 1358unsigned long mlx5_ib_get_xlt_emergency_page(void);
1319void mlx5_ib_put_xlt_emergency_page(void); 1359void mlx5_ib_put_xlt_emergency_page(void);
1320 1360
1361int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
1362 struct mlx5_bfreg_info *bfregi, u32 bfregn,
1363 bool dyn_bfreg);
1321#endif /* MLX5_IB_H */ 1364#endif /* MLX5_IB_H */
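
The new devx declarations above use the usual compile-out idiom: when CONFIG_INFINIBAND_USER_ACCESS is disabled, callers see static inline stubs returning -EOPNOTSUPP (or a harmless default) instead of carrying #ifdefs at every call site. A small standalone illustration of that idiom follows; FEATURE_DEVX and devx_create() are invented names, and EOPNOTSUPP comes from <errno.h> on Linux.

/*
 * Sketch of the static-inline-stub pattern (names are illustrative,
 * not the kernel symbols). Build without -DFEATURE_DEVX and the stub
 * is used; callers need no #ifdef of their own.
 */
#include <errno.h>
#include <stdio.h>

#ifdef FEATURE_DEVX
int devx_create(void);			/* real implementation elsewhere */
#else
static inline int devx_create(void)
{
	return -EOPNOTSUPP;		/* feature compiled out */
}
#endif

int main(void)
{
	int ret = devx_create();

	if (ret == -EOPNOTSUPP)
		printf("devx not built in, continuing without it\n");
	return 0;
}
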
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 90a9c461cedc..9fb1d9cb9401 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
271{ 271{
272 struct mlx5_cache_ent *ent = filp->private_data; 272 struct mlx5_cache_ent *ent = filp->private_data;
273 struct mlx5_ib_dev *dev = ent->dev; 273 struct mlx5_ib_dev *dev = ent->dev;
274	char lbuf[20];
274	char lbuf[20] = {0};
275 u32 var; 275 u32 var;
276 int err; 276 int err;
277 int c; 277 int c;
278 278
279	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
279	count = min(count, sizeof(lbuf) - 1);
280 if (copy_from_user(lbuf, buf, count))
280 return -EFAULT; 281 return -EFAULT;
281 282
282 c = order2idx(dev, ent->order); 283 c = order2idx(dev, ent->order);
283 lbuf[sizeof(lbuf) - 1] = 0;
284 284
285 if (sscanf(lbuf, "%u", &var) != 1) 285 if (sscanf(lbuf, "%u", &var) != 1)
286 return -EINVAL; 286 return -EINVAL;
@@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
310 char lbuf[20]; 310 char lbuf[20];
311 int err; 311 int err;
312 312
313 if (*pos)
314 return 0;
315
316 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); 313 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
317 if (err < 0) 314 if (err < 0)
318 return err; 315 return err;
319 316
320	if (copy_to_user(buf, lbuf, err))
321		return -EFAULT;
322
323	*pos += err;
324
325	return err;
317	return simple_read_from_buffer(buf, count, pos, lbuf, err);
326} 318}
327 319
328static const struct file_operations size_fops = { 320static const struct file_operations size_fops = {
@@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
337{ 329{
338 struct mlx5_cache_ent *ent = filp->private_data; 330 struct mlx5_cache_ent *ent = filp->private_data;
339 struct mlx5_ib_dev *dev = ent->dev; 331 struct mlx5_ib_dev *dev = ent->dev;
340	char lbuf[20];
332	char lbuf[20] = {0};
341 u32 var; 333 u32 var;
342 int err; 334 int err;
343 int c; 335 int c;
344 336
345	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
337	count = min(count, sizeof(lbuf) - 1);
338 if (copy_from_user(lbuf, buf, count))
346 return -EFAULT; 339 return -EFAULT;
347 340
348 c = order2idx(dev, ent->order); 341 c = order2idx(dev, ent->order);
349 lbuf[sizeof(lbuf) - 1] = 0;
350 342
351 if (sscanf(lbuf, "%u", &var) != 1) 343 if (sscanf(lbuf, "%u", &var) != 1)
352 return -EINVAL; 344 return -EINVAL;
@@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
372 char lbuf[20]; 364 char lbuf[20];
373 int err; 365 int err;
374 366
375 if (*pos)
376 return 0;
377
378 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 367 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
379 if (err < 0) 368 if (err < 0)
380 return err; 369 return err;
381 370
382	if (copy_to_user(buf, lbuf, err))
383		return -EFAULT;
384
385	*pos += err;
386
387	return err;
371	return simple_read_from_buffer(buf, count, pos, lbuf, err);
388} 372}
389 373
390static const struct file_operations limit_fops = { 374static const struct file_operations limit_fops = {
@@ -914,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
914 struct mlx5_umr_wr *umrwr) 898 struct mlx5_umr_wr *umrwr)
915{ 899{
916 struct umr_common *umrc = &dev->umrc; 900 struct umr_common *umrc = &dev->umrc;
917 struct ib_send_wr *bad; 901 const struct ib_send_wr *bad;
918 int err; 902 int err;
919 struct mlx5_ib_umr_context umr_context; 903 struct mlx5_ib_umr_context umr_context;
920 904
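
The size_write()/limit_write() fixes above bound the copy to what the caller actually supplied and to the local buffer, keep a terminating NUL, and only then parse; the read side is rewritten on top of simple_read_from_buffer(). A userspace sketch of the write-side shape, where parse_u32() and the 20-byte buffer are illustrative stand-ins rather than the debugfs API:

/*
 * Bounded-copy-then-parse, the same shape as the fixed debugfs
 * write handlers: never copy more than count or more than the
 * local buffer holds, keep a terminating NUL, then sscanf().
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int parse_u32(const char *user_buf, size_t count, uint32_t *val)
{
	char lbuf[20] = { 0 };

	if (count > sizeof(lbuf) - 1)	/* kernel: min(count, sizeof(lbuf) - 1) */
		count = sizeof(lbuf) - 1;
	memcpy(lbuf, user_buf, count);	/* kernel: copy_from_user() */

	return sscanf(lbuf, "%u", val) == 1 ? 0 : -1;
}

int main(void)
{
	uint32_t v;

	if (!parse_u32("1234\n", 5, &v))
		printf("parsed %u\n", v);
	return 0;
}
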
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f1f638509f..6cba2a02d11b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
563} 563}
564 564
565static int alloc_bfreg(struct mlx5_ib_dev *dev, 565static int alloc_bfreg(struct mlx5_ib_dev *dev,
566 struct mlx5_bfreg_info *bfregi, 566 struct mlx5_bfreg_info *bfregi)
567 enum mlx5_ib_latency_class lat)
568{ 567{
569 int bfregn = -EINVAL; 568 int bfregn = -ENOMEM;
570 569
571 mutex_lock(&bfregi->lock); 570 mutex_lock(&bfregi->lock);
572 switch (lat) { 571 if (bfregi->ver >= 2) {
573 case MLX5_IB_LATENCY_CLASS_LOW: 572 bfregn = alloc_high_class_bfreg(dev, bfregi);
573 if (bfregn < 0)
574 bfregn = alloc_med_class_bfreg(dev, bfregi);
575 }
576
577 if (bfregn < 0) {
574 BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1); 578 BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
575 bfregn = 0; 579 bfregn = 0;
576 bfregi->count[bfregn]++; 580 bfregi->count[bfregn]++;
577 break;
578
579 case MLX5_IB_LATENCY_CLASS_MEDIUM:
580 if (bfregi->ver < 2)
581 bfregn = -ENOMEM;
582 else
583 bfregn = alloc_med_class_bfreg(dev, bfregi);
584 break;
585
586 case MLX5_IB_LATENCY_CLASS_HIGH:
587 if (bfregi->ver < 2)
588 bfregn = -ENOMEM;
589 else
590 bfregn = alloc_high_class_bfreg(dev, bfregi);
591 break;
592 } 581 }
593 mutex_unlock(&bfregi->lock); 582 mutex_unlock(&bfregi->lock);
594 583
@@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
641static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, 630static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
642 struct mlx5_ib_cq *recv_cq); 631 struct mlx5_ib_cq *recv_cq);
643 632
644static int bfregn_to_uar_index(struct mlx5_ib_dev *dev, 633int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
645 struct mlx5_bfreg_info *bfregi, int bfregn, 634 struct mlx5_bfreg_info *bfregi, u32 bfregn,
646 bool dyn_bfreg) 635 bool dyn_bfreg)
647{ 636{
648 int bfregs_per_sys_page; 637 unsigned int bfregs_per_sys_page;
649 int index_of_sys_page; 638 u32 index_of_sys_page;
650 int offset; 639 u32 offset;
651 640
652 bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * 641 bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
653 MLX5_NON_FP_BFREGS_PER_UAR; 642 MLX5_NON_FP_BFREGS_PER_UAR;
@@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
655 644
656 if (dyn_bfreg) { 645 if (dyn_bfreg) {
657 index_of_sys_page += bfregi->num_static_sys_pages; 646 index_of_sys_page += bfregi->num_static_sys_pages;
647
648 if (index_of_sys_page >= bfregi->num_sys_pages)
649 return -EINVAL;
650
658 if (bfregn > bfregi->num_dyn_bfregs || 651 if (bfregn > bfregi->num_dyn_bfregs ||
659 bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) { 652 bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
660 mlx5_ib_dbg(dev, "Invalid dynamic uar index\n"); 653 mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
@@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
819 bfregn = MLX5_CROSS_CHANNEL_BFREG; 812 bfregn = MLX5_CROSS_CHANNEL_BFREG;
820 } 813 }
821 else { 814 else {
822 bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH); 815 bfregn = alloc_bfreg(dev, &context->bfregi);
823 if (bfregn < 0) { 816 if (bfregn < 0)
824 mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n"); 817 return bfregn;
825 mlx5_ib_dbg(dev, "reverting to medium latency\n");
826 bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
827 if (bfregn < 0) {
828 mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
829 mlx5_ib_dbg(dev, "reverting to high latency\n");
830 bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
831 if (bfregn < 0) {
832 mlx5_ib_warn(dev, "bfreg allocation failed\n");
833 return bfregn;
834 }
835 }
836 }
837 } 818 }
838 819
839 mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index); 820 mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -1626,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1626 struct mlx5_ib_resources *devr = &dev->devr; 1607 struct mlx5_ib_resources *devr = &dev->devr;
1627 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 1608 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
1628 struct mlx5_core_dev *mdev = dev->mdev; 1609 struct mlx5_core_dev *mdev = dev->mdev;
1629 struct mlx5_ib_create_qp_resp resp; 1610 struct mlx5_ib_create_qp_resp resp = {};
1630 struct mlx5_ib_cq *send_cq; 1611 struct mlx5_ib_cq *send_cq;
1631 struct mlx5_ib_cq *recv_cq; 1612 struct mlx5_ib_cq *recv_cq;
1632 unsigned long flags; 1613 unsigned long flags;
@@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2555 if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) { 2536 if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
2556 if (!(ah_flags & IB_AH_GRH)) 2537 if (!(ah_flags & IB_AH_GRH))
2557 return -EINVAL; 2538 return -EINVAL;
2558 err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index, 2539
2559 &gid_type);
2560 if (err)
2561 return err;
2562 memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac)); 2540 memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
2563 if (qp->ibqp.qp_type == IB_QPT_RC || 2541 if (qp->ibqp.qp_type == IB_QPT_RC ||
2564 qp->ibqp.qp_type == IB_QPT_UC || 2542 qp->ibqp.qp_type == IB_QPT_UC ||
2565 qp->ibqp.qp_type == IB_QPT_XRC_INI || 2543 qp->ibqp.qp_type == IB_QPT_XRC_INI ||
2566 qp->ibqp.qp_type == IB_QPT_XRC_TGT) 2544 qp->ibqp.qp_type == IB_QPT_XRC_TGT)
2567 path->udp_sport = mlx5_get_roce_udp_sport(dev, port, 2545 path->udp_sport =
2568 grh->sgid_index); 2546 mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
2569 path->dci_cfi_prio_sl = (sl & 0x7) << 4; 2547 path->dci_cfi_prio_sl = (sl & 0x7) << 4;
2548 gid_type = ah->grh.sgid_attr->gid_type;
2570 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 2549 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
2571 path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f; 2550 path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
2572 } else { 2551 } else {
@@ -3529,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
3529} 3508}
3530 3509
3531static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, 3510static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
3532 struct ib_send_wr *wr, void *qend, 3511 const struct ib_send_wr *wr, void *qend,
3533 struct mlx5_ib_qp *qp, int *size) 3512 struct mlx5_ib_qp *qp, int *size)
3534{ 3513{
3535 void *seg = eseg; 3514 void *seg = eseg;
@@ -3582,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
3582} 3561}
3583 3562
3584static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, 3563static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
3585 struct ib_send_wr *wr) 3564 const struct ib_send_wr *wr)
3586{ 3565{
3587 memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av)); 3566 memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
3588 dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV); 3567 dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
@@ -3730,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
3730 3709
3731static int set_reg_umr_segment(struct mlx5_ib_dev *dev, 3710static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
3732 struct mlx5_wqe_umr_ctrl_seg *umr, 3711 struct mlx5_wqe_umr_ctrl_seg *umr,
3733 struct ib_send_wr *wr, int atomic) 3712 const struct ib_send_wr *wr, int atomic)
3734{ 3713{
3735 struct mlx5_umr_wr *umrwr = umr_wr(wr); 3714 const struct mlx5_umr_wr *umrwr = umr_wr(wr);
3736 3715
3737 memset(umr, 0, sizeof(*umr)); 3716 memset(umr, 0, sizeof(*umr));
3738 3717
@@ -3803,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
3803 seg->status = MLX5_MKEY_STATUS_FREE; 3782 seg->status = MLX5_MKEY_STATUS_FREE;
3804} 3783}
3805 3784
3806static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) 3785static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
3786 const struct ib_send_wr *wr)
3807{ 3787{
3808 struct mlx5_umr_wr *umrwr = umr_wr(wr); 3788 const struct mlx5_umr_wr *umrwr = umr_wr(wr);
3809 3789
3810 memset(seg, 0, sizeof(*seg)); 3790 memset(seg, 0, sizeof(*seg));
3811 if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR) 3791 if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
@@ -3854,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
3854 seg += mr_list_size; 3834 seg += mr_list_size;
3855} 3835}
3856 3836
3857static __be32 send_ieth(struct ib_send_wr *wr) 3837static __be32 send_ieth(const struct ib_send_wr *wr)
3858{ 3838{
3859 switch (wr->opcode) { 3839 switch (wr->opcode) {
3860 case IB_WR_SEND_WITH_IMM: 3840 case IB_WR_SEND_WITH_IMM:
@@ -3886,7 +3866,7 @@ static u8 wq_sig(void *wqe)
3886 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); 3866 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
3887} 3867}
3888 3868
3889static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, 3869static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
3890 void *wqe, int *sz) 3870 void *wqe, int *sz)
3891{ 3871{
3892 struct mlx5_wqe_inline_seg *seg; 3872 struct mlx5_wqe_inline_seg *seg;
@@ -4032,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
4032 return 0; 4012 return 0;
4033} 4013}
4034 4014
4035static int set_sig_data_segment(struct ib_sig_handover_wr *wr, 4015static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
4036 struct mlx5_ib_qp *qp, void **seg, int *size) 4016 struct mlx5_ib_qp *qp, void **seg, int *size)
4037{ 4017{
4038 struct ib_sig_attrs *sig_attrs = wr->sig_attrs; 4018 struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
@@ -4134,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
4134} 4114}
4135 4115
4136static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, 4116static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
4137 struct ib_sig_handover_wr *wr, u32 size, 4117 const struct ib_sig_handover_wr *wr, u32 size,
4138 u32 length, u32 pdn) 4118 u32 length, u32 pdn)
4139{ 4119{
4140 struct ib_mr *sig_mr = wr->sig_mr; 4120 struct ib_mr *sig_mr = wr->sig_mr;
@@ -4165,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
4165} 4145}
4166 4146
4167 4147
4168static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, 4148static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
4169 void **seg, int *size) 4149 struct mlx5_ib_qp *qp, void **seg, int *size)
4170{ 4150{
4171 struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); 4151 const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
4172 struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); 4152 struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
4173 u32 pdn = get_pd(qp)->pdn; 4153 u32 pdn = get_pd(qp)->pdn;
4174 u32 xlt_size; 4154 u32 xlt_size;
@@ -4243,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain,
4243} 4223}
4244 4224
4245static int set_reg_wr(struct mlx5_ib_qp *qp, 4225static int set_reg_wr(struct mlx5_ib_qp *qp,
4246 struct ib_reg_wr *wr, 4226 const struct ib_reg_wr *wr,
4247 void **seg, int *size) 4227 void **seg, int *size)
4248{ 4228{
4249 struct mlx5_ib_mr *mr = to_mmr(wr->mr); 4229 struct mlx5_ib_mr *mr = to_mmr(wr->mr);
@@ -4314,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
4314 } 4294 }
4315} 4295}
4316 4296
4317static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, 4297static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
4318 struct mlx5_wqe_ctrl_seg **ctrl, 4298 struct mlx5_wqe_ctrl_seg **ctrl,
4319 struct ib_send_wr *wr, unsigned *idx, 4299 const struct ib_send_wr *wr, unsigned *idx,
4320 int *size, int nreq) 4300 int *size, int nreq, bool send_signaled, bool solicited)
4321{ 4301{
4322 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) 4302 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
4323 return -ENOMEM; 4303 return -ENOMEM;
@@ -4328,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
4328 *(uint32_t *)(*seg + 8) = 0; 4308 *(uint32_t *)(*seg + 8) = 0;
4329 (*ctrl)->imm = send_ieth(wr); 4309 (*ctrl)->imm = send_ieth(wr);
4330 (*ctrl)->fm_ce_se = qp->sq_signal_bits | 4310 (*ctrl)->fm_ce_se = qp->sq_signal_bits |
4331 (wr->send_flags & IB_SEND_SIGNALED ? 4311 (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
4332 MLX5_WQE_CTRL_CQ_UPDATE : 0) | 4312 (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
4333 (wr->send_flags & IB_SEND_SOLICITED ?
4334 MLX5_WQE_CTRL_SOLICITED : 0);
4335 4313
4336 *seg += sizeof(**ctrl); 4314 *seg += sizeof(**ctrl);
4337 *size = sizeof(**ctrl) / 16; 4315 *size = sizeof(**ctrl) / 16;
@@ -4339,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
4339 return 0; 4317 return 0;
4340} 4318}
4341 4319
4320static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
4321 struct mlx5_wqe_ctrl_seg **ctrl,
4322 const struct ib_send_wr *wr, unsigned *idx,
4323 int *size, int nreq)
4324{
4325 return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
4326 wr->send_flags & IB_SEND_SIGNALED,
4327 wr->send_flags & IB_SEND_SOLICITED);
4328}
4329
4342static void finish_wqe(struct mlx5_ib_qp *qp, 4330static void finish_wqe(struct mlx5_ib_qp *qp,
4343 struct mlx5_wqe_ctrl_seg *ctrl, 4331 struct mlx5_wqe_ctrl_seg *ctrl,
4344 u8 size, unsigned idx, u64 wr_id, 4332 u8 size, unsigned idx, u64 wr_id,
@@ -4360,9 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
4360 qp->sq.w_list[idx].next = qp->sq.cur_post; 4348 qp->sq.w_list[idx].next = qp->sq.cur_post;
4361} 4349}
4362 4350
4363 4351static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
4364int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 4352 const struct ib_send_wr **bad_wr, bool drain)
4365 struct ib_send_wr **bad_wr)
4366{ 4353{
4367 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 4354 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
4368 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4355 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4393,7 +4380,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
4393 4380
4394 spin_lock_irqsave(&qp->sq.lock, flags); 4381 spin_lock_irqsave(&qp->sq.lock, flags);
4395 4382
4396 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 4383 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
4397 err = -EIO; 4384 err = -EIO;
4398 *bad_wr = wr; 4385 *bad_wr = wr;
4399 nreq = 0; 4386 nreq = 0;
@@ -4498,10 +4485,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
4498 * SET_PSV WQEs are not signaled and solicited 4485 * SET_PSV WQEs are not signaled and solicited
4499 * on error 4486 * on error
4500 */ 4487 */
4501 wr->send_flags &= ~IB_SEND_SIGNALED; 4488 err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
4502 wr->send_flags |= IB_SEND_SOLICITED; 4489 &size, nreq, false, true);
4503 err = begin_wqe(qp, &seg, &ctrl, wr,
4504 &idx, &size, nreq);
4505 if (err) { 4490 if (err) {
4506 mlx5_ib_warn(dev, "\n"); 4491 mlx5_ib_warn(dev, "\n");
4507 err = -ENOMEM; 4492 err = -ENOMEM;
@@ -4520,8 +4505,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
4520 4505
4521 finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, 4506 finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
4522 fence, MLX5_OPCODE_SET_PSV); 4507 fence, MLX5_OPCODE_SET_PSV);
4523 err = begin_wqe(qp, &seg, &ctrl, wr, 4508 err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
4524 &idx, &size, nreq); 4509 &size, nreq, false, true);
4525 if (err) { 4510 if (err) {
4526 mlx5_ib_warn(dev, "\n"); 4511 mlx5_ib_warn(dev, "\n");
4527 err = -ENOMEM; 4512 err = -ENOMEM;
@@ -4690,13 +4675,19 @@ out:
4690 return err; 4675 return err;
4691} 4676}
4692 4677
4678int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
4679 const struct ib_send_wr **bad_wr)
4680{
4681 return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
4682}
4683
4693static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) 4684static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
4694{ 4685{
4695 sig->signature = calc_sig(sig, size); 4686 sig->signature = calc_sig(sig, size);
4696} 4687}
4697 4688
4698int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 4689static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
4699 struct ib_recv_wr **bad_wr) 4690 const struct ib_recv_wr **bad_wr, bool drain)
4700{ 4691{
4701 struct mlx5_ib_qp *qp = to_mqp(ibqp); 4692 struct mlx5_ib_qp *qp = to_mqp(ibqp);
4702 struct mlx5_wqe_data_seg *scat; 4693 struct mlx5_wqe_data_seg *scat;
@@ -4714,7 +4705,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
4714 4705
4715 spin_lock_irqsave(&qp->rq.lock, flags); 4706 spin_lock_irqsave(&qp->rq.lock, flags);
4716 4707
4717 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 4708 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
4718 err = -EIO; 4709 err = -EIO;
4719 *bad_wr = wr; 4710 *bad_wr = wr;
4720 nreq = 0; 4711 nreq = 0;
@@ -4776,6 +4767,12 @@ out:
4776 return err; 4767 return err;
4777} 4768}
4778 4769
4770int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
4771 const struct ib_recv_wr **bad_wr)
4772{
4773 return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
4774}
4775
4779static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) 4776static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
4780{ 4777{
4781 switch (mlx5_state) { 4778 switch (mlx5_state) {
@@ -5365,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
5365 5362
5366 rwq->wqe_count = ucmd->rq_wqe_count; 5363 rwq->wqe_count = ucmd->rq_wqe_count;
5367 rwq->wqe_shift = ucmd->rq_wqe_shift; 5364 rwq->wqe_shift = ucmd->rq_wqe_shift;
5368	rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
5365	if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
5366 return -EINVAL;
5367
5369 rwq->log_rq_stride = rwq->wqe_shift; 5368 rwq->log_rq_stride = rwq->wqe_shift;
5370 rwq->log_rq_size = ilog2(rwq->wqe_count); 5369 rwq->log_rq_size = ilog2(rwq->wqe_count);
5371 return 0; 5370 return 0;
@@ -5697,3 +5696,132 @@ out:
5697 kvfree(in); 5696 kvfree(in);
5698 return err; 5697 return err;
5699} 5698}
5699
5700struct mlx5_ib_drain_cqe {
5701 struct ib_cqe cqe;
5702 struct completion done;
5703};
5704
5705static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
5706{
5707 struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
5708 struct mlx5_ib_drain_cqe,
5709 cqe);
5710
5711 complete(&cqe->done);
5712}
5713
5714/* This function returns only once the drained WR was completed */
5715static void handle_drain_completion(struct ib_cq *cq,
5716 struct mlx5_ib_drain_cqe *sdrain,
5717 struct mlx5_ib_dev *dev)
5718{
5719 struct mlx5_core_dev *mdev = dev->mdev;
5720
5721 if (cq->poll_ctx == IB_POLL_DIRECT) {
5722 while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
5723 ib_process_cq_direct(cq, -1);
5724 return;
5725 }
5726
5727 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5728 struct mlx5_ib_cq *mcq = to_mcq(cq);
5729 bool triggered = false;
5730 unsigned long flags;
5731
5732 spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
5733 /* Make sure that the CQ handler won't run if wasn't run yet */
5734 if (!mcq->mcq.reset_notify_added)
5735 mcq->mcq.reset_notify_added = 1;
5736 else
5737 triggered = true;
5738 spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
5739
5740 if (triggered) {
5741 /* Wait for any scheduled/running task to be ended */
5742 switch (cq->poll_ctx) {
5743 case IB_POLL_SOFTIRQ:
5744 irq_poll_disable(&cq->iop);
5745 irq_poll_enable(&cq->iop);
5746 break;
5747 case IB_POLL_WORKQUEUE:
5748 cancel_work_sync(&cq->work);
5749 break;
5750 default:
5751 WARN_ON_ONCE(1);
5752 }
5753 }
5754
5755 /* Run the CQ handler - this makes sure that the drain WR will
5756 * be processed if wasn't processed yet.
5757 */
5758 mcq->mcq.comp(&mcq->mcq);
5759 }
5760
5761 wait_for_completion(&sdrain->done);
5762}
5763
5764void mlx5_ib_drain_sq(struct ib_qp *qp)
5765{
5766 struct ib_cq *cq = qp->send_cq;
5767 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5768 struct mlx5_ib_drain_cqe sdrain;
5769 const struct ib_send_wr *bad_swr;
5770 struct ib_rdma_wr swr = {
5771 .wr = {
5772 .next = NULL,
5773 { .wr_cqe = &sdrain.cqe, },
5774 .opcode = IB_WR_RDMA_WRITE,
5775 },
5776 };
5777 int ret;
5778 struct mlx5_ib_dev *dev = to_mdev(qp->device);
5779 struct mlx5_core_dev *mdev = dev->mdev;
5780
5781 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5782 if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5783 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5784 return;
5785 }
5786
5787 sdrain.cqe.done = mlx5_ib_drain_qp_done;
5788 init_completion(&sdrain.done);
5789
5790 ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
5791 if (ret) {
5792 WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
5793 return;
5794 }
5795
5796 handle_drain_completion(cq, &sdrain, dev);
5797}
5798
5799void mlx5_ib_drain_rq(struct ib_qp *qp)
5800{
5801 struct ib_cq *cq = qp->recv_cq;
5802 struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
5803 struct mlx5_ib_drain_cqe rdrain;
5804 struct ib_recv_wr rwr = {};
5805 const struct ib_recv_wr *bad_rwr;
5806 int ret;
5807 struct mlx5_ib_dev *dev = to_mdev(qp->device);
5808 struct mlx5_core_dev *mdev = dev->mdev;
5809
5810 ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
5811 if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
5812 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5813 return;
5814 }
5815
5816 rwr.wr_cqe = &rdrain.cqe;
5817 rdrain.cqe.done = mlx5_ib_drain_qp_done;
5818 init_completion(&rdrain.done);
5819
5820 ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
5821 if (ret) {
5822 WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
5823 return;
5824 }
5825
5826 handle_drain_completion(cq, &rdrain, dev);
5827}
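The drain implementation above moves the QP to the error state, posts a marker WR on each work queue, and handle_drain_completion() blocks until that marker completes, kicking the CQ handler by hand when the device is in internal error. A hedged sketch of the consumer side, assuming the core drain verbs dispatch to these driver callbacks; my_teardown_qp is a hypothetical helper:

/* Typical ULP teardown: flush everything still in flight, then destroy. */
static void my_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* returns once the SQ and RQ marker WRs complete */
	ib_destroy_qp(qp);
}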
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index f5de5adc9b1a..d359fecf7a5b 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -446,8 +446,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
446 spin_unlock(&srq->lock); 446 spin_unlock(&srq->lock);
447} 447}
448 448
449int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 449int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
450 struct ib_recv_wr **bad_wr) 450 const struct ib_recv_wr **bad_wr)
451{ 451{
452 struct mlx5_ib_srq *srq = to_msrq(ibsrq); 452 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
453 struct mlx5_wqe_srq_next_seg *next; 453 struct mlx5_wqe_srq_next_seg *next;
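The prototype change above is part of the series-wide const conversion: the posted WR chain is read-only and bad_wr is reported through a const pointer. A minimal sketch of what a caller looks like against the new signatures; my_qp, dma_addr, len and lkey are placeholders:

struct ib_sge sge = {
	.addr   = dma_addr,
	.length = len,
	.lkey   = lkey,
};
struct ib_recv_wr wr = {
	.wr_id   = 1,
	.sg_list = &sge,
	.num_sge = 1,
};
const struct ib_recv_wr *bad_wr;	/* const now, matching the driver hooks */
int ret;

ret = ib_post_recv(my_qp, &wr, &bad_wr);
if (ret)
	pr_err("post_recv failed: %d (wr_id %llu)\n", ret, bad_wr->wr_id);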
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e7f6223e9c60..0823c0bc7e73 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
281 header->grh.flow_label = 281 header->grh.flow_label =
282 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff); 282 ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
283 header->grh.hop_limit = ah->av->hop_limit; 283 header->grh.hop_limit = ah->av->hop_limit;
284 ib_get_cached_gid(&dev->ib_dev, 284 header->grh.source_gid = ah->ibah.sgid_attr->gid;
285 be32_to_cpu(ah->av->port_pd) >> 24,
286 ah->av->gid_index % dev->limits.gid_table_len,
287 &header->grh.source_gid, NULL);
288 memcpy(header->grh.destination_gid.raw, 285 memcpy(header->grh.destination_gid.raw,
289 ah->av->dgid, 16); 286 ah->av->dgid, 16);
290 } 287 }
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 5508afbf1c67..220a3e4717a3 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev);
519void mthca_srq_event(struct mthca_dev *dev, u32 srqn, 519void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
520 enum ib_event_type event_type); 520 enum ib_event_type event_type);
521void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); 521void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
522int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, 522int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
523 struct ib_recv_wr **bad_wr); 523 const struct ib_recv_wr **bad_wr);
524int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, 524int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
525 struct ib_recv_wr **bad_wr); 525 const struct ib_recv_wr **bad_wr);
526 526
527void mthca_qp_event(struct mthca_dev *dev, u32 qpn, 527void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
528 enum ib_event_type event_type); 528 enum ib_event_type event_type);
@@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
530 struct ib_qp_init_attr *qp_init_attr); 530 struct ib_qp_init_attr *qp_init_attr);
531int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, 531int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
532 struct ib_udata *udata); 532 struct ib_udata *udata);
533int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 533int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
534 struct ib_send_wr **bad_wr); 534 const struct ib_send_wr **bad_wr);
535int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 535int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
536 struct ib_recv_wr **bad_wr); 536 const struct ib_recv_wr **bad_wr);
537int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 537int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
538 struct ib_send_wr **bad_wr); 538 const struct ib_send_wr **bad_wr);
539int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 539int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
540 struct ib_recv_wr **bad_wr); 540 const struct ib_recv_wr **bad_wr);
541void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, 541void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
542 int index, int *dbd, __be32 *new_wqe); 542 int index, int *dbd, __be32 *new_wqe);
543int mthca_alloc_qp(struct mthca_dev *dev, 543int mthca_alloc_qp(struct mthca_dev *dev,
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 541f237965c7..0d3473b4596e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
96 props->page_size_cap = mdev->limits.page_size_cap; 96 props->page_size_cap = mdev->limits.page_size_cap;
97 props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; 97 props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
98 props->max_qp_wr = mdev->limits.max_wqes; 98 props->max_qp_wr = mdev->limits.max_wqes;
99 props->max_sge = mdev->limits.max_sg; 99 props->max_send_sge = mdev->limits.max_sg;
100 props->max_sge_rd = props->max_sge; 100 props->max_recv_sge = mdev->limits.max_sg;
101 props->max_sge_rd = mdev->limits.max_sg;
101 props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; 102 props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
102 props->max_cqe = mdev->limits.max_cqes; 103 props->max_cqe = mdev->limits.max_cqes;
103 props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; 104 props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
@@ -448,7 +449,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
448 int err; 449 int err;
449 450
450 if (init_attr->srq_type != IB_SRQT_BASIC) 451 if (init_attr->srq_type != IB_SRQT_BASIC)
451 return ERR_PTR(-ENOSYS); 452 return ERR_PTR(-EOPNOTSUPP);
452 453
453 srq = kmalloc(sizeof *srq, GFP_KERNEL); 454 srq = kmalloc(sizeof *srq, GFP_KERNEL);
454 if (!srq) 455 if (!srq)
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index af1c49d70b89..3d37f2373d63 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev,
1488 1488
1489/* Create UD header for an MLX send and build a data segment for it */ 1489/* Create UD header for an MLX send and build a data segment for it */
1490static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, 1490static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1491 int ind, struct ib_ud_wr *wr, 1491 int ind, const struct ib_ud_wr *wr,
1492 struct mthca_mlx_seg *mlx, 1492 struct mthca_mlx_seg *mlx,
1493 struct mthca_data_seg *data) 1493 struct mthca_data_seg *data)
1494{ 1494{
@@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
1581} 1581}
1582 1582
1583static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, 1583static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
1584 struct ib_atomic_wr *wr) 1584 const struct ib_atomic_wr *wr)
1585{ 1585{
1586 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 1586 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1587 aseg->swap_add = cpu_to_be64(wr->swap); 1587 aseg->swap_add = cpu_to_be64(wr->swap);
@@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
1594} 1594}
1595 1595
1596static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, 1596static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
1597 struct ib_ud_wr *wr) 1597 const struct ib_ud_wr *wr)
1598{ 1598{
1599 useg->lkey = cpu_to_be32(to_mah(wr->ah)->key); 1599 useg->lkey = cpu_to_be32(to_mah(wr->ah)->key);
1600 useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma); 1600 useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
@@ -1604,15 +1604,15 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
1604} 1604}
1605 1605
1606static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, 1606static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
1607 struct ib_ud_wr *wr) 1607 const struct ib_ud_wr *wr)
1608{ 1608{
1609 memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE); 1609 memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
1610 useg->dqpn = cpu_to_be32(wr->remote_qpn); 1610 useg->dqpn = cpu_to_be32(wr->remote_qpn);
1611 useg->qkey = cpu_to_be32(wr->remote_qkey); 1611 useg->qkey = cpu_to_be32(wr->remote_qkey);
1612} 1612}
1613 1613
1614int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1614int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1615 struct ib_send_wr **bad_wr) 1615 const struct ib_send_wr **bad_wr)
1616{ 1616{
1617 struct mthca_dev *dev = to_mdev(ibqp->device); 1617 struct mthca_dev *dev = to_mdev(ibqp->device);
1618 struct mthca_qp *qp = to_mqp(ibqp); 1618 struct mthca_qp *qp = to_mqp(ibqp);
@@ -1814,8 +1814,8 @@ out:
1814 return err; 1814 return err;
1815} 1815}
1816 1816
1817int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 1817int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1818 struct ib_recv_wr **bad_wr) 1818 const struct ib_recv_wr **bad_wr)
1819{ 1819{
1820 struct mthca_dev *dev = to_mdev(ibqp->device); 1820 struct mthca_dev *dev = to_mdev(ibqp->device);
1821 struct mthca_qp *qp = to_mqp(ibqp); 1821 struct mthca_qp *qp = to_mqp(ibqp);
@@ -1925,8 +1925,8 @@ out:
1925 return err; 1925 return err;
1926} 1926}
1927 1927
1928int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1928int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1929 struct ib_send_wr **bad_wr) 1929 const struct ib_send_wr **bad_wr)
1930{ 1930{
1931 struct mthca_dev *dev = to_mdev(ibqp->device); 1931 struct mthca_dev *dev = to_mdev(ibqp->device);
1932 struct mthca_qp *qp = to_mqp(ibqp); 1932 struct mthca_qp *qp = to_mqp(ibqp);
@@ -2165,8 +2165,8 @@ out:
2165 return err; 2165 return err;
2166} 2166}
2167 2167
2168int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 2168int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2169 struct ib_recv_wr **bad_wr) 2169 const struct ib_recv_wr **bad_wr)
2170{ 2170{
2171 struct mthca_dev *dev = to_mdev(ibqp->device); 2171 struct mthca_dev *dev = to_mdev(ibqp->device);
2172 struct mthca_qp *qp = to_mqp(ibqp); 2172 struct mthca_qp *qp = to_mqp(ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index f79732bc73b4..9a3fc6fb0d7e 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
472 spin_unlock(&srq->lock); 472 spin_unlock(&srq->lock);
473} 473}
474 474
475int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 475int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
476 struct ib_recv_wr **bad_wr) 476 const struct ib_recv_wr **bad_wr)
477{ 477{
478 struct mthca_dev *dev = to_mdev(ibsrq->device); 478 struct mthca_dev *dev = to_mdev(ibsrq->device);
479 struct mthca_srq *srq = to_msrq(ibsrq); 479 struct mthca_srq *srq = to_msrq(ibsrq);
@@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
572 return err; 572 return err;
573} 573}
574 574
575int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 575int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
576 struct ib_recv_wr **bad_wr) 576 const struct ib_recv_wr **bad_wr)
577{ 577{
578 struct mthca_dev *dev = to_mdev(ibsrq->device); 578 struct mthca_dev *dev = to_mdev(ibsrq->device);
579 struct mthca_srq *srq = to_msrq(ibsrq); 579 struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 00c27291dc26..bedaa02749fb 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -159,7 +159,7 @@ do { \
159 159
160#define NES_EVENT_TIMEOUT 1200000 160#define NES_EVENT_TIMEOUT 1200000
161#else 161#else
162#define nes_debug(level, fmt, args...) 162#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
163#define assert(expr) do {} while (0) 163#define assert(expr) do {} while (0)
164 164
165#define NES_EVENT_TIMEOUT 100000 165#define NES_EVENT_TIMEOUT 100000
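Replacing the empty nes_debug() stub with no_printk() keeps the compiler's printf-format checking and silences set-but-unused warnings in non-debug builds while still emitting no code. A hedged sketch of the general pattern; MY_DEBUG and my_dbg are hypothetical names:

#include <linux/printk.h>

#ifdef MY_DEBUG
#define my_dbg(fmt, args...)	printk(KERN_DEBUG "mydrv: " fmt, ##args)
#else
/* no output, but the format string and arguments are still type-checked */
#define my_dbg(fmt, args...)	no_printk(fmt, ##args)
#endif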
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6cdfbf8c5674..2b67ace5b614 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -58,6 +58,7 @@
58#include <net/neighbour.h> 58#include <net/neighbour.h>
59#include <net/route.h> 59#include <net/route.h>
60#include <net/ip_fib.h> 60#include <net/ip_fib.h>
61#include <net/secure_seq.h>
61#include <net/tcp.h> 62#include <net/tcp.h>
62#include <linux/fcntl.h> 63#include <linux/fcntl.h>
63 64
@@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1445 struct nes_cm_listener *listener) 1446 struct nes_cm_listener *listener)
1446{ 1447{
1447 struct nes_cm_node *cm_node; 1448 struct nes_cm_node *cm_node;
1448 struct timespec ts;
1449 int oldarpindex = 0; 1449 int oldarpindex = 0;
1450 int arpindex = 0; 1450 int arpindex = 0;
1451 struct nes_device *nesdev; 1451 struct nes_device *nesdev;
@@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1496 cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; 1496 cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
1497 cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> 1497 cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
1498 NES_CM_DEFAULT_RCV_WND_SCALE; 1498 NES_CM_DEFAULT_RCV_WND_SCALE;
1499 ts = current_kernel_time(); 1499 cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr),
1500 cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); 1500 htonl(cm_node->rem_addr),
1501 htons(cm_node->loc_port),
1502 htons(cm_node->rem_port));
1501 cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - 1503 cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
1502 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; 1504 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
1503 cm_node->tcp_cntxt.rcv_nxt = 0; 1505 cm_node->tcp_cntxt.rcv_nxt = 0;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 18a7de1c3923..bd0675d8f298 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
70static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); 70static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
71static void nes_terminate_start_timer(struct nes_qp *nesqp); 71static void nes_terminate_start_timer(struct nes_qp *nesqp);
72 72
73#ifdef CONFIG_INFINIBAND_NES_DEBUG 73static const char *const nes_iwarp_state_str[] = {
74static unsigned char *nes_iwarp_state_str[] = {
75 "Non-Existent", 74 "Non-Existent",
76 "Idle", 75 "Idle",
77 "RTS", 76 "RTS",
@@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = {
82 "RSVD2", 81 "RSVD2",
83}; 82};
84 83
85static unsigned char *nes_tcp_state_str[] = { 84static const char *const nes_tcp_state_str[] = {
86 "Non-Existent", 85 "Non-Existent",
87 "Closed", 86 "Closed",
88 "Listen", 87 "Listen",
@@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = {
100 "RSVD3", 99 "RSVD3",
101 "RSVD4", 100 "RSVD4",
102}; 101};
103#endif
104 102
105static inline void print_ip(struct nes_cm_node *cm_node) 103static inline void print_ip(struct nes_cm_node *cm_node)
106{ 104{
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 32f26556c808..6940c7215961 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
436 props->max_mr_size = 0x80000000; 436 props->max_mr_size = 0x80000000;
437 props->max_qp = nesibdev->max_qp; 437 props->max_qp = nesibdev->max_qp;
438 props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; 438 props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
439 props->max_sge = nesdev->nesadapter->max_sge; 439 props->max_send_sge = nesdev->nesadapter->max_sge;
440 props->max_recv_sge = nesdev->nesadapter->max_sge;
440 props->max_cq = nesibdev->max_cq; 441 props->max_cq = nesibdev->max_cq;
441 props->max_cqe = nesdev->nesadapter->max_cqe; 442 props->max_cqe = nesdev->nesadapter->max_cqe;
442 props->max_mr = nesibdev->max_mr; 443 props->max_mr = nesibdev->max_mr;
@@ -754,26 +755,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
754 755
755 756
756/** 757/**
757 * nes_create_ah
758 */
759static struct ib_ah *nes_create_ah(struct ib_pd *pd,
760 struct rdma_ah_attr *ah_attr,
761 struct ib_udata *udata)
762{
763 return ERR_PTR(-ENOSYS);
764}
765
766
767/**
768 * nes_destroy_ah
769 */
770static int nes_destroy_ah(struct ib_ah *ah)
771{
772 return -ENOSYS;
773}
774
775
776/**
777 * nes_get_encoded_size 758 * nes_get_encoded_size
778 */ 759 */
779static inline u8 nes_get_encoded_size(int *size) 760static inline u8 nes_get_encoded_size(int *size)
@@ -3004,42 +2985,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3004 return err; 2985 return err;
3005} 2986}
3006 2987
3007
3008/**
3009 * nes_muticast_attach
3010 */
3011static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
3012{
3013 nes_debug(NES_DBG_INIT, "\n");
3014 return -ENOSYS;
3015}
3016
3017
3018/**
3019 * nes_multicast_detach
3020 */
3021static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
3022{
3023 nes_debug(NES_DBG_INIT, "\n");
3024 return -ENOSYS;
3025}
3026
3027
3028/**
3029 * nes_process_mad
3030 */
3031static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
3032 u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
3033 const struct ib_mad_hdr *in, size_t in_mad_size,
3034 struct ib_mad_hdr *out, size_t *out_mad_size,
3035 u16 *out_mad_pkey_index)
3036{
3037 nes_debug(NES_DBG_INIT, "\n");
3038 return -ENOSYS;
3039}
3040
3041static inline void 2988static inline void
3042fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey) 2989fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr,
2990 u32 uselkey)
3043{ 2991{
3044 int sge_index; 2992 int sge_index;
3045 int total_payload_length = 0; 2993 int total_payload_length = 0;
@@ -3065,8 +3013,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselke
3065/** 3013/**
3066 * nes_post_send 3014 * nes_post_send
3067 */ 3015 */
3068static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, 3016static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr,
3069 struct ib_send_wr **bad_wr) 3017 const struct ib_send_wr **bad_wr)
3070{ 3018{
3071 u64 u64temp; 3019 u64 u64temp;
3072 unsigned long flags = 0; 3020 unsigned long flags = 0;
@@ -3327,8 +3275,8 @@ out:
3327/** 3275/**
3328 * nes_post_recv 3276 * nes_post_recv
3329 */ 3277 */
3330static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, 3278static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
3331 struct ib_recv_wr **bad_wr) 3279 const struct ib_recv_wr **bad_wr)
3332{ 3280{
3333 u64 u64temp; 3281 u64 u64temp;
3334 unsigned long flags = 0; 3282 unsigned long flags = 0;
@@ -3735,8 +3683,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3735 nesibdev->ibdev.mmap = nes_mmap; 3683 nesibdev->ibdev.mmap = nes_mmap;
3736 nesibdev->ibdev.alloc_pd = nes_alloc_pd; 3684 nesibdev->ibdev.alloc_pd = nes_alloc_pd;
3737 nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; 3685 nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
3738 nesibdev->ibdev.create_ah = nes_create_ah;
3739 nesibdev->ibdev.destroy_ah = nes_destroy_ah;
3740 nesibdev->ibdev.create_qp = nes_create_qp; 3686 nesibdev->ibdev.create_qp = nes_create_qp;
3741 nesibdev->ibdev.modify_qp = nes_modify_qp; 3687 nesibdev->ibdev.modify_qp = nes_modify_qp;
3742 nesibdev->ibdev.query_qp = nes_query_qp; 3688 nesibdev->ibdev.query_qp = nes_query_qp;
@@ -3753,10 +3699,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3753 nesibdev->ibdev.alloc_mr = nes_alloc_mr; 3699 nesibdev->ibdev.alloc_mr = nes_alloc_mr;
3754 nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; 3700 nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
3755 3701
3756 nesibdev->ibdev.attach_mcast = nes_multicast_attach;
3757 nesibdev->ibdev.detach_mcast = nes_multicast_detach;
3758 nesibdev->ibdev.process_mad = nes_process_mad;
3759
3760 nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; 3702 nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
3761 nesibdev->ibdev.post_send = nes_post_send; 3703 nesibdev->ibdev.post_send = nes_post_send;
3762 nesibdev->ibdev.post_recv = nes_post_recv; 3704 nesibdev->ibdev.post_recv = nes_post_recv;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3897b64532e1..58188fe5aed2 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
71} 71}
72 72
73static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, 73static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
74 struct rdma_ah_attr *attr, union ib_gid *sgid, 74 struct rdma_ah_attr *attr, const union ib_gid *sgid,
75 int pdid, bool *isvlan, u16 vlan_tag) 75 int pdid, bool *isvlan, u16 vlan_tag)
76{ 76{
77 int status; 77 int status;
@@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
164 struct ocrdma_ah *ah; 164 struct ocrdma_ah *ah;
165 bool isvlan = false; 165 bool isvlan = false;
166 u16 vlan_tag = 0xffff; 166 u16 vlan_tag = 0xffff;
167 struct ib_gid_attr sgid_attr; 167 const struct ib_gid_attr *sgid_attr;
168 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); 168 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
169 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); 169 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
170 const struct ib_global_route *grh;
171 union ib_gid sgid;
172 170
173 if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) || 171 if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
174 !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH)) 172 !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
175 return ERR_PTR(-EINVAL); 173 return ERR_PTR(-EINVAL);
176 174
177 grh = rdma_ah_read_grh(attr);
178 if (atomic_cmpxchg(&dev->update_sl, 1, 0)) 175 if (atomic_cmpxchg(&dev->update_sl, 1, 0))
179 ocrdma_init_service_level(dev); 176 ocrdma_init_service_level(dev);
180 177
@@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
186 if (status) 183 if (status)
187 goto av_err; 184 goto av_err;
188 185
189 status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid, 186 sgid_attr = attr->grh.sgid_attr;
190 &sgid_attr); 187 if (is_vlan_dev(sgid_attr->ndev))
191 if (status) { 188 vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
192 pr_err("%s(): Failed to query sgid, status = %d\n", 189
193 __func__, status);
194 goto av_conf_err;
195 }
196 if (is_vlan_dev(sgid_attr.ndev))
197 vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
198 dev_put(sgid_attr.ndev);
199 /* Get network header type for this GID */ 190 /* Get network header type for this GID */
200 ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 191 ah->hdr_type = rdma_gid_attr_network_type(sgid_attr);
201 192
202 status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag); 193 status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id,
194 &isvlan, vlan_tag);
203 if (status) 195 if (status)
204 goto av_conf_err; 196 goto av_conf_err;
205 197
@@ -262,12 +254,6 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
262 return 0; 254 return 0;
263} 255}
264 256
265int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
266{
267 /* modify_ah is unsupported */
268 return -ENOSYS;
269}
270
271int ocrdma_process_mad(struct ib_device *ibdev, 257int ocrdma_process_mad(struct ib_device *ibdev,
272 int process_mad_flags, 258 int process_mad_flags,
273 u8 port_num, 259 u8 port_num,
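The ocrdma AH path above follows the series-wide move away from ib_get_cached_gid(), which copied the GID and required a dev_put() on the returned netdev, to the kref'd const struct ib_gid_attr * carried in the AH attribute's GRH. A minimal sketch of reading it, assuming the core keeps the reference alive for the duration of the call; sketch_vlan_from_ah is a hypothetical helper:

#include <linux/if_vlan.h>
#include <rdma/ib_verbs.h>

static u16 sketch_vlan_from_ah(const struct rdma_ah_attr *attr)
{
	const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr;

	/* no dev_put() here: the reference is owned by the AH attribute */
	if (is_vlan_dev(sgid_attr->ndev))
		return vlan_dev_vlan_id(sgid_attr->ndev);

	return 0xffff;	/* "no VLAN" sentinel used by these drivers */
}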
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 1a65c47945aa..c0c32c9b80ae 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -55,7 +55,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
55 struct ib_udata *udata); 55 struct ib_udata *udata);
56int ocrdma_destroy_ah(struct ib_ah *ah); 56int ocrdma_destroy_ah(struct ib_ah *ah);
57int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); 57int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
58int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
59 58
60int ocrdma_process_mad(struct ib_device *, 59int ocrdma_process_mad(struct ib_device *,
61 int process_mad_flags, 60 int process_mad_flags,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 6c136e5017fe..e578281471af 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1365,8 +1365,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
1365 dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & 1365 dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
1366 OCRDMA_HBA_ATTRB_PTNUM_MASK) 1366 OCRDMA_HBA_ATTRB_PTNUM_MASK)
1367 >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; 1367 >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
1368 strncpy(dev->model_number, 1368 strlcpy(dev->model_number,
1369 hba_attribs->controller_model_number, 31); 1369 hba_attribs->controller_model_number,
1370 sizeof(dev->model_number));
1370 } 1371 }
1371 dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); 1372 dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
1372free_mqe: 1373free_mqe:
@@ -2494,8 +2495,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2494{ 2495{
2495 int status; 2496 int status;
2496 struct rdma_ah_attr *ah_attr = &attrs->ah_attr; 2497 struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
2497 union ib_gid sgid; 2498 const struct ib_gid_attr *sgid_attr;
2498 struct ib_gid_attr sgid_attr;
2499 u32 vlan_id = 0xFFFF; 2499 u32 vlan_id = 0xFFFF;
2500 u8 mac_addr[6], hdr_type; 2500 u8 mac_addr[6], hdr_type;
2501 union { 2501 union {
@@ -2525,25 +2525,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2525 memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0], 2525 memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
2526 sizeof(cmd->params.dgid)); 2526 sizeof(cmd->params.dgid));
2527 2527
2528 status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, 2528 sgid_attr = ah_attr->grh.sgid_attr;
2529 &sgid, &sgid_attr); 2529 vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
2530 if (!status) { 2530 memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN);
2531 vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
2532 memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
2533 dev_put(sgid_attr.ndev);
2534 }
2535 2531
2536 qp->sgid_idx = grh->sgid_index; 2532 qp->sgid_idx = grh->sgid_index;
2537 memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); 2533 memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0],
2534 sizeof(cmd->params.sgid));
2538 status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]); 2535 status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
2539 if (status) 2536 if (status)
2540 return status; 2537 return status;
2538
2541 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | 2539 cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
2542 (mac_addr[2] << 16) | (mac_addr[3] << 24); 2540 (mac_addr[2] << 16) | (mac_addr[3] << 24);
2543 2541
2544 hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid); 2542 hdr_type = rdma_gid_attr_network_type(sgid_attr);
2545 if (hdr_type == RDMA_NETWORK_IPV4) { 2543 if (hdr_type == RDMA_NETWORK_IPV4) {
2546 rdma_gid2ip(&sgid_addr._sockaddr, &sgid); 2544 rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
2547 rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid); 2545 rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
2548 memcpy(&cmd->params.dgid[0], 2546 memcpy(&cmd->params.dgid[0],
2549 &dgid_addr._sockaddr_in.sin_addr.s_addr, 4); 2547 &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 5962c0ed9847..7832ee3e0c84 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -176,7 +176,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
176 dev->ibdev.create_ah = ocrdma_create_ah; 176 dev->ibdev.create_ah = ocrdma_create_ah;
177 dev->ibdev.destroy_ah = ocrdma_destroy_ah; 177 dev->ibdev.destroy_ah = ocrdma_destroy_ah;
178 dev->ibdev.query_ah = ocrdma_query_ah; 178 dev->ibdev.query_ah = ocrdma_query_ah;
179 dev->ibdev.modify_ah = ocrdma_modify_ah;
180 179
181 dev->ibdev.poll_cq = ocrdma_poll_cq; 180 dev->ibdev.poll_cq = ocrdma_poll_cq;
182 dev->ibdev.post_send = ocrdma_post_send; 181 dev->ibdev.post_send = ocrdma_post_send;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 82e20fc32890..c158ca9fde6d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
89 IB_DEVICE_SYS_IMAGE_GUID | 89 IB_DEVICE_SYS_IMAGE_GUID |
90 IB_DEVICE_LOCAL_DMA_LKEY | 90 IB_DEVICE_LOCAL_DMA_LKEY |
91 IB_DEVICE_MEM_MGT_EXTENSIONS; 91 IB_DEVICE_MEM_MGT_EXTENSIONS;
92 attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); 92 attr->max_send_sge = dev->attr.max_send_sge;
93 attr->max_recv_sge = dev->attr.max_recv_sge;
93 attr->max_sge_rd = dev->attr.max_rdma_sge; 94 attr->max_sge_rd = dev->attr.max_rdma_sge;
94 attr->max_cq = dev->attr.max_cq; 95 attr->max_cq = dev->attr.max_cq;
95 attr->max_cqe = dev->attr.max_cqe; 96 attr->max_cqe = dev->attr.max_cqe;
@@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
196 props->sm_lid = 0; 197 props->sm_lid = 0;
197 props->sm_sl = 0; 198 props->sm_sl = 0;
198 props->state = port_state; 199 props->state = port_state;
199 props->port_cap_flags = 200 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
200 IB_PORT_CM_SUP | 201 IB_PORT_DEVICE_MGMT_SUP |
201 IB_PORT_REINIT_SUP | 202 IB_PORT_VENDOR_CLASS_SUP;
202 IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | 203 props->ip_gids = true;
203 IB_PORT_IP_BASED_GIDS;
204 props->gid_tbl_len = OCRDMA_MAX_SGID; 204 props->gid_tbl_len = OCRDMA_MAX_SGID;
205 props->pkey_tbl_len = 1; 205 props->pkey_tbl_len = 1;
206 props->bad_pkey_cntr = 0; 206 props->bad_pkey_cntr = 0;
@@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
1774 * protect against processing in-flight CQEs for this QP. 1774 * protect against processing in-flight CQEs for this QP.
1775 */ 1775 */
1776 spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); 1776 spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1777 if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) 1777 if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
1778 spin_lock(&qp->rq_cq->cq_lock); 1778 spin_lock(&qp->rq_cq->cq_lock);
1779 1779 ocrdma_del_qpn_map(dev, qp);
1780 ocrdma_del_qpn_map(dev, qp);
1781
1782 if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
1783 spin_unlock(&qp->rq_cq->cq_lock); 1780 spin_unlock(&qp->rq_cq->cq_lock);
1781 } else {
1782 ocrdma_del_qpn_map(dev, qp);
1783 }
1784 spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); 1784 spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1785 1785
1786 if (!pd->uctx) { 1786 if (!pd->uctx) {
@@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1953/* unprivileged verbs and their support functions. */ 1953/* unprivileged verbs and their support functions. */
1954static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, 1954static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1955 struct ocrdma_hdr_wqe *hdr, 1955 struct ocrdma_hdr_wqe *hdr,
1956 struct ib_send_wr *wr) 1956 const struct ib_send_wr *wr)
1957{ 1957{
1958 struct ocrdma_ewqe_ud_hdr *ud_hdr = 1958 struct ocrdma_ewqe_ud_hdr *ud_hdr =
1959 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); 1959 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
@@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
2000static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, 2000static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
2001 struct ocrdma_hdr_wqe *hdr, 2001 struct ocrdma_hdr_wqe *hdr,
2002 struct ocrdma_sge *sge, 2002 struct ocrdma_sge *sge,
2003 struct ib_send_wr *wr, u32 wqe_size) 2003 const struct ib_send_wr *wr, u32 wqe_size)
2004{ 2004{
2005 int i; 2005 int i;
2006 char *dpp_addr; 2006 char *dpp_addr;
@@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
2038} 2038}
2039 2039
2040static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, 2040static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2041 struct ib_send_wr *wr) 2041 const struct ib_send_wr *wr)
2042{ 2042{
2043 int status; 2043 int status;
2044 struct ocrdma_sge *sge; 2044 struct ocrdma_sge *sge;
@@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2057} 2057}
2058 2058
2059static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, 2059static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2060 struct ib_send_wr *wr) 2060 const struct ib_send_wr *wr)
2061{ 2061{
2062 int status; 2062 int status;
2063 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); 2063 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
@@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2075} 2075}
2076 2076
2077static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, 2077static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2078 struct ib_send_wr *wr) 2078 const struct ib_send_wr *wr)
2079{ 2079{
2080 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); 2080 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
2081 struct ocrdma_sge *sge = ext_rw + 1; 2081 struct ocrdma_sge *sge = ext_rw + 1;
@@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz)
2105 2105
2106static int ocrdma_build_reg(struct ocrdma_qp *qp, 2106static int ocrdma_build_reg(struct ocrdma_qp *qp,
2107 struct ocrdma_hdr_wqe *hdr, 2107 struct ocrdma_hdr_wqe *hdr,
2108 struct ib_reg_wr *wr) 2108 const struct ib_reg_wr *wr)
2109{ 2109{
2110 u64 fbo; 2110 u64 fbo;
2111 struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); 2111 struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
@@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2166 iowrite32(val, qp->sq_db); 2166 iowrite32(val, qp->sq_db);
2167} 2167}
2168 2168
2169int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2169int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2170 struct ib_send_wr **bad_wr) 2170 const struct ib_send_wr **bad_wr)
2171{ 2171{
2172 int status = 0; 2172 int status = 0;
2173 struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); 2173 struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
@@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2278 iowrite32(val, qp->rq_db); 2278 iowrite32(val, qp->rq_db);
2279} 2279}
2280 2280
2281static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, 2281static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
2282 u16 tag) 2282 const struct ib_recv_wr *wr, u16 tag)
2283{ 2283{
2284 u32 wqe_size = 0; 2284 u32 wqe_size = 0;
2285 struct ocrdma_sge *sge; 2285 struct ocrdma_sge *sge;
@@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
2299 ocrdma_cpu_to_le32(rqe, wqe_size); 2299 ocrdma_cpu_to_le32(rqe, wqe_size);
2300} 2300}
2301 2301
2302int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 2302int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2303 struct ib_recv_wr **bad_wr) 2303 const struct ib_recv_wr **bad_wr)
2304{ 2304{
2305 int status = 0; 2305 int status = 0;
2306 unsigned long flags; 2306 unsigned long flags;
@@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2369 iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET); 2369 iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2370} 2370}
2371 2371
2372int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 2372int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2373 struct ib_recv_wr **bad_wr) 2373 const struct ib_recv_wr **bad_wr)
2374{ 2374{
2375 int status = 0; 2375 int status = 0;
2376 unsigned long flags; 2376 unsigned long flags;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 9a9971708646..b69cfdce7970 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -43,10 +43,10 @@
43#ifndef __OCRDMA_VERBS_H__ 43#ifndef __OCRDMA_VERBS_H__
44#define __OCRDMA_VERBS_H__ 44#define __OCRDMA_VERBS_H__
45 45
46int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, 46int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *,
47 struct ib_send_wr **bad_wr); 47 const struct ib_send_wr **bad_wr);
48int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, 48int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *,
49 struct ib_recv_wr **bad_wr); 49 const struct ib_recv_wr **bad_wr);
50 50
51int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); 51int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
52int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); 52int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
@@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
100 enum ib_srq_attr_mask, struct ib_udata *); 100 enum ib_srq_attr_mask, struct ib_udata *);
101int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); 101int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
102int ocrdma_destroy_srq(struct ib_srq *); 102int ocrdma_destroy_srq(struct ib_srq *);
103int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, 103int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
104 struct ib_recv_wr **bad_recv_wr); 104 const struct ib_recv_wr **bad_recv_wr);
105 105
106int ocrdma_dereg_mr(struct ib_mr *); 106int ocrdma_dereg_mr(struct ib_mr *);
107struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); 107struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ad22b32bbd9c..a0af6d424aed 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev)
191 QEDR_UVERBS(MODIFY_QP) | 191 QEDR_UVERBS(MODIFY_QP) |
192 QEDR_UVERBS(QUERY_QP) | 192 QEDR_UVERBS(QUERY_QP) |
193 QEDR_UVERBS(DESTROY_QP) | 193 QEDR_UVERBS(DESTROY_QP) |
194 QEDR_UVERBS(CREATE_SRQ) |
195 QEDR_UVERBS(DESTROY_SRQ) |
196 QEDR_UVERBS(QUERY_SRQ) |
197 QEDR_UVERBS(MODIFY_SRQ) |
198 QEDR_UVERBS(POST_SRQ_RECV) |
194 QEDR_UVERBS(REG_MR) | 199 QEDR_UVERBS(REG_MR) |
195 QEDR_UVERBS(DEREG_MR) | 200 QEDR_UVERBS(DEREG_MR) |
196 QEDR_UVERBS(POLL_CQ) | 201 QEDR_UVERBS(POLL_CQ) |
@@ -229,6 +234,11 @@ static int qedr_register_device(struct qedr_dev *dev)
229 dev->ibdev.query_qp = qedr_query_qp; 234 dev->ibdev.query_qp = qedr_query_qp;
230 dev->ibdev.destroy_qp = qedr_destroy_qp; 235 dev->ibdev.destroy_qp = qedr_destroy_qp;
231 236
237 dev->ibdev.create_srq = qedr_create_srq;
238 dev->ibdev.destroy_srq = qedr_destroy_srq;
239 dev->ibdev.modify_srq = qedr_modify_srq;
240 dev->ibdev.query_srq = qedr_query_srq;
241 dev->ibdev.post_srq_recv = qedr_post_srq_recv;
232 dev->ibdev.query_pkey = qedr_query_pkey; 242 dev->ibdev.query_pkey = qedr_query_pkey;
233 243
234 dev->ibdev.create_ah = qedr_create_ah; 244 dev->ibdev.create_ah = qedr_create_ah;
@@ -325,8 +335,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
325 spin_lock_init(&dev->sgid_lock); 335 spin_lock_init(&dev->sgid_lock);
326 336
327 if (IS_IWARP(dev)) { 337 if (IS_IWARP(dev)) {
328 spin_lock_init(&dev->idr_lock); 338 spin_lock_init(&dev->qpidr.idr_lock);
329 idr_init(&dev->qpidr); 339 idr_init(&dev->qpidr.idr);
330 dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); 340 dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
331 } 341 }
332 342
@@ -653,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
653#define EVENT_TYPE_NOT_DEFINED 0 663#define EVENT_TYPE_NOT_DEFINED 0
654#define EVENT_TYPE_CQ 1 664#define EVENT_TYPE_CQ 1
655#define EVENT_TYPE_QP 2 665#define EVENT_TYPE_QP 2
666#define EVENT_TYPE_SRQ 3
656 struct qedr_dev *dev = (struct qedr_dev *)context; 667 struct qedr_dev *dev = (struct qedr_dev *)context;
657 struct regpair *async_handle = (struct regpair *)fw_handle; 668 struct regpair *async_handle = (struct regpair *)fw_handle;
658 u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo; 669 u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
659 u8 event_type = EVENT_TYPE_NOT_DEFINED; 670 u8 event_type = EVENT_TYPE_NOT_DEFINED;
660 struct ib_event event; 671 struct ib_event event;
672 struct ib_srq *ibsrq;
673 struct qedr_srq *srq;
674 unsigned long flags;
661 struct ib_cq *ibcq; 675 struct ib_cq *ibcq;
662 struct ib_qp *ibqp; 676 struct ib_qp *ibqp;
663 struct qedr_cq *cq; 677 struct qedr_cq *cq;
664 struct qedr_qp *qp; 678 struct qedr_qp *qp;
679 u16 srq_id;
665 680
666 switch (e_code) { 681 if (IS_ROCE(dev)) {
667 case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: 682 switch (e_code) {
668 event.event = IB_EVENT_CQ_ERR; 683 case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
669 event_type = EVENT_TYPE_CQ; 684 event.event = IB_EVENT_CQ_ERR;
670 break; 685 event_type = EVENT_TYPE_CQ;
671 case ROCE_ASYNC_EVENT_SQ_DRAINED: 686 break;
672 event.event = IB_EVENT_SQ_DRAINED; 687 case ROCE_ASYNC_EVENT_SQ_DRAINED:
673 event_type = EVENT_TYPE_QP; 688 event.event = IB_EVENT_SQ_DRAINED;
674 break; 689 event_type = EVENT_TYPE_QP;
675 case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: 690 break;
676 event.event = IB_EVENT_QP_FATAL; 691 case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
677 event_type = EVENT_TYPE_QP; 692 event.event = IB_EVENT_QP_FATAL;
678 break; 693 event_type = EVENT_TYPE_QP;
679 case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: 694 break;
680 event.event = IB_EVENT_QP_REQ_ERR; 695 case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
681 event_type = EVENT_TYPE_QP; 696 event.event = IB_EVENT_QP_REQ_ERR;
682 break; 697 event_type = EVENT_TYPE_QP;
683 case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: 698 break;
684 event.event = IB_EVENT_QP_ACCESS_ERR; 699 case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
685 event_type = EVENT_TYPE_QP; 700 event.event = IB_EVENT_QP_ACCESS_ERR;
686 break; 701 event_type = EVENT_TYPE_QP;
687 default: 702 break;
703 case ROCE_ASYNC_EVENT_SRQ_LIMIT:
704 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
705 event_type = EVENT_TYPE_SRQ;
706 break;
707 case ROCE_ASYNC_EVENT_SRQ_EMPTY:
708 event.event = IB_EVENT_SRQ_ERR;
709 event_type = EVENT_TYPE_SRQ;
710 break;
711 default:
712 DP_ERR(dev, "unsupported event %d on handle=%llx\n",
713 e_code, roce_handle64);
714 }
715 } else {
716 switch (e_code) {
717 case QED_IWARP_EVENT_SRQ_LIMIT:
718 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
719 event_type = EVENT_TYPE_SRQ;
720 break;
721 case QED_IWARP_EVENT_SRQ_EMPTY:
722 event.event = IB_EVENT_SRQ_ERR;
723 event_type = EVENT_TYPE_SRQ;
724 break;
725 default:
688 DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, 726 DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code,
689 roce_handle64); 727 roce_handle64);
728 }
690 } 729 }
691
692 switch (event_type) { 730 switch (event_type) {
693 case EVENT_TYPE_CQ: 731 case EVENT_TYPE_CQ:
694 cq = (struct qedr_cq *)(uintptr_t)roce_handle64; 732 cq = (struct qedr_cq *)(uintptr_t)roce_handle64;
@@ -722,6 +760,25 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
722 } 760 }
723 DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp); 761 DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp);
724 break; 762 break;
763 case EVENT_TYPE_SRQ:
764 srq_id = (u16)roce_handle64;
765 spin_lock_irqsave(&dev->srqidr.idr_lock, flags);
766 srq = idr_find(&dev->srqidr.idr, srq_id);
767 if (srq) {
768 ibsrq = &srq->ibsrq;
769 if (ibsrq->event_handler) {
770 event.device = ibsrq->device;
771 event.element.srq = ibsrq;
772 ibsrq->event_handler(&event,
773 ibsrq->srq_context);
774 }
775 } else {
776 DP_NOTICE(dev,
777 "SRQ event with NULL pointer ibsrq. Handle=%llx\n",
778 roce_handle64);
779 }
780 spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags);
781 DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq);
725 default: 782 default:
726 break; 783 break;
727 } 784 }
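The dispatch above looks the SRQ id up in the new srqidr table and forwards IB_EVENT_SRQ_LIMIT_REACHED and IB_EVENT_SRQ_ERR to the ULP's registered handler. A hedged sketch of that consumer side; my_srq_event is a hypothetical handler installed via ib_srq_init_attr:

static void my_srq_event(struct ib_event *event, void *srq_context)
{
	/* invoked through ibsrq->event_handler() in the code above */
	if (event->event == IB_EVENT_SRQ_LIMIT_REACHED)
		pr_info("SRQ low watermark hit, posting more receive buffers\n");
	else if (event->event == IB_EVENT_SRQ_ERR)
		pr_err("SRQ moved to error state\n");
}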
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 86d4511e0d75..a2d708dceb8d 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -58,6 +58,7 @@
58#define QEDR_MSG_RQ " RQ" 58#define QEDR_MSG_RQ " RQ"
59#define QEDR_MSG_SQ " SQ" 59#define QEDR_MSG_SQ " SQ"
60#define QEDR_MSG_QP " QP" 60#define QEDR_MSG_QP " QP"
61#define QEDR_MSG_SRQ " SRQ"
61#define QEDR_MSG_GSI " GSI" 62#define QEDR_MSG_GSI " GSI"
62#define QEDR_MSG_IWARP " IW" 63#define QEDR_MSG_IWARP " IW"
63 64
@@ -122,6 +123,11 @@ struct qedr_device_attr {
122 123
123#define QEDR_ENET_STATE_BIT (0) 124#define QEDR_ENET_STATE_BIT (0)
124 125
126struct qedr_idr {
127 spinlock_t idr_lock; /* Protect idr data-structure */
128 struct idr idr;
129};
130
125struct qedr_dev { 131struct qedr_dev {
126 struct ib_device ibdev; 132 struct ib_device ibdev;
127 struct qed_dev *cdev; 133 struct qed_dev *cdev;
@@ -165,8 +171,8 @@ struct qedr_dev {
165 struct qedr_cq *gsi_rqcq; 171 struct qedr_cq *gsi_rqcq;
166 struct qedr_qp *gsi_qp; 172 struct qedr_qp *gsi_qp;
167 enum qed_rdma_type rdma_type; 173 enum qed_rdma_type rdma_type;
168 spinlock_t idr_lock; /* Protect qpidr data-structure */ 174 struct qedr_idr qpidr;
169 struct idr qpidr; 175 struct qedr_idr srqidr;
170 struct workqueue_struct *iwarp_wq; 176 struct workqueue_struct *iwarp_wq;
171 u16 iwarp_max_mtu; 177 u16 iwarp_max_mtu;
172 178
@@ -337,6 +343,34 @@ struct qedr_qp_hwq_info {
337 qed_chain_get_capacity(p_info->pbl) \ 343 qed_chain_get_capacity(p_info->pbl) \
338 } while (0) 344 } while (0)
339 345
346struct qedr_srq_hwq_info {
347 u32 max_sges;
348 u32 max_wr;
349 struct qed_chain pbl;
350 u64 p_phys_addr_tbl;
351 u32 wqe_prod;
352 u32 sge_prod;
353 u32 wr_prod_cnt;
354 u32 wr_cons_cnt;
355 u32 num_elems;
356
357 u32 *virt_prod_pair_addr;
358 dma_addr_t phy_prod_pair_addr;
359};
360
361struct qedr_srq {
362 struct ib_srq ibsrq;
363 struct qedr_dev *dev;
364
365 struct qedr_userq usrq;
366 struct qedr_srq_hwq_info hw_srq;
367 struct ib_umem *prod_umem;
368 u16 srq_id;
369 u32 srq_limit;
370 /* lock to protect srq recv post */
371 spinlock_t lock;
372};
373
340enum qedr_qp_err_bitmap { 374enum qedr_qp_err_bitmap {
341 QEDR_QP_ERR_SQ_FULL = 1, 375 QEDR_QP_ERR_SQ_FULL = 1,
342 QEDR_QP_ERR_RQ_FULL = 2, 376 QEDR_QP_ERR_RQ_FULL = 2,
@@ -538,4 +572,9 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr)
538{ 572{
539 return container_of(ibmr, struct qedr_mr, ibmr); 573 return container_of(ibmr, struct qedr_mr, ibmr);
540} 574}
575
576static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
577{
578 return container_of(ibsrq, struct qedr_srq, ibsrq);
579}
541#endif 580#endif
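qedr.h now pairs each idr with its protecting lock in struct qedr_idr so the QP and SRQ tables share one pattern, and get_qedr_srq() recovers the driver object via container_of(). A minimal sketch of a locked lookup against that wrapper; qedr_idr_find is a hypothetical helper, not part of the patch:

static void *qedr_idr_find(struct qedr_idr *qidr, u32 id)
{
	unsigned long flags;
	void *entry;

	spin_lock_irqsave(&qidr->idr_lock, flags);
	entry = idr_find(&qidr->idr, id);
	spin_unlock_irqrestore(&qidr->idr_lock, flags);

	return entry;
}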
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 7e1f7021396a..228dd7d49622 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -161,12 +161,23 @@ struct rdma_rq_sge {
161#define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29 161#define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29
162}; 162};
163 163
164struct rdma_srq_wqe_header {
165 struct regpair wr_id;
166 u8 num_sges /* number of SGEs in WQE */;
167 u8 reserved2[7];
168};
169
164struct rdma_srq_sge { 170struct rdma_srq_sge {
165 struct regpair addr; 171 struct regpair addr;
166 __le32 length; 172 __le32 length;
167 __le32 l_key; 173 __le32 l_key;
168}; 174};
169 175
176union rdma_srq_elm {
177 struct rdma_srq_wqe_header header;
178 struct rdma_srq_sge sge;
179};
180
170/* Rdma doorbell data for flags update */ 181/* Rdma doorbell data for flags update */
171struct rdma_pwm_flags_data { 182struct rdma_pwm_flags_data {
172 __le16 icid; /* internal CID */ 183 __le16 icid; /* internal CID */
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 26dc374787f7..505fa3648762 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
491 int rc = 0; 491 int rc = 0;
492 int i; 492 int i;
493 493
494 qp = idr_find(&dev->qpidr, conn_param->qpn); 494 qp = idr_find(&dev->qpidr.idr, conn_param->qpn);
495 495
496 laddr = (struct sockaddr_in *)&cm_id->m_local_addr; 496 laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
497 raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; 497 raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
@@ -679,7 +679,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
679 679
680 DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); 680 DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
681 681
682 qp = idr_find(&dev->qpidr, conn_param->qpn); 682 qp = idr_find(&dev->qpidr.idr, conn_param->qpn);
683 if (!qp) { 683 if (!qp) {
684 DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); 684 DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
685 return -EINVAL; 685 return -EINVAL;
@@ -737,9 +737,9 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
737 struct qedr_qp *qp = get_qedr_qp(ibqp); 737 struct qedr_qp *qp = get_qedr_qp(ibqp);
738 738
739 if (atomic_dec_and_test(&qp->refcnt)) { 739 if (atomic_dec_and_test(&qp->refcnt)) {
740 spin_lock_irq(&qp->dev->idr_lock); 740 spin_lock_irq(&qp->dev->qpidr.idr_lock);
741 idr_remove(&qp->dev->qpidr, qp->qp_id); 741 idr_remove(&qp->dev->qpidr.idr, qp->qp_id);
742 spin_unlock_irq(&qp->dev->idr_lock); 742 spin_unlock_irq(&qp->dev->qpidr.idr_lock);
743 kfree(qp); 743 kfree(qp);
744 } 744 }
745} 745}
@@ -748,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
748{ 748{
749 struct qedr_dev *dev = get_qedr_dev(ibdev); 749 struct qedr_dev *dev = get_qedr_dev(ibdev);
750 750
751 return idr_find(&dev->qpidr, qpn); 751 return idr_find(&dev->qpidr.idr, qpn);
752} 752}
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 0f14e687bb91..85578887421b 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -380,18 +380,17 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev)
380#define QEDR_GSI_QPN (1) 380#define QEDR_GSI_QPN (1)
381static inline int qedr_gsi_build_header(struct qedr_dev *dev, 381static inline int qedr_gsi_build_header(struct qedr_dev *dev,
382 struct qedr_qp *qp, 382 struct qedr_qp *qp,
383 struct ib_send_wr *swr, 383 const struct ib_send_wr *swr,
384 struct ib_ud_header *udh, 384 struct ib_ud_header *udh,
385 int *roce_mode) 385 int *roce_mode)
386{ 386{
387 bool has_vlan = false, has_grh_ipv6 = true; 387 bool has_vlan = false, has_grh_ipv6 = true;
388 struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; 388 struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
389 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 389 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
390 union ib_gid sgid; 390 const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
391 int send_size = 0; 391 int send_size = 0;
392 u16 vlan_id = 0; 392 u16 vlan_id = 0;
393 u16 ether_type; 393 u16 ether_type;
394 struct ib_gid_attr sgid_attr;
395 int rc; 394 int rc;
396 int ip_ver = 0; 395 int ip_ver = 0;
397 396
@@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
402 for (i = 0; i < swr->num_sge; ++i) 401 for (i = 0; i < swr->num_sge; ++i)
403 send_size += swr->sg_list[i].length; 402 send_size += swr->sg_list[i].length;
404 403
405 rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr), 404 vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
406 grh->sgid_index, &sgid, &sgid_attr);
407 if (rc) {
408 DP_ERR(dev,
409 "gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
410 rdma_ah_get_port_num(ah_attr),
411 grh->sgid_index);
412 return rc;
413 }
414
415 vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
416 if (vlan_id < VLAN_CFI_MASK) 405 if (vlan_id < VLAN_CFI_MASK)
417 has_vlan = true; 406 has_vlan = true;
418 407
419 dev_put(sgid_attr.ndev); 408 has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
420
421 has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
422 if (!has_udp) { 409 if (!has_udp) {
423 /* RoCE v1 */ 410 /* RoCE v1 */
424 ether_type = ETH_P_IBOE; 411 ether_type = ETH_P_IBOE;
425 *roce_mode = ROCE_V1; 412 *roce_mode = ROCE_V1;
426 } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { 413 } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
427 /* RoCE v2 IPv4 */ 414 /* RoCE v2 IPv4 */
428 ip_ver = 4; 415 ip_ver = 4;
429 ether_type = ETH_P_IP; 416 ether_type = ETH_P_IP;
@@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
471 udh->grh.flow_label = grh->flow_label; 458 udh->grh.flow_label = grh->flow_label;
472 udh->grh.hop_limit = grh->hop_limit; 459 udh->grh.hop_limit = grh->hop_limit;
473 udh->grh.destination_gid = grh->dgid; 460 udh->grh.destination_gid = grh->dgid;
474 memcpy(&udh->grh.source_gid.raw, &sgid.raw, 461 memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw,
475 sizeof(udh->grh.source_gid.raw)); 462 sizeof(udh->grh.source_gid.raw));
476 } else { 463 } else {
477 /* IPv4 header */ 464 /* IPv4 header */
@@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
482 udh->ip4.frag_off = htons(IP_DF); 469 udh->ip4.frag_off = htons(IP_DF);
483 udh->ip4.ttl = grh->hop_limit; 470 udh->ip4.ttl = grh->hop_limit;
484 471
485 ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); 472 ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw);
486 udh->ip4.saddr = ipv4_addr; 473 udh->ip4.saddr = ipv4_addr;
487 ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw); 474 ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
488 udh->ip4.daddr = ipv4_addr; 475 udh->ip4.daddr = ipv4_addr;
@@ -501,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
501 488
502static inline int qedr_gsi_build_packet(struct qedr_dev *dev, 489static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
503 struct qedr_qp *qp, 490 struct qedr_qp *qp,
504 struct ib_send_wr *swr, 491 const struct ib_send_wr *swr,
505 struct qed_roce_ll2_packet **p_packet) 492 struct qed_roce_ll2_packet **p_packet)
506{ 493{
507 u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE]; 494 u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE];
@@ -550,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
550 return 0; 537 return 0;
551} 538}
552 539
553int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 540int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
554 struct ib_send_wr **bad_wr) 541 const struct ib_send_wr **bad_wr)
555{ 542{
556 struct qed_roce_ll2_packet *pkt = NULL; 543 struct qed_roce_ll2_packet *pkt = NULL;
557 struct qedr_qp *qp = get_qedr_qp(ibqp); 544 struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -620,8 +607,8 @@ err:
620 return rc; 607 return rc;
621} 608}
622 609
623int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 610int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
624 struct ib_recv_wr **bad_wr) 611 const struct ib_recv_wr **bad_wr)
625{ 612{
626 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 613 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
627 struct qedr_qp *qp = get_qedr_qp(ibqp); 614 struct qedr_qp *qp = get_qedr_qp(ibqp);
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
index a55916323ea9..d46dcd3f6424 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
@@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
46 46
47/* RDMA CM */ 47/* RDMA CM */
48int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 48int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
49int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 49int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
50 struct ib_recv_wr **bad_wr); 50 const struct ib_recv_wr **bad_wr);
51int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 51int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
52 struct ib_send_wr **bad_wr); 52 const struct ib_send_wr **bad_wr);
53struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, 53struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
54 struct ib_qp_init_attr *attrs, 54 struct ib_qp_init_attr *attrs,
55 struct qedr_qp *qp); 55 struct qedr_qp *qp);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f07b8df96f43..8cc3df24e04e 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,10 @@
51#include <rdma/qedr-abi.h> 51#include <rdma/qedr-abi.h>
52#include "qedr_roce_cm.h" 52#include "qedr_roce_cm.h"
53 53
54#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
55#define RDMA_MAX_SGE_PER_SRQ (4)
56#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1)
57
54#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) 58#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
55 59
56static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, 60static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
@@ -84,6 +88,19 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
84 return 0; 88 return 0;
85} 89}
86 90
91int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
92{
93 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
94 struct qedr_device_attr *qattr = &dev->attr;
95 struct qedr_srq *srq = get_qedr_srq(ibsrq);
96
97 srq_attr->srq_limit = srq->srq_limit;
98 srq_attr->max_wr = qattr->max_srq_wr;
99 srq_attr->max_sge = qattr->max_sge;
100
101 return 0;
102}
103
87int qedr_query_device(struct ib_device *ibdev, 104int qedr_query_device(struct ib_device *ibdev,
88 struct ib_device_attr *attr, struct ib_udata *udata) 105 struct ib_device_attr *attr, struct ib_udata *udata)
89{ 106{
@@ -112,7 +129,8 @@ int qedr_query_device(struct ib_device *ibdev,
112 IB_DEVICE_RC_RNR_NAK_GEN | 129 IB_DEVICE_RC_RNR_NAK_GEN |
113 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; 130 IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
114 131
115 attr->max_sge = qattr->max_sge; 132 attr->max_send_sge = qattr->max_sge;
133 attr->max_recv_sge = qattr->max_sge;
116 attr->max_sge_rd = qattr->max_sge; 134 attr->max_sge_rd = qattr->max_sge;
117 attr->max_cq = qattr->max_cq; 135 attr->max_cq = qattr->max_cq;
118 attr->max_cqe = qattr->max_cqe; 136 attr->max_cqe = qattr->max_cqe;
@@ -224,7 +242,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
224 attr->lmc = 0; 242 attr->lmc = 0;
225 attr->sm_lid = 0; 243 attr->sm_lid = 0;
226 attr->sm_sl = 0; 244 attr->sm_sl = 0;
227 attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; 245 attr->ip_gids = true;
228 if (rdma_protocol_iwarp(&dev->ibdev, 1)) { 246 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
229 attr->gid_tbl_len = 1; 247 attr->gid_tbl_len = 1;
230 attr->pkey_tbl_len = 1; 248 attr->pkey_tbl_len = 1;
@@ -1075,27 +1093,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1075 struct qed_rdma_modify_qp_in_params 1093 struct qed_rdma_modify_qp_in_params
1076 *qp_params) 1094 *qp_params)
1077{ 1095{
1096 const struct ib_gid_attr *gid_attr;
1078 enum rdma_network_type nw_type; 1097 enum rdma_network_type nw_type;
1079 struct ib_gid_attr gid_attr;
1080 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 1098 const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1081 union ib_gid gid;
1082 u32 ipv4_addr; 1099 u32 ipv4_addr;
1083 int rc = 0;
1084 int i; 1100 int i;
1085 1101
1086 rc = ib_get_cached_gid(ibqp->device, 1102 gid_attr = grh->sgid_attr;
1087 rdma_ah_get_port_num(&attr->ah_attr), 1103 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
1088 grh->sgid_index, &gid, &gid_attr);
1089 if (rc)
1090 return rc;
1091
1092 qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1093 1104
1094 dev_put(gid_attr.ndev); 1105 nw_type = rdma_gid_attr_network_type(gid_attr);
1095 nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
1096 switch (nw_type) { 1106 switch (nw_type) {
1097 case RDMA_NETWORK_IPV6: 1107 case RDMA_NETWORK_IPV6:
1098 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1108 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1099 sizeof(qp_params->sgid)); 1109 sizeof(qp_params->sgid));
1100 memcpy(&qp_params->dgid.bytes[0], 1110 memcpy(&qp_params->dgid.bytes[0],
1101 &grh->dgid, 1111 &grh->dgid,
@@ -1105,7 +1115,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1105 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); 1115 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1106 break; 1116 break;
1107 case RDMA_NETWORK_IB: 1117 case RDMA_NETWORK_IB:
1108 memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], 1118 memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1109 sizeof(qp_params->sgid)); 1119 sizeof(qp_params->sgid));
1110 memcpy(&qp_params->dgid.bytes[0], 1120 memcpy(&qp_params->dgid.bytes[0],
1111 &grh->dgid, 1121 &grh->dgid,
@@ -1115,7 +1125,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1115 case RDMA_NETWORK_IPV4: 1125 case RDMA_NETWORK_IPV4:
1116 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); 1126 memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1117 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); 1127 memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1118 ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); 1128 ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1119 qp_params->sgid.ipv4_addr = ipv4_addr; 1129 qp_params->sgid.ipv4_addr = ipv4_addr;
1120 ipv4_addr = 1130 ipv4_addr =
1121 qedr_get_ipv4_from_gid(grh->dgid.raw); 1131 qedr_get_ipv4_from_gid(grh->dgid.raw);
@@ -1189,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1189 return 0; 1199 return 0;
1190} 1200}
1191 1201
1202static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1203 struct qedr_srq *srq, struct ib_udata *udata)
1204{
1205 struct qedr_create_srq_uresp uresp = {};
1206 int rc;
1207
1208 uresp.srq_id = srq->srq_id;
1209
1210 rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1211 if (rc)
1212 DP_ERR(dev, "create srq: problem copying data to user space\n");
1213
1214 return rc;
1215}
1216
1192static void qedr_copy_rq_uresp(struct qedr_dev *dev, 1217static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1193 struct qedr_create_qp_uresp *uresp, 1218 struct qedr_create_qp_uresp *uresp,
1194 struct qedr_qp *qp) 1219 struct qedr_qp *qp)
@@ -1255,13 +1280,18 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
1255 qp->state = QED_ROCE_QP_STATE_RESET; 1280 qp->state = QED_ROCE_QP_STATE_RESET;
1256 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; 1281 qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1257 qp->sq_cq = get_qedr_cq(attrs->send_cq); 1282 qp->sq_cq = get_qedr_cq(attrs->send_cq);
1258 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1259 qp->dev = dev; 1283 qp->dev = dev;
1260 qp->rq.max_sges = attrs->cap.max_recv_sge;
1261 1284
1262 DP_DEBUG(dev, QEDR_MSG_QP, 1285 if (attrs->srq) {
1263 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", 1286 qp->srq = get_qedr_srq(attrs->srq);
1264 qp->rq.max_sges, qp->rq_cq->icid); 1287 } else {
1288 qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1289 qp->rq.max_sges = attrs->cap.max_recv_sge;
1290 DP_DEBUG(dev, QEDR_MSG_QP,
1291 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1292 qp->rq.max_sges, qp->rq_cq->icid);
1293 }
1294
1265 DP_DEBUG(dev, QEDR_MSG_QP, 1295 DP_DEBUG(dev, QEDR_MSG_QP,
1266 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", 1296 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1267 pd->pd_id, qp->qp_type, qp->max_inline_data, 1297 pd->pd_id, qp->qp_type, qp->max_inline_data,
@@ -1276,9 +1306,303 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1276 qp->sq.db = dev->db_addr + 1306 qp->sq.db = dev->db_addr +
1277 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); 1307 DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1278 qp->sq.db_data.data.icid = qp->icid + 1; 1308 qp->sq.db_data.data.icid = qp->icid + 1;
1279 qp->rq.db = dev->db_addr + 1309 if (!qp->srq) {
1280 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); 1310 qp->rq.db = dev->db_addr +
1281 qp->rq.db_data.data.icid = qp->icid; 1311 DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1312 qp->rq.db_data.data.icid = qp->icid;
1313 }
1314}
1315
1316static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
1317 struct ib_srq_init_attr *attrs,
1318 struct ib_udata *udata)
1319{
1320 struct qedr_device_attr *qattr = &dev->attr;
1321
1322 if (attrs->attr.max_wr > qattr->max_srq_wr) {
1323 DP_ERR(dev,
1324 "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1325 attrs->attr.max_wr, qattr->max_srq_wr);
1326 return -EINVAL;
1327 }
1328
1329 if (attrs->attr.max_sge > qattr->max_sge) {
1330 DP_ERR(dev,
1331 "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1332 attrs->attr.max_sge, qattr->max_sge);
1333 return -EINVAL;
1334 }
1335
1336 return 0;
1337}
1338
1339static void qedr_free_srq_user_params(struct qedr_srq *srq)
1340{
1341 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1342 ib_umem_release(srq->usrq.umem);
1343 ib_umem_release(srq->prod_umem);
1344}
1345
1346static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1347{
1348 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1349 struct qedr_dev *dev = srq->dev;
1350
1351 dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1352
1353 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1354 hw_srq->virt_prod_pair_addr,
1355 hw_srq->phy_prod_pair_addr);
1356}
1357
1358static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
1359 struct qedr_srq *srq,
1360 struct qedr_create_srq_ureq *ureq,
1361 int access, int dmasync)
1362{
1363 struct scatterlist *sg;
1364 int rc;
1365
1366 rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
1367 ureq->srq_len, access, dmasync, 1);
1368 if (rc)
1369 return rc;
1370
1371 srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
1372 sizeof(struct rdma_srq_producers),
1373 access, dmasync);
1374 if (IS_ERR(srq->prod_umem)) {
1375 qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1376 ib_umem_release(srq->usrq.umem);
1377 DP_ERR(srq->dev,
1378 "create srq: failed ib_umem_get for producer, got %ld\n",
1379 PTR_ERR(srq->prod_umem));
1380 return PTR_ERR(srq->prod_umem);
1381 }
1382
1383 sg = srq->prod_umem->sg_head.sgl;
1384 srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1385
1386 return 0;
1387}
1388
1389static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1390 struct qedr_dev *dev,
1391 struct ib_srq_init_attr *init_attr)
1392{
1393 struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1394 dma_addr_t phy_prod_pair_addr;
1395 u32 num_elems;
1396 void *va;
1397 int rc;
1398
1399 va = dma_alloc_coherent(&dev->pdev->dev,
1400 sizeof(struct rdma_srq_producers),
1401 &phy_prod_pair_addr, GFP_KERNEL);
1402 if (!va) {
1403 DP_ERR(dev,
1404 "create srq: failed to allocate dma memory for producer\n");
1405 return -ENOMEM;
1406 }
1407
1408 hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1409 hw_srq->virt_prod_pair_addr = va;
1410
1411 num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1412 rc = dev->ops->common->chain_alloc(dev->cdev,
1413 QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1414 QED_CHAIN_MODE_PBL,
1415 QED_CHAIN_CNT_TYPE_U32,
1416 num_elems,
1417 QEDR_SRQ_WQE_ELEM_SIZE,
1418 &hw_srq->pbl, NULL);
1419 if (rc)
1420 goto err0;
1421
1422 hw_srq->num_elems = num_elems;
1423
1424 return 0;
1425
1426err0:
1427 dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1428 va, phy_prod_pair_addr);
1429 return rc;
1430}
1431
1432static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1433 void *ptr, u32 id);
1434static void qedr_idr_remove(struct qedr_dev *dev,
1435 struct qedr_idr *qidr, u32 id);
1436
1437struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
1438 struct ib_srq_init_attr *init_attr,
1439 struct ib_udata *udata)
1440{
1441 struct qed_rdma_destroy_srq_in_params destroy_in_params;
1442 struct qed_rdma_create_srq_in_params in_params = {};
1443 struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1444 struct qed_rdma_create_srq_out_params out_params;
1445 struct qedr_pd *pd = get_qedr_pd(ibpd);
1446 struct qedr_create_srq_ureq ureq = {};
1447 u64 pbl_base_addr, phy_prod_pair_addr;
1448 struct ib_ucontext *ib_ctx = NULL;
1449 struct qedr_srq_hwq_info *hw_srq;
1450 struct qedr_ucontext *ctx = NULL;
1451 u32 page_cnt, page_size;
1452 struct qedr_srq *srq;
1453 int rc = 0;
1454
1455 DP_DEBUG(dev, QEDR_MSG_QP,
1456 "create SRQ called from %s (pd %p)\n",
1457 (udata) ? "User lib" : "kernel", pd);
1458
1459 rc = qedr_check_srq_params(ibpd, dev, init_attr, udata);
1460 if (rc)
1461 return ERR_PTR(-EINVAL);
1462
1463 srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1464 if (!srq)
1465 return ERR_PTR(-ENOMEM);
1466
1467 srq->dev = dev;
1468 hw_srq = &srq->hw_srq;
1469 spin_lock_init(&srq->lock);
1470
1471 hw_srq->max_wr = init_attr->attr.max_wr;
1472 hw_srq->max_sges = init_attr->attr.max_sge;
1473
1474 if (udata && ibpd->uobject && ibpd->uobject->context) {
1475 ib_ctx = ibpd->uobject->context;
1476 ctx = get_qedr_ucontext(ib_ctx);
1477
1478 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1479 DP_ERR(dev,
1480 "create srq: problem copying data from user space\n");
1481 goto err0;
1482 }
1483
1484 rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
1485 if (rc)
1486 goto err0;
1487
1488 page_cnt = srq->usrq.pbl_info.num_pbes;
1489 pbl_base_addr = srq->usrq.pbl_tbl->pa;
1490 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1491 page_size = BIT(srq->usrq.umem->page_shift);
1492 } else {
1493 struct qed_chain *pbl;
1494
1495 rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1496 if (rc)
1497 goto err0;
1498
1499 pbl = &hw_srq->pbl;
1500 page_cnt = qed_chain_get_page_cnt(pbl);
1501 pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1502 phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1503 page_size = QED_CHAIN_PAGE_SIZE;
1504 }
1505
1506 in_params.pd_id = pd->pd_id;
1507 in_params.pbl_base_addr = pbl_base_addr;
1508 in_params.prod_pair_addr = phy_prod_pair_addr;
1509 in_params.num_pages = page_cnt;
1510 in_params.page_size = page_size;
1511
1512 rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1513 if (rc)
1514 goto err1;
1515
1516 srq->srq_id = out_params.srq_id;
1517
1518 if (udata) {
1519 rc = qedr_copy_srq_uresp(dev, srq, udata);
1520 if (rc)
1521 goto err2;
1522 }
1523
1524 rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
1525 if (rc)
1526 goto err2;
1527
1528 DP_DEBUG(dev, QEDR_MSG_SRQ,
1529 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1530 return &srq->ibsrq;
1531
1532err2:
1533 destroy_in_params.srq_id = srq->srq_id;
1534
1535 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1536err1:
1537 if (udata)
1538 qedr_free_srq_user_params(srq);
1539 else
1540 qedr_free_srq_kernel_params(srq);
1541err0:
1542 kfree(srq);
1543
1544 return ERR_PTR(-EFAULT);
1545}
1546
1547int qedr_destroy_srq(struct ib_srq *ibsrq)
1548{
1549 struct qed_rdma_destroy_srq_in_params in_params = {};
1550 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1551 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1552
1553 qedr_idr_remove(dev, &dev->srqidr, srq->srq_id);
1554 in_params.srq_id = srq->srq_id;
1555 dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1556
1557 if (ibsrq->pd->uobject)
1558 qedr_free_srq_user_params(srq);
1559 else
1560 qedr_free_srq_kernel_params(srq);
1561
1562 DP_DEBUG(dev, QEDR_MSG_SRQ,
1563 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1564 srq->srq_id);
1565 kfree(srq);
1566
1567 return 0;
1568}
1569
1570int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1571 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1572{
1573 struct qed_rdma_modify_srq_in_params in_params = {};
1574 struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1575 struct qedr_srq *srq = get_qedr_srq(ibsrq);
1576 int rc;
1577
1578 if (attr_mask & IB_SRQ_MAX_WR) {
1579 DP_ERR(dev,
1580 "modify srq: invalid attribute mask=0x%x specified for %p\n",
1581 attr_mask, srq);
1582 return -EINVAL;
1583 }
1584
1585 if (attr_mask & IB_SRQ_LIMIT) {
1586 if (attr->srq_limit >= srq->hw_srq.max_wr) {
1587 DP_ERR(dev,
1588 "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1589 attr->srq_limit, srq->hw_srq.max_wr);
1590 return -EINVAL;
1591 }
1592
1593 in_params.srq_id = srq->srq_id;
1594 in_params.wqe_limit = attr->srq_limit;
1595 rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1596 if (rc)
1597 return rc;
1598 }
1599
1600 srq->srq_limit = attr->srq_limit;
1601
1602 DP_DEBUG(dev, QEDR_MSG_SRQ,
1603 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1604
1605 return 0;
1282} 1606}
1283 1607
1284static inline void 1608static inline void
@@ -1299,9 +1623,17 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev,
1299 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; 1623 params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1300 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; 1624 params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1301 params->stats_queue = 0; 1625 params->stats_queue = 0;
1302 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1303 params->srq_id = 0; 1626 params->srq_id = 0;
1304 params->use_srq = false; 1627 params->use_srq = false;
1628
1629 if (!qp->srq) {
1630 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1631
1632 } else {
1633 params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1634 params->srq_id = qp->srq->srq_id;
1635 params->use_srq = true;
1636 }
1305} 1637}
1306 1638
1307static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) 1639static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
@@ -1318,32 +1650,27 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1318 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); 1650 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1319} 1651}
1320 1652
1321static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id) 1653static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
1654 void *ptr, u32 id)
1322{ 1655{
1323 int rc; 1656 int rc;
1324 1657
1325 if (!rdma_protocol_iwarp(&dev->ibdev, 1))
1326 return 0;
1327
1328 idr_preload(GFP_KERNEL); 1658 idr_preload(GFP_KERNEL);
1329 spin_lock_irq(&dev->idr_lock); 1659 spin_lock_irq(&qidr->idr_lock);
1330 1660
1331 rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC); 1661 rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC);
1332 1662
1333 spin_unlock_irq(&dev->idr_lock); 1663 spin_unlock_irq(&qidr->idr_lock);
1334 idr_preload_end(); 1664 idr_preload_end();
1335 1665
1336 return rc < 0 ? rc : 0; 1666 return rc < 0 ? rc : 0;
1337} 1667}
1338 1668
1339static void qedr_idr_remove(struct qedr_dev *dev, u32 id) 1669static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id)
1340{ 1670{
1341 if (!rdma_protocol_iwarp(&dev->ibdev, 1)) 1671 spin_lock_irq(&qidr->idr_lock);
1342 return; 1672 idr_remove(&qidr->idr, id);
1343 1673 spin_unlock_irq(&qidr->idr_lock);
1344 spin_lock_irq(&dev->idr_lock);
1345 idr_remove(&dev->qpidr, id);
1346 spin_unlock_irq(&dev->idr_lock);
1347} 1674}
1348 1675
1349static inline void 1676static inline void
@@ -1356,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1356 1683
1357 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl, 1684 qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1358 &qp->usq.pbl_info, FW_PAGE_SHIFT); 1685 &qp->usq.pbl_info, FW_PAGE_SHIFT);
1359 1686 if (!qp->srq) {
1360 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt; 1687 qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1361 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys; 1688 qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1689 }
1362 1690
1363 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl, 1691 qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1364 &qp->urq.pbl_info, FW_PAGE_SHIFT); 1692 &qp->urq.pbl_info, FW_PAGE_SHIFT);
@@ -1404,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
1404 if (rc) 1732 if (rc)
1405 return rc; 1733 return rc;
1406 1734
1407 /* RQ - read access only (0), dma sync not required (0) */ 1735 if (!qp->srq) {
1408 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr, 1736 /* RQ - read access only (0), dma sync not required (0) */
1409 ureq.rq_len, 0, 0, alloc_and_init); 1737 rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
1410 if (rc) 1738 ureq.rq_len, 0, 0, alloc_and_init);
1411 return rc; 1739 if (rc)
1740 return rc;
1741 }
1412 1742
1413 memset(&in_params, 0, sizeof(in_params)); 1743 memset(&in_params, 0, sizeof(in_params));
1414 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); 1744 qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
@@ -1416,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
1416 in_params.qp_handle_hi = ureq.qp_handle_hi; 1746 in_params.qp_handle_hi = ureq.qp_handle_hi;
1417 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; 1747 in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1418 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; 1748 in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1419 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; 1749 if (!qp->srq) {
1420 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; 1750 in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1751 in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1752 }
1421 1753
1422 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, 1754 qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1423 &in_params, &out_params); 1755 &in_params, &out_params);
@@ -1679,16 +2011,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1679 if (rc) 2011 if (rc)
1680 return ERR_PTR(rc); 2012 return ERR_PTR(rc);
1681 2013
1682 if (attrs->srq)
1683 return ERR_PTR(-EINVAL);
1684
1685 DP_DEBUG(dev, QEDR_MSG_QP, 2014 DP_DEBUG(dev, QEDR_MSG_QP,
1686 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", 2015 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1687 udata ? "user library" : "kernel", attrs->event_handler, pd, 2016 udata ? "user library" : "kernel", attrs->event_handler, pd,
1688 get_qedr_cq(attrs->send_cq), 2017 get_qedr_cq(attrs->send_cq),
1689 get_qedr_cq(attrs->send_cq)->icid, 2018 get_qedr_cq(attrs->send_cq)->icid,
1690 get_qedr_cq(attrs->recv_cq), 2019 get_qedr_cq(attrs->recv_cq),
1691 get_qedr_cq(attrs->recv_cq)->icid); 2020 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
1692 2021
1693 qp = kzalloc(sizeof(*qp), GFP_KERNEL); 2022 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1694 if (!qp) { 2023 if (!qp) {
@@ -1715,9 +2044,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1715 2044
1716 qp->ibqp.qp_num = qp->qp_id; 2045 qp->ibqp.qp_num = qp->qp_id;
1717 2046
1718 rc = qedr_idr_add(dev, qp, qp->qp_id); 2047 if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1719 if (rc) 2048 rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id);
1720 goto err; 2049 if (rc)
2050 goto err;
2051 }
1721 2052
1722 return &qp->ibqp; 2053 return &qp->ibqp;
1723 2054
@@ -2289,8 +2620,9 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
2289 2620
2290 qedr_free_qp_resources(dev, qp); 2621 qedr_free_qp_resources(dev, qp);
2291 2622
2292 if (atomic_dec_and_test(&qp->refcnt)) { 2623 if (atomic_dec_and_test(&qp->refcnt) &&
2293 qedr_idr_remove(dev, qp->qp_id); 2624 rdma_protocol_iwarp(&dev->ibdev, 1)) {
2625 qedr_idr_remove(dev, &dev->qpidr, qp->qp_id);
2294 kfree(qp); 2626 kfree(qp);
2295 } 2627 }
2296 return rc; 2628 return rc;
@@ -2305,7 +2637,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2305 if (!ah) 2637 if (!ah)
2306 return ERR_PTR(-ENOMEM); 2638 return ERR_PTR(-ENOMEM);
2307 2639
2308 ah->attr = *attr; 2640 rdma_copy_ah_attr(&ah->attr, attr);
2309 2641
2310 return &ah->ibah; 2642 return &ah->ibah;
2311} 2643}
@@ -2314,6 +2646,7 @@ int qedr_destroy_ah(struct ib_ah *ibah)
2314{ 2646{
2315 struct qedr_ah *ah = get_qedr_ah(ibah); 2647 struct qedr_ah *ah = get_qedr_ah(ibah);
2316 2648
2649 rdma_destroy_ah_attr(&ah->attr);
2317 kfree(ah); 2650 kfree(ah);
2318 return 0; 2651 return 0;
2319} 2652}
@@ -2705,9 +3038,9 @@ static void swap_wqe_data64(u64 *p)
2705 3038
2706static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, 3039static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2707 struct qedr_qp *qp, u8 *wqe_size, 3040 struct qedr_qp *qp, u8 *wqe_size,
2708 struct ib_send_wr *wr, 3041 const struct ib_send_wr *wr,
2709 struct ib_send_wr **bad_wr, u8 *bits, 3042 const struct ib_send_wr **bad_wr,
2710 u8 bit) 3043 u8 *bits, u8 bit)
2711{ 3044{
2712 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); 3045 u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2713 char *seg_prt, *wqe; 3046 char *seg_prt, *wqe;
@@ -2790,7 +3123,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2790 } while (0) 3123 } while (0)
2791 3124
2792static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, 3125static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2793 struct ib_send_wr *wr) 3126 const struct ib_send_wr *wr)
2794{ 3127{
2795 u32 data_size = 0; 3128 u32 data_size = 0;
2796 int i; 3129 int i;
@@ -2814,8 +3147,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2814 struct qedr_qp *qp, 3147 struct qedr_qp *qp,
2815 struct rdma_sq_rdma_wqe_1st *rwqe, 3148 struct rdma_sq_rdma_wqe_1st *rwqe,
2816 struct rdma_sq_rdma_wqe_2nd *rwqe2, 3149 struct rdma_sq_rdma_wqe_2nd *rwqe2,
2817 struct ib_send_wr *wr, 3150 const struct ib_send_wr *wr,
2818 struct ib_send_wr **bad_wr) 3151 const struct ib_send_wr **bad_wr)
2819{ 3152{
2820 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); 3153 rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2821 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); 3154 DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
@@ -2837,8 +3170,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2837 struct qedr_qp *qp, 3170 struct qedr_qp *qp,
2838 struct rdma_sq_send_wqe_1st *swqe, 3171 struct rdma_sq_send_wqe_1st *swqe,
2839 struct rdma_sq_send_wqe_2st *swqe2, 3172 struct rdma_sq_send_wqe_2st *swqe2,
2840 struct ib_send_wr *wr, 3173 const struct ib_send_wr *wr,
2841 struct ib_send_wr **bad_wr) 3174 const struct ib_send_wr **bad_wr)
2842{ 3175{
2843 memset(swqe2, 0, sizeof(*swqe2)); 3176 memset(swqe2, 0, sizeof(*swqe2));
2844 if (wr->send_flags & IB_SEND_INLINE) { 3177 if (wr->send_flags & IB_SEND_INLINE) {
@@ -2854,7 +3187,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2854 3187
2855static int qedr_prepare_reg(struct qedr_qp *qp, 3188static int qedr_prepare_reg(struct qedr_qp *qp,
2856 struct rdma_sq_fmr_wqe_1st *fwqe1, 3189 struct rdma_sq_fmr_wqe_1st *fwqe1,
2857 struct ib_reg_wr *wr) 3190 const struct ib_reg_wr *wr)
2858{ 3191{
2859 struct qedr_mr *mr = get_qedr_mr(wr->mr); 3192 struct qedr_mr *mr = get_qedr_mr(wr->mr);
2860 struct rdma_sq_fmr_wqe_2nd *fwqe2; 3193 struct rdma_sq_fmr_wqe_2nd *fwqe2;
@@ -2916,7 +3249,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2916 } 3249 }
2917} 3250}
2918 3251
2919static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) 3252static inline bool qedr_can_post_send(struct qedr_qp *qp,
3253 const struct ib_send_wr *wr)
2920{ 3254{
2921 int wq_is_full, err_wr, pbl_is_full; 3255 int wq_is_full, err_wr, pbl_is_full;
2922 struct qedr_dev *dev = qp->dev; 3256 struct qedr_dev *dev = qp->dev;
@@ -2953,8 +3287,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2953 return true; 3287 return true;
2954} 3288}
2955 3289
2956static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3290static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2957 struct ib_send_wr **bad_wr) 3291 const struct ib_send_wr **bad_wr)
2958{ 3292{
2959 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3293 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2960 struct qedr_qp *qp = get_qedr_qp(ibqp); 3294 struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3168,8 +3502,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3168 return rc; 3502 return rc;
3169} 3503}
3170 3504
3171int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 3505int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3172 struct ib_send_wr **bad_wr) 3506 const struct ib_send_wr **bad_wr)
3173{ 3507{
3174 struct qedr_dev *dev = get_qedr_dev(ibqp->device); 3508 struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3175 struct qedr_qp *qp = get_qedr_qp(ibqp); 3509 struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3234,8 +3568,104 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3234 return rc; 3568 return rc;
3235} 3569}
3236 3570
3237int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 3571static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3238 struct ib_recv_wr **bad_wr) 3572{
3573 u32 used;
3574
3575 /* Calculate number of elements used based on producer
3576 * count and consumer count and subtract it from max
3577 * work request supported so that we get elements left.
3578 */
3579 used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3580
3581 return hw_srq->max_wr - used;
3582}
3583
3584int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3585 const struct ib_recv_wr **bad_wr)
3586{
3587 struct qedr_srq *srq = get_qedr_srq(ibsrq);
3588 struct qedr_srq_hwq_info *hw_srq;
3589 struct qedr_dev *dev = srq->dev;
3590 struct qed_chain *pbl;
3591 unsigned long flags;
3592 int status = 0;
3593 u32 num_sge;
3594 u32 offset;
3595
3596 spin_lock_irqsave(&srq->lock, flags);
3597
3598 hw_srq = &srq->hw_srq;
3599 pbl = &srq->hw_srq.pbl;
3600 while (wr) {
3601 struct rdma_srq_wqe_header *hdr;
3602 int i;
3603
3604 if (!qedr_srq_elem_left(hw_srq) ||
3605 wr->num_sge > srq->hw_srq.max_sges) {
3606 DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
3607 hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3608 wr->num_sge, srq->hw_srq.max_sges);
3609 status = -ENOMEM;
3610 *bad_wr = wr;
3611 break;
3612 }
3613
3614 hdr = qed_chain_produce(pbl);
3615 num_sge = wr->num_sge;
3616 /* Set number of sge and work request id in header */
3617 SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3618
3619 srq->hw_srq.wr_prod_cnt++;
3620 hw_srq->wqe_prod++;
3621 hw_srq->sge_prod++;
3622
3623 DP_DEBUG(dev, QEDR_MSG_SRQ,
3624 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3625 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3626
3627 for (i = 0; i < wr->num_sge; i++) {
3628 struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3629
3630 /* Set SGE length, lkey and address */
3631 SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3632 wr->sg_list[i].length, wr->sg_list[i].lkey);
3633
3634 DP_DEBUG(dev, QEDR_MSG_SRQ,
3635 "[%d]: len %d key %x addr %x:%x\n",
3636 i, srq_sge->length, srq_sge->l_key,
3637 srq_sge->addr.hi, srq_sge->addr.lo);
3638 hw_srq->sge_prod++;
3639 }
3640
3641 /* Flush WQE and SGE information before
3642 * updating producer.
3643 */
3644 wmb();
3645
3646 /* SRQ producer is 8 bytes. Need to update SGE producer index
3647 * in first 4 bytes and need to update WQE producer in
3648 * next 4 bytes.
3649 */
3650 *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3651 offset = offsetof(struct rdma_srq_producers, wqe_prod);
3652 *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3653 hw_srq->wqe_prod;
3654
3655 /* Flush producer after updating it. */
3656 wmb();
3657 wr = wr->next;
3658 }
3659
3660 DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3661 qed_chain_get_elem_left(pbl));
3662 spin_unlock_irqrestore(&srq->lock, flags);
3663
3664 return status;
3665}
3666
3667int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3668 const struct ib_recv_wr **bad_wr)
3239{ 3669{
3240 struct qedr_qp *qp = get_qedr_qp(ibqp); 3670 struct qedr_qp *qp = get_qedr_qp(ibqp);
3241 struct qedr_dev *dev = qp->dev; 3671 struct qedr_dev *dev = qp->dev;
@@ -3625,6 +4055,31 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3625 wc->wr_id = wr_id; 4055 wc->wr_id = wr_id;
3626} 4056}
3627 4057
4058static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4059 struct qedr_cq *cq, struct ib_wc *wc,
4060 struct rdma_cqe_responder *resp)
4061{
4062 struct qedr_srq *srq = qp->srq;
4063 u64 wr_id;
4064
4065 wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
4066 le32_to_cpu(resp->srq_wr_id.lo), u64);
4067
4068 if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4069 wc->status = IB_WC_WR_FLUSH_ERR;
4070 wc->vendor_err = 0;
4071 wc->wr_id = wr_id;
4072 wc->byte_len = 0;
4073 wc->src_qp = qp->id;
4074 wc->qp = &qp->ibqp;
4075 wc->wr_id = wr_id;
4076 } else {
4077 __process_resp_one(dev, qp, cq, wc, resp, wr_id);
4078 }
4079 srq->hw_srq.wr_cons_cnt++;
4080
4081 return 1;
4082}
3628static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, 4083static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3629 struct qedr_cq *cq, struct ib_wc *wc, 4084 struct qedr_cq *cq, struct ib_wc *wc,
3630 struct rdma_cqe_responder *resp) 4085 struct rdma_cqe_responder *resp)
@@ -3674,6 +4129,19 @@ static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3674 } 4129 }
3675} 4130}
3676 4131
4132static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
4133 struct qedr_cq *cq, int num_entries,
4134 struct ib_wc *wc,
4135 struct rdma_cqe_responder *resp)
4136{
4137 int cnt;
4138
4139 cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
4140 consume_cqe(cq);
4141
4142 return cnt;
4143}
4144
3677static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, 4145static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
3678 struct qedr_cq *cq, int num_entries, 4146 struct qedr_cq *cq, int num_entries,
3679 struct ib_wc *wc, struct rdma_cqe_responder *resp, 4147 struct ib_wc *wc, struct rdma_cqe_responder *resp,
@@ -3751,6 +4219,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
3751 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, 4219 cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
3752 &cqe->resp, &update); 4220 &cqe->resp, &update);
3753 break; 4221 break;
4222 case RDMA_CQE_TYPE_RESPONDER_SRQ:
4223 cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4224 wc, &cqe->resp);
4225 update = 1;
4226 break;
3754 case RDMA_CQE_TYPE_INVALID: 4227 case RDMA_CQE_TYPE_INVALID:
3755 default: 4228 default:
3756 DP_ERR(dev, "Error: invalid CQE type = %d\n", 4229 DP_ERR(dev, "Error: invalid CQE type = %d\n",
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 2c57e4c592a6..0b7d0124b16c 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -66,6 +66,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
66 int qp_attr_mask, struct ib_qp_init_attr *); 66 int qp_attr_mask, struct ib_qp_init_attr *);
67int qedr_destroy_qp(struct ib_qp *ibqp); 67int qedr_destroy_qp(struct ib_qp *ibqp);
68 68
69struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
70 struct ib_srq_init_attr *attr,
71 struct ib_udata *udata);
72int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
73 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
74int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
75int qedr_destroy_srq(struct ib_srq *ibsrq);
76int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
77 const struct ib_recv_wr **bad_recv_wr);
69struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, 78struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
70 struct ib_udata *udata); 79 struct ib_udata *udata);
71int qedr_destroy_ah(struct ib_ah *ibah); 80int qedr_destroy_ah(struct ib_ah *ibah);
@@ -82,10 +91,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
82struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 91struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
83 u32 max_num_sg); 92 u32 max_num_sg);
84int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); 93int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
85int qedr_post_send(struct ib_qp *, struct ib_send_wr *, 94int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
86 struct ib_send_wr **bad_wr); 95 const struct ib_send_wr **bad_wr);
87int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *, 96int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
88 struct ib_recv_wr **bad_wr); 97 const struct ib_recv_wr **bad_wr);
89int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, 98int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
90 u8 port_num, const struct ib_wc *in_wc, 99 u8 port_num, const struct ib_wc *in_wc,
91 const struct ib_grh *in_grh, 100 const struct ib_grh *in_grh,
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 14b4057a2b8f..41babbc0db58 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
1489 rdi->dparms.props.max_mr_size = ~0ULL; 1489 rdi->dparms.props.max_mr_size = ~0ULL;
1490 rdi->dparms.props.max_qp = ib_qib_max_qps; 1490 rdi->dparms.props.max_qp = ib_qib_max_qps;
1491 rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs; 1491 rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
1492 rdi->dparms.props.max_sge = ib_qib_max_sges; 1492 rdi->dparms.props.max_send_sge = ib_qib_max_sges;
1493 rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
1493 rdi->dparms.props.max_sge_rd = ib_qib_max_sges; 1494 rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
1494 rdi->dparms.props.max_cq = ib_qib_max_cqs; 1495 rdi->dparms.props.max_cq = ib_qib_max_cqs;
1495 rdi->dparms.props.max_cqe = ib_qib_max_cqes; 1496 rdi->dparms.props.max_cqe = ib_qib_max_cqes;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index f9a46768a19a..666613eef88f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -78,9 +78,6 @@ struct qib_verbs_txreq;
78 78
79#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) 79#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
80 80
81/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
82#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
83
84#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) 81#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
85 82
86/* Values for set/get portinfo VLCap OperationalVLs */ 83/* Values for set/get portinfo VLCap OperationalVLs */
@@ -314,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg);
314 311
315void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); 312void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
316 313
317int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); 314int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr);
318 315
319void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, 316void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
320 int has_grh, void *data, u32 tlen, struct rvt_qp *qp); 317 int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
index 29ab11c34f3f..d1dae2af4ca9 100644
--- a/drivers/infiniband/hw/usnic/Kconfig
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -1,10 +1,10 @@
1config INFINIBAND_USNIC 1config INFINIBAND_USNIC
2 tristate "Verbs support for Cisco VIC" 2 tristate "Verbs support for Cisco VIC"
3 depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU 3 depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
4 depends on INFINIBAND_USER_ACCESS
4 select ENIC 5 select ENIC
5 select NET_VENDOR_CISCO 6 select NET_VENDOR_CISCO
6 select PCI_IOV 7 select PCI_IOV
7 select INFINIBAND_USER_ACCESS
8 ---help--- 8 ---help---
9 This is a low-level driver for Cisco's Virtual Interface 9 This is a low-level driver for Cisco's Virtual Interface
10 Cards (VICs), including the VIC 1240 and 1280 cards. 10 Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
index 995a26b65156..7875883621f4 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.c
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -92,8 +92,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
92 ufdev->pdev = pdev; 92 ufdev->pdev = pdev;
93 ufdev->netdev = pci_get_drvdata(pdev); 93 ufdev->netdev = pci_get_drvdata(pdev);
94 spin_lock_init(&ufdev->lock); 94 spin_lock_init(&ufdev->lock);
95 strncpy(ufdev->name, netdev_name(ufdev->netdev), 95 BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name));
96 sizeof(ufdev->name) - 1); 96 strcpy(ufdev->name, ufdev->netdev->name);
97 97
98 return ufdev; 98 return ufdev;
99} 99}
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
index 0b2cc4e79707..f0b71d593da5 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.h
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -57,7 +57,7 @@ struct usnic_fwd_dev {
57 char mac[ETH_ALEN]; 57 char mac[ETH_ALEN];
58 unsigned int mtu; 58 unsigned int mtu;
59 __be32 inaddr; 59 __be32 inaddr;
60 char name[IFNAMSIZ+1]; 60 char name[IFNAMSIZ];
61}; 61};
62 62
63struct usnic_fwd_flow { 63struct usnic_fwd_flow {
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a688a5669168..9973ac893635 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr)
666 666
667 usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); 667 usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
668 668
669 usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); 669 usnic_uiom_reg_release(mr->umem, ibmr->uobject->context);
670 kfree(mr); 670 kfree(mr);
671 return 0; 671 return 0;
672} 672}
@@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah)
771 return -EINVAL; 771 return -EINVAL;
772} 772}
773 773
774int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 774int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
775 struct ib_send_wr **bad_wr) 775 const struct ib_send_wr **bad_wr)
776{ 776{
777 usnic_dbg("\n"); 777 usnic_dbg("\n");
778 return -EINVAL; 778 return -EINVAL;
779} 779}
780 780
781int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 781int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
782 struct ib_recv_wr **bad_wr) 782 const struct ib_recv_wr **bad_wr)
783{ 783{
784 usnic_dbg("\n"); 784 usnic_dbg("\n");
785 return -EINVAL; 785 return -EINVAL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 1fda94425116..2a2c9beb715f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
80 struct ib_udata *udata); 80 struct ib_udata *udata);
81 81
82int usnic_ib_destroy_ah(struct ib_ah *ah); 82int usnic_ib_destroy_ah(struct ib_ah *ah);
83int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 83int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
84 struct ib_send_wr **bad_wr); 84 const struct ib_send_wr **bad_wr);
85int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 85int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
86 struct ib_recv_wr **bad_wr); 86 const struct ib_recv_wr **bad_wr);
87int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, 87int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
88 struct ib_wc *wc); 88 struct ib_wc *wc);
89int usnic_ib_req_notify_cq(struct ib_cq *cq, 89int usnic_ib_req_notify_cq(struct ib_cq *cq,
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 4381c0a9a873..9dd39daa602b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -41,6 +41,7 @@
41#include <linux/workqueue.h> 41#include <linux/workqueue.h>
42#include <linux/list.h> 42#include <linux/list.h>
43#include <linux/pci.h> 43#include <linux/pci.h>
44#include <rdma/ib_verbs.h>
44 45
45#include "usnic_log.h" 46#include "usnic_log.h"
46#include "usnic_uiom.h" 47#include "usnic_uiom.h"
@@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
88 for_each_sg(chunk->page_list, sg, chunk->nents, i) { 89 for_each_sg(chunk->page_list, sg, chunk->nents, i) {
89 page = sg_page(sg); 90 page = sg_page(sg);
90 pa = sg_phys(sg); 91 pa = sg_phys(sg);
91 if (dirty) 92 if (!PageDirty(page) && dirty)
92 set_page_dirty_lock(page); 93 set_page_dirty_lock(page);
93 put_page(page); 94 put_page(page);
94 usnic_dbg("pa: %pa\n", &pa); 95 usnic_dbg("pa: %pa\n", &pa);
@@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
114 dma_addr_t pa; 115 dma_addr_t pa;
115 unsigned int gup_flags; 116 unsigned int gup_flags;
116 117
118 /*
119 * If the combination of the addr and size requested for this memory
120 * region causes an integer overflow, return error.
121 */
122 if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size))
123 return -EINVAL;
124
125 if (!size)
126 return -EINVAL;
127
117 if (!can_do_mlock()) 128 if (!can_do_mlock())
118 return -EPERM; 129 return -EPERM;
119 130
@@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
127 138
128 down_write(&current->mm->mmap_sem); 139 down_write(&current->mm->mmap_sem);
129 140
130 locked = npages + current->mm->locked_vm; 141 locked = npages + current->mm->pinned_vm;
131 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 142 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
132 143
133 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 144 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
143 ret = 0; 154 ret = 0;
144 155
145 while (npages) { 156 while (npages) {
146 ret = get_user_pages(cur_base, 157 ret = get_user_pages_longterm(cur_base,
147 min_t(unsigned long, npages, 158 min_t(unsigned long, npages,
148 PAGE_SIZE / sizeof(struct page *)), 159 PAGE_SIZE / sizeof(struct page *)),
149 gup_flags, page_list, NULL); 160 gup_flags, page_list, NULL);
@@ -186,7 +197,7 @@ out:
186 if (ret < 0) 197 if (ret < 0)
187 usnic_uiom_put_pages(chunk_list, 0); 198 usnic_uiom_put_pages(chunk_list, 0);
188 else 199 else
189 current->mm->locked_vm = locked; 200 current->mm->pinned_vm = locked;
190 201
191 up_write(&current->mm->mmap_sem); 202 up_write(&current->mm->mmap_sem);
192 free_page((unsigned long) page_list); 203 free_page((unsigned long) page_list);
@@ -420,18 +431,22 @@ out_free_uiomr:
420 return ERR_PTR(err); 431 return ERR_PTR(err);
421} 432}
422 433
423void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) 434void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
435 struct ib_ucontext *ucontext)
424{ 436{
437 struct task_struct *task;
425 struct mm_struct *mm; 438 struct mm_struct *mm;
426 unsigned long diff; 439 unsigned long diff;
427 440
428 __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); 441 __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
429 442
430 mm = get_task_mm(current); 443 task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
431 if (!mm) { 444 if (!task)
432 kfree(uiomr); 445 goto out;
433 return; 446 mm = get_task_mm(task);
434 } 447 put_task_struct(task);
448 if (!mm)
449 goto out;
435 450
436 diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 451 diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
437 452
@@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
443 * up here and not be able to take the mmap_sem. In that case 458 * up here and not be able to take the mmap_sem. In that case
444 * we defer the vm_locked accounting to the system workqueue. 459 * we defer the vm_locked accounting to the system workqueue.
445 */ 460 */
446 if (closing) { 461 if (ucontext->closing) {
447 if (!down_write_trylock(&mm->mmap_sem)) { 462 if (!down_write_trylock(&mm->mmap_sem)) {
448 INIT_WORK(&uiomr->work, usnic_uiom_reg_account); 463 INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
449 uiomr->mm = mm; 464 uiomr->mm = mm;
@@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
455 } else 470 } else
456 down_write(&mm->mmap_sem); 471 down_write(&mm->mmap_sem);
457 472
458 current->mm->locked_vm -= diff; 473 mm->pinned_vm -= diff;
459 up_write(&mm->mmap_sem); 474 up_write(&mm->mmap_sem);
460 mmput(mm); 475 mmput(mm);
476out:
461 kfree(uiomr); 477 kfree(uiomr);
462} 478}
463 479
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 431efe4143f4..8c096acff123 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -39,6 +39,8 @@
39 39
40#include "usnic_uiom_interval_tree.h" 40#include "usnic_uiom_interval_tree.h"
41 41
42struct ib_ucontext;
43
42#define USNIC_UIOM_READ (1) 44#define USNIC_UIOM_READ (1)
43#define USNIC_UIOM_WRITE (2) 45#define USNIC_UIOM_WRITE (2)
44 46
@@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs);
89struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, 91struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
90 unsigned long addr, size_t size, 92 unsigned long addr, size_t size,
91 int access, int dmasync); 93 int access, int dmasync);
92void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); 94void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
95 struct ib_ucontext *ucontext);
93int usnic_uiom_init(char *drv_name); 96int usnic_uiom_init(char *drv_name);
94void usnic_uiom_fini(void); 97void usnic_uiom_fini(void);
95#endif /* USNIC_UIOM_H_ */ 98#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 44cb1cfba417..42b8685c997e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib(
378 return (enum ib_port_speed)speed; 378 return (enum ib_port_speed)speed;
379} 379}
380 380
381static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
382{
383 return attr_mask;
384}
385
386static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask) 381static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
387{ 382{
388 return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX); 383 return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index f95b97646c25..0f004c737620 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -278,19 +278,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq)
278 return ret; 278 return ret;
279} 279}
280 280
281/**
282 * pvrdma_modify_cq - modify the CQ moderation parameters
283 * @ibcq: the CQ to modify
284 * @cq_count: number of CQEs that will trigger an event
285 * @cq_period: max period of time in usec before triggering an event
286 *
287 * @return: -EOPNOTSUPP as CQ resize is not supported.
288 */
289int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
290{
291 return -EOPNOTSUPP;
292}
293
294static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) 281static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
295{ 282{
296 return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr( 283 return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
@@ -428,16 +415,3 @@ int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
428 /* Ensure we do not return errors from poll_cq */ 415 /* Ensure we do not return errors from poll_cq */
429 return npolled; 416 return npolled;
430} 417}
431
432/**
433 * pvrdma_resize_cq - resize CQ
434 * @ibcq: the completion queue
435 * @entries: CQ entries
436 * @udata: user data
437 *
438 * @return: -EOPNOTSUPP as CQ resize is not supported.
439 */
440int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
441{
442 return -EOPNOTSUPP;
443}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a81bbe6..a5719899f49a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock);
62static LIST_HEAD(pvrdma_device_list); 62static LIST_HEAD(pvrdma_device_list);
63static struct workqueue_struct *event_wq; 63static struct workqueue_struct *event_wq;
64 64
65static int pvrdma_add_gid(const union ib_gid *gid, 65static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
66 const struct ib_gid_attr *attr,
67 void **context);
68static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 66static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
69 67
70static ssize_t show_hca(struct device *device, struct device_attribute *attr, 68static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -216,8 +214,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
216 dev->ib_dev.post_send = pvrdma_post_send; 214 dev->ib_dev.post_send = pvrdma_post_send;
217 dev->ib_dev.post_recv = pvrdma_post_recv; 215 dev->ib_dev.post_recv = pvrdma_post_recv;
218 dev->ib_dev.create_cq = pvrdma_create_cq; 216 dev->ib_dev.create_cq = pvrdma_create_cq;
219 dev->ib_dev.modify_cq = pvrdma_modify_cq;
220 dev->ib_dev.resize_cq = pvrdma_resize_cq;
221 dev->ib_dev.destroy_cq = pvrdma_destroy_cq; 217 dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
222 dev->ib_dev.poll_cq = pvrdma_poll_cq; 218 dev->ib_dev.poll_cq = pvrdma_poll_cq;
223 dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; 219 dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
@@ -261,7 +257,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
261 dev->ib_dev.modify_srq = pvrdma_modify_srq; 257 dev->ib_dev.modify_srq = pvrdma_modify_srq;
262 dev->ib_dev.query_srq = pvrdma_query_srq; 258 dev->ib_dev.query_srq = pvrdma_query_srq;
263 dev->ib_dev.destroy_srq = pvrdma_destroy_srq; 259 dev->ib_dev.destroy_srq = pvrdma_destroy_srq;
264 dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv;
265 260
266 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, 261 dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
267 sizeof(struct pvrdma_srq *), 262 sizeof(struct pvrdma_srq *),
@@ -650,13 +645,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
650 return 0; 645 return 0;
651} 646}
652 647
653static int pvrdma_add_gid(const union ib_gid *gid, 648static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
654 const struct ib_gid_attr *attr,
655 void **context)
656{ 649{
657 struct pvrdma_dev *dev = to_vdev(attr->device); 650 struct pvrdma_dev *dev = to_vdev(attr->device);
658 651
659 return pvrdma_add_gid_at_index(dev, gid, 652 return pvrdma_add_gid_at_index(dev, &attr->gid,
660 ib_gid_type_to_pvrdma(attr->gid_type), 653 ib_gid_type_to_pvrdma(attr->gid_type),
661 attr->index); 654 attr->index);
662} 655}
@@ -699,8 +692,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
699} 692}
700 693
701static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, 694static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
695 struct net_device *ndev,
702 unsigned long event) 696 unsigned long event)
703{ 697{
698 struct pci_dev *pdev_net;
699 unsigned int slot;
700
704 switch (event) { 701 switch (event) {
705 case NETDEV_REBOOT: 702 case NETDEV_REBOOT:
706 case NETDEV_DOWN: 703 case NETDEV_DOWN:
@@ -718,6 +715,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
718 else 715 else
719 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); 716 pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
720 break; 717 break;
718 case NETDEV_UNREGISTER:
719 dev_put(dev->netdev);
720 dev->netdev = NULL;
721 break;
722 case NETDEV_REGISTER:
723 /* vmxnet3 will have same bus, slot. But func will be 0 */
724 slot = PCI_SLOT(dev->pdev->devfn);
725 pdev_net = pci_get_slot(dev->pdev->bus,
726 PCI_DEVFN(slot, 0));
727 if ((dev->netdev == NULL) &&
728 (pci_get_drvdata(pdev_net) == ndev)) {
729 /* this is our netdev */
730 dev->netdev = ndev;
731 dev_hold(ndev);
732 }
733 pci_dev_put(pdev_net);
734 break;
735
721 default: 736 default:
722 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 737 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
723 event, dev->ib_dev.name); 738 event, dev->ib_dev.name);
@@ -734,8 +749,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work)
734 749
735 mutex_lock(&pvrdma_device_list_lock); 750 mutex_lock(&pvrdma_device_list_lock);
736 list_for_each_entry(dev, &pvrdma_device_list, device_link) { 751 list_for_each_entry(dev, &pvrdma_device_list, device_link) {
737 if (dev->netdev == netdev_work->event_netdev) { 752 if ((netdev_work->event == NETDEV_REGISTER) ||
738 pvrdma_netdevice_event_handle(dev, netdev_work->event); 753 (dev->netdev == netdev_work->event_netdev)) {
754 pvrdma_netdevice_event_handle(dev,
755 netdev_work->event_netdev,
756 netdev_work->event);
739 break; 757 break;
740 } 758 }
741 } 759 }
@@ -968,6 +986,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
968 ret = -ENODEV; 986 ret = -ENODEV;
969 goto err_free_cq_ring; 987 goto err_free_cq_ring;
970 } 988 }
989 dev_hold(dev->netdev);
971 990
972 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name); 991 dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
973 992
@@ -1040,6 +1059,10 @@ err_free_intrs:
1040 pvrdma_free_irq(dev); 1059 pvrdma_free_irq(dev);
1041 pci_free_irq_vectors(pdev); 1060 pci_free_irq_vectors(pdev);
1042err_free_cq_ring: 1061err_free_cq_ring:
1062 if (dev->netdev) {
1063 dev_put(dev->netdev);
1064 dev->netdev = NULL;
1065 }
1043 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir); 1066 pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
1044err_free_async_ring: 1067err_free_async_ring:
1045 pvrdma_page_dir_cleanup(dev, &dev->async_pdir); 1068 pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
@@ -1079,6 +1102,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
1079 1102
1080 flush_workqueue(event_wq); 1103 flush_workqueue(event_wq);
1081 1104
1105 if (dev->netdev) {
1106 dev_put(dev->netdev);
1107 dev->netdev = NULL;
1108 }
1109
1082 /* Unregister ib device */ 1110 /* Unregister ib device */
1083 ib_unregister_device(&dev->ib_dev); 1111 ib_unregister_device(&dev->ib_dev);
1084 1112
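
The pvrdma hunks above pair the RDMA function with its vmxnet3 Ethernet function by PCI slot and keep a long-lived reference on that net_device across NETDEV_REGISTER/NETDEV_UNREGISTER. A minimal sketch of the hold/put pattern, using an illustrative demo_dev structure and helper names rather than the driver's real ones:

#include <linux/netdevice.h>
#include <linux/pci.h>

struct demo_dev {
        struct pci_dev *pdev;           /* the RDMA PCI function */
        struct net_device *netdev;      /* paired Ethernet function, refcounted */
};

/* NETDEV_REGISTER: adopt the netdev that sits in the same PCI slot as
 * our device but at function 0 (the paired Ethernet function). */
static void demo_adopt_netdev(struct demo_dev *dev, struct net_device *ndev)
{
        unsigned int slot = PCI_SLOT(dev->pdev->devfn);
        struct pci_dev *pdev_net = pci_get_slot(dev->pdev->bus,
                                                PCI_DEVFN(slot, 0));

        if (pdev_net && !dev->netdev && pci_get_drvdata(pdev_net) == ndev) {
                dev->netdev = ndev;
                dev_hold(ndev);         /* keep the netdev alive while paired */
        }
        pci_dev_put(pdev_net);          /* drop the pci_get_slot() reference */
}

/* NETDEV_UNREGISTER and the probe error / remove paths. */
static void demo_release_netdev(struct demo_dev *dev)
{
        if (dev->netdev) {
                dev_put(dev->netdev);
                dev->netdev = NULL;
        }
}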
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index eb5b1065ec08..60083c0363a5 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
599 qp->rq.offset + n * qp->rq.wqe_size); 599 qp->rq.offset + n * qp->rq.wqe_size);
600} 600}
601 601
602static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr) 602static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
603 const struct ib_reg_wr *wr)
603{ 604{
604 struct pvrdma_user_mr *mr = to_vmr(wr->mr); 605 struct pvrdma_user_mr *mr = to_vmr(wr->mr);
605 606
@@ -623,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
623 * 624 *
624 * @return: 0 on success, otherwise errno returned. 625 * @return: 0 on success, otherwise errno returned.
625 */ 626 */
626int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 627int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
627 struct ib_send_wr **bad_wr) 628 const struct ib_send_wr **bad_wr)
628{ 629{
629 struct pvrdma_qp *qp = to_vqp(ibqp); 630 struct pvrdma_qp *qp = to_vqp(ibqp);
630 struct pvrdma_dev *dev = to_vdev(ibqp->device); 631 struct pvrdma_dev *dev = to_vdev(ibqp->device);
@@ -827,8 +828,8 @@ out:
827 * 828 *
828 * @return: 0 on success, otherwise errno returned. 829 * @return: 0 on success, otherwise errno returned.
829 */ 830 */
830int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 831int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
831 struct ib_recv_wr **bad_wr) 832 const struct ib_recv_wr **bad_wr)
832{ 833{
833 struct pvrdma_dev *dev = to_vdev(ibqp->device); 834 struct pvrdma_dev *dev = to_vdev(ibqp->device);
834 unsigned long flags; 835 unsigned long flags;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index af235967a9c2..dc0ce877c7a3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -52,13 +52,6 @@
52 52
53#include "pvrdma.h" 53#include "pvrdma.h"
54 54
55int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
56 struct ib_recv_wr **bad_wr)
57{
58 /* No support for kernel clients. */
59 return -EOPNOTSUPP;
60}
61
62/** 55/**
63 * pvrdma_query_srq - query shared receive queue 56 * pvrdma_query_srq - query shared receive queue
64 * @ibsrq: the shared receive queue to query 57 * @ibsrq: the shared receive queue to query
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index a51463cd2f37..b65d10b0a875 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev,
82 props->max_qp = dev->dsr->caps.max_qp; 82 props->max_qp = dev->dsr->caps.max_qp;
83 props->max_qp_wr = dev->dsr->caps.max_qp_wr; 83 props->max_qp_wr = dev->dsr->caps.max_qp_wr;
84 props->device_cap_flags = dev->dsr->caps.device_cap_flags; 84 props->device_cap_flags = dev->dsr->caps.device_cap_flags;
85 props->max_sge = dev->dsr->caps.max_sge; 85 props->max_send_sge = dev->dsr->caps.max_sge;
86 props->max_recv_sge = dev->dsr->caps.max_sge;
86 props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge, 87 props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
87 dev->dsr->caps.max_sge_rd); 88 dev->dsr->caps.max_sge_rd);
88 props->max_srq = dev->dsr->caps.max_srq; 89 props->max_srq = dev->dsr->caps.max_srq;
@@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
154 props->gid_tbl_len = resp->attrs.gid_tbl_len; 155 props->gid_tbl_len = resp->attrs.gid_tbl_len;
155 props->port_cap_flags = 156 props->port_cap_flags =
156 pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags); 157 pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
157 props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; 158 props->port_cap_flags |= IB_PORT_CM_SUP;
159 props->ip_gids = true;
158 props->max_msg_sz = resp->attrs.max_msg_sz; 160 props->max_msg_sz = resp->attrs.max_msg_sz;
159 props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr; 161 props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
160 props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr; 162 props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
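
The attribute changes above follow the series-wide split of the single max_sge device attribute into max_send_sge and max_recv_sge, plus the move from the IB_PORT_IP_BASED_GIDS port capability bit to the ip_gids field of struct ib_port_attr. A small consumer-side sketch of clamping requested SGE counts against the new, possibly asymmetric limits (demo_clamp_sges is illustrative, not an existing helper):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Clamp a consumer's requested SGE counts against the device's
 * (possibly asymmetric) send/recv limits before creating a QP. */
static void demo_clamp_sges(const struct ib_device_attr *attr,
                            struct ib_qp_cap *cap)
{
        cap->max_send_sge = min_t(u32, cap->max_send_sge, attr->max_send_sge);
        cap->max_recv_sge = min_t(u32, cap->max_recv_sge, attr->max_recv_sge);
}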
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index b7b25728a7e5..b2e3ab50cb08 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -412,15 +412,10 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
412 u32 max_num_sg); 412 u32 max_num_sg);
413int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 413int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
414 int sg_nents, unsigned int *sg_offset); 414 int sg_nents, unsigned int *sg_offset);
415int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
416int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
417 struct ib_udata *udata);
418struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev, 415struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
419 const struct ib_cq_init_attr *attr, 416 const struct ib_cq_init_attr *attr,
420 struct ib_ucontext *context, 417 struct ib_ucontext *context,
421 struct ib_udata *udata); 418 struct ib_udata *udata);
422int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
423 struct ib_udata *udata);
424int pvrdma_destroy_cq(struct ib_cq *cq); 419int pvrdma_destroy_cq(struct ib_cq *cq);
425int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); 420int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
426int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); 421int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
@@ -435,8 +430,6 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
435 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); 430 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
436int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); 431int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
437int pvrdma_destroy_srq(struct ib_srq *srq); 432int pvrdma_destroy_srq(struct ib_srq *srq);
438int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
439 struct ib_recv_wr **bad_wr);
440 433
441struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, 434struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
442 struct ib_qp_init_attr *init_attr, 435 struct ib_qp_init_attr *init_attr,
@@ -446,9 +439,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
446int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, 439int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
447 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); 440 int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
448int pvrdma_destroy_qp(struct ib_qp *qp); 441int pvrdma_destroy_qp(struct ib_qp *qp);
449int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 442int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
450 struct ib_send_wr **bad_wr); 443 const struct ib_send_wr **bad_wr);
451int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 444int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
452 struct ib_recv_wr **bad_wr); 445 const struct ib_recv_wr **bad_wr);
453 446
454#endif /* __PVRDMA_VERBS_H__ */ 447#endif /* __PVRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index ba3639a0d77c..89ec0f64abfc 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
120 dev->n_ahs_allocated++; 120 dev->n_ahs_allocated++;
121 spin_unlock_irqrestore(&dev->n_ahs_lock, flags); 121 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
122 122
123 ah->attr = *ah_attr; 123 rdma_copy_ah_attr(&ah->attr, ah_attr);
124
124 atomic_set(&ah->refcount, 0); 125 atomic_set(&ah->refcount, 0);
125 126
126 if (dev->driver_f.notify_new_ah) 127 if (dev->driver_f.notify_new_ah)
@@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
148 dev->n_ahs_allocated--; 149 dev->n_ahs_allocated--;
149 spin_unlock_irqrestore(&dev->n_ahs_lock, flags); 150 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
150 151
152 rdma_destroy_ah_attr(&ah->attr);
151 kfree(ah); 153 kfree(ah);
152 154
153 return 0; 155 return 0;
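
struct rdma_ah_attr now carries a reference-counted sgid_attr, so rdmavt stores and releases AH attributes through the core helpers instead of plain struct assignment. A sketch of that lifecycle, with demo_ah standing in for any driver object that embeds a struct rdma_ah_attr:

#include <rdma/ib_verbs.h>

struct demo_ah {
        struct rdma_ah_attr attr;       /* may hold a ref on attr.grh.sgid_attr */
};

static void demo_ah_store(struct demo_ah *ah, const struct rdma_ah_attr *src)
{
        /* copies the attribute and takes a reference on its sgid_attr */
        rdma_copy_ah_attr(&ah->attr, src);
}

static void demo_ah_update(struct demo_ah *ah, const struct rdma_ah_attr *src)
{
        /* drops the old sgid_attr reference and takes the new one */
        rdma_replace_ah_attr(&ah->attr, src);
}

static void demo_ah_free(struct demo_ah *ah)
{
        /* must run before the containing object is freed */
        rdma_destroy_ah_attr(&ah->attr);
}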
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 41183bd665ca..5ce403c6cddb 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
780 if (!rdi) 780 if (!rdi)
781 return ERR_PTR(-EINVAL); 781 return ERR_PTR(-EINVAL);
782 782
783 if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || 783 if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
784 init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || 784 init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
785 init_attr->create_flags) 785 init_attr->create_flags)
786 return ERR_PTR(-EINVAL); 786 return ERR_PTR(-EINVAL);
787 787
788 /* Check receive queue parameters if no SRQ is specified. */ 788 /* Check receive queue parameters if no SRQ is specified. */
789 if (!init_attr->srq) { 789 if (!init_attr->srq) {
790 if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || 790 if (init_attr->cap.max_recv_sge >
791 rdi->dparms.props.max_recv_sge ||
791 init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) 792 init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
792 return ERR_PTR(-EINVAL); 793 return ERR_PTR(-EINVAL);
793 794
@@ -1336,13 +1337,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1336 qp->qp_access_flags = attr->qp_access_flags; 1337 qp->qp_access_flags = attr->qp_access_flags;
1337 1338
1338 if (attr_mask & IB_QP_AV) { 1339 if (attr_mask & IB_QP_AV) {
1339 qp->remote_ah_attr = attr->ah_attr; 1340 rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
1340 qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr); 1341 qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1341 qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); 1342 qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1342 } 1343 }
1343 1344
1344 if (attr_mask & IB_QP_ALT_PATH) { 1345 if (attr_mask & IB_QP_ALT_PATH) {
1345 qp->alt_ah_attr = attr->alt_ah_attr; 1346 rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
1346 qp->s_alt_pkey_index = attr->alt_pkey_index; 1347 qp->s_alt_pkey_index = attr->alt_pkey_index;
1347 } 1348 }
1348 1349
@@ -1459,6 +1460,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
1459 vfree(qp->s_wq); 1460 vfree(qp->s_wq);
1460 rdi->driver_f.qp_priv_free(rdi, qp); 1461 rdi->driver_f.qp_priv_free(rdi, qp);
1461 kfree(qp->s_ack_queue); 1462 kfree(qp->s_ack_queue);
1463 rdma_destroy_ah_attr(&qp->remote_ah_attr);
1464 rdma_destroy_ah_attr(&qp->alt_ah_attr);
1462 kfree(qp); 1465 kfree(qp);
1463 return 0; 1466 return 0;
1464} 1467}
@@ -1535,8 +1538,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1535 * 1538 *
1536 * Return: 0 on success otherwise errno 1539 * Return: 0 on success otherwise errno
1537 */ 1540 */
1538int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 1541int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1539 struct ib_recv_wr **bad_wr) 1542 const struct ib_recv_wr **bad_wr)
1540{ 1543{
1541 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); 1544 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1542 struct rvt_rwq *wq = qp->r_rq.wq; 1545 struct rvt_rwq *wq = qp->r_rq.wq;
@@ -1617,7 +1620,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1617static inline int rvt_qp_valid_operation( 1620static inline int rvt_qp_valid_operation(
1618 struct rvt_qp *qp, 1621 struct rvt_qp *qp,
1619 const struct rvt_operation_params *post_parms, 1622 const struct rvt_operation_params *post_parms,
1620 struct ib_send_wr *wr) 1623 const struct ib_send_wr *wr)
1621{ 1624{
1622 int len; 1625 int len;
1623 1626
@@ -1714,7 +1717,7 @@ static inline int rvt_qp_is_avail(
1714 * @wr: the work request to send 1717 * @wr: the work request to send
1715 */ 1718 */
1716static int rvt_post_one_wr(struct rvt_qp *qp, 1719static int rvt_post_one_wr(struct rvt_qp *qp,
1717 struct ib_send_wr *wr, 1720 const struct ib_send_wr *wr,
1718 int *call_send) 1721 int *call_send)
1719{ 1722{
1720 struct rvt_swqe *wqe; 1723 struct rvt_swqe *wqe;
@@ -1888,8 +1891,8 @@ bail_inval_free:
1888 * 1891 *
1889 * Return: 0 on success else errno 1892 * Return: 0 on success else errno
1890 */ 1893 */
1891int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1894int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1892 struct ib_send_wr **bad_wr) 1895 const struct ib_send_wr **bad_wr)
1893{ 1896{
1894 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); 1897 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1895 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); 1898 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -1945,8 +1948,8 @@ bail:
1945 * 1948 *
1946 * Return: 0 on success else errno 1949 * Return: 0 on success else errno
1947 */ 1950 */
1948int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 1951int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1949 struct ib_recv_wr **bad_wr) 1952 const struct ib_recv_wr **bad_wr)
1950{ 1953{
1951 struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); 1954 struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1952 struct rvt_rwq *wq; 1955 struct rvt_rwq *wq;
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 8409f80d5f25..264811fdc530 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -60,10 +60,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
60int rvt_destroy_qp(struct ib_qp *ibqp); 60int rvt_destroy_qp(struct ib_qp *ibqp);
61int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 61int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
62 int attr_mask, struct ib_qp_init_attr *init_attr); 62 int attr_mask, struct ib_qp_init_attr *init_attr);
63int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 63int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
64 struct ib_recv_wr **bad_wr); 64 const struct ib_recv_wr **bad_wr);
65int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 65int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
66 struct ib_send_wr **bad_wr); 66 const struct ib_send_wr **bad_wr);
67int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 67int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
68 struct ib_recv_wr **bad_wr); 68 const struct ib_recv_wr **bad_wr);
69#endif /* DEF_RVTQP_H */ 69#endif /* DEF_RVTQP_H */
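
These prototypes reflect the reworked post verbs: the work-request list is now const and *bad_wr is written only on failure. A minimal driver-side sketch of the new signature (DEMO_MAX_SEND_SGE and the body are illustrative):

#include <rdma/ib_verbs.h>

#define DEMO_MAX_SEND_SGE 4             /* illustrative device limit */

static int demo_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
                          const struct ib_send_wr **bad_wr)
{
        for (; wr; wr = wr->next) {
                if (wr->num_sge > DEMO_MAX_SEND_SGE) {
                        *bad_wr = wr;   /* point the caller at the bad WR */
                        return -EINVAL;
                }
                /* ...translate the WR and ring the doorbell here... */
        }
        return 0;
}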
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 3707952b4364..78e06fc456c5 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -82,7 +82,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
82 struct ib_srq *ret; 82 struct ib_srq *ret;
83 83
84 if (srq_init_attr->srq_type != IB_SRQT_BASIC) 84 if (srq_init_attr->srq_type != IB_SRQT_BASIC)
85 return ERR_PTR(-ENOSYS); 85 return ERR_PTR(-EOPNOTSUPP);
86 86
87 if (srq_init_attr->attr.max_sge == 0 || 87 if (srq_init_attr->attr.max_sge == 0 ||
88 srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || 88 srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7121e1b1eb89..10999fa69281 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
91 rxe->attr.max_qp = RXE_MAX_QP; 91 rxe->attr.max_qp = RXE_MAX_QP;
92 rxe->attr.max_qp_wr = RXE_MAX_QP_WR; 92 rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
93 rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS; 93 rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
94 rxe->attr.max_sge = RXE_MAX_SGE; 94 rxe->attr.max_send_sge = RXE_MAX_SGE;
95 rxe->attr.max_recv_sge = RXE_MAX_SGE;
95 rxe->attr.max_sge_rd = RXE_MAX_SGE_RD; 96 rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
96 rxe->attr.max_cq = RXE_MAX_CQ; 97 rxe->attr.max_cq = RXE_MAX_CQ;
97 rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1; 98 rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 7f1ae364088a..26fe8d7dbc55 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -55,29 +55,41 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
55void rxe_av_from_attr(u8 port_num, struct rxe_av *av, 55void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
56 struct rdma_ah_attr *attr) 56 struct rdma_ah_attr *attr)
57{ 57{
58 const struct ib_global_route *grh = rdma_ah_read_grh(attr);
59
58 memset(av, 0, sizeof(*av)); 60 memset(av, 0, sizeof(*av));
59 memcpy(&av->grh, rdma_ah_read_grh(attr), 61 memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw));
60 sizeof(*rdma_ah_read_grh(attr))); 62 av->grh.flow_label = grh->flow_label;
63 av->grh.sgid_index = grh->sgid_index;
64 av->grh.hop_limit = grh->hop_limit;
65 av->grh.traffic_class = grh->traffic_class;
61 av->port_num = port_num; 66 av->port_num = port_num;
62} 67}
63 68
64void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr) 69void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr)
65{ 70{
71 struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
72
66 attr->type = RDMA_AH_ATTR_TYPE_ROCE; 73 attr->type = RDMA_AH_ATTR_TYPE_ROCE;
67 memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh)); 74
75 memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw));
76 grh->flow_label = av->grh.flow_label;
77 grh->sgid_index = av->grh.sgid_index;
78 grh->hop_limit = av->grh.hop_limit;
79 grh->traffic_class = av->grh.traffic_class;
80
68 rdma_ah_set_ah_flags(attr, IB_AH_GRH); 81 rdma_ah_set_ah_flags(attr, IB_AH_GRH);
69 rdma_ah_set_port_num(attr, av->port_num); 82 rdma_ah_set_port_num(attr, av->port_num);
70} 83}
71 84
72void rxe_av_fill_ip_info(struct rxe_av *av, 85void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
73 struct rdma_ah_attr *attr,
74 struct ib_gid_attr *sgid_attr,
75 union ib_gid *sgid)
76{ 86{
77 rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid); 87 const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr;
88
89 rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
78 rdma_gid2ip((struct sockaddr *)&av->dgid_addr, 90 rdma_gid2ip((struct sockaddr *)&av->dgid_addr,
79 &rdma_ah_read_grh(attr)->dgid); 91 &rdma_ah_read_grh(attr)->dgid);
80 av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid); 92 av->network_type = rdma_gid_attr_network_type(sgid_attr);
81} 93}
82 94
83struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) 95struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
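
rxe no longer calls ib_get_cached_gid(): the core resolves and pins the source GID into the AH attribute, and the driver reads it back through grh->sgid_attr. A sketch of reading that embedded attribute (demo_read_av is illustrative):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

/* Read addressing info straight from the sgid_attr the core resolved
 * and pinned into the AH attribute; no per-call cache lookup needed. */
static void demo_read_av(const struct rdma_ah_attr *attr)
{
        const struct ib_global_route *grh = rdma_ah_read_grh(attr);
        const struct ib_gid_attr *sgid_attr = grh->sgid_attr;

        if (!sgid_attr)
                return;                 /* AH without a resolved source GID */

        pr_info("sgid index %d, network type %d\n",
                sgid_attr->index, rdma_gid_attr_network_type(sgid_attr));
}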
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 98d470d1f3fc..83311dd07019 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -276,6 +276,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
276 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: 276 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
277 if (wqe->wr.opcode != IB_WR_RDMA_READ && 277 if (wqe->wr.opcode != IB_WR_RDMA_READ &&
278 wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { 278 wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
279 wqe->status = IB_WC_FATAL_ERR;
279 return COMPST_ERROR; 280 return COMPST_ERROR;
280 } 281 }
281 reset_retry_counters(qp); 282 reset_retry_counters(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a51ece596c43..87d14f7ef21b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
43 43
44void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); 44void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
45 45
46void rxe_av_fill_ip_info(struct rxe_av *av, 46void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
47 struct rdma_ah_attr *attr,
48 struct ib_gid_attr *sgid_attr,
49 union ib_gid *sgid);
50 47
51struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); 48struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
52 49
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 59ec6d918ed4..8094cbaa54a9 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
182 182
183#endif 183#endif
184 184
185/*
186 * Derive the net_device from the av.
187 * For physical devices, this will just return rxe->ndev.
188 * But for VLAN devices, it will return the vlan dev.
189 * Caller should dev_put() the returned net_device.
190 */
191static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe,
192 int port_num,
193 struct rxe_av *av)
194{
195 union ib_gid gid;
196 struct ib_gid_attr attr;
197 struct net_device *ndev = rxe->ndev;
198
199 if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index,
200 &gid, &attr) == 0 &&
201 attr.ndev && attr.ndev != ndev)
202 ndev = attr.ndev;
203 else
204 /* Only to ensure that caller may call dev_put() */
205 dev_hold(ndev);
206
207 return ndev;
208}
209
210static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, 185static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
211 struct rxe_qp *qp, 186 struct rxe_qp *qp,
212 struct rxe_av *av) 187 struct rxe_av *av)
213{ 188{
189 const struct ib_gid_attr *attr;
214 struct dst_entry *dst = NULL; 190 struct dst_entry *dst = NULL;
215 struct net_device *ndev; 191 struct net_device *ndev;
216 192
217 ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av); 193 attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
194 av->grh.sgid_index);
195 if (IS_ERR(attr))
196 return NULL;
197 ndev = attr->ndev;
218 198
219 if (qp_type(qp) == IB_QPT_RC) 199 if (qp_type(qp) == IB_QPT_RC)
220 dst = sk_dst_get(qp->sk->sk); 200 dst = sk_dst_get(qp->sk->sk);
@@ -243,9 +223,13 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
243 rt6_get_cookie((struct rt6_info *)dst); 223 rt6_get_cookie((struct rt6_info *)dst);
244#endif 224#endif
245 } 225 }
246 }
247 226
248 dev_put(ndev); 227 if (dst && (qp_type(qp) == IB_QPT_RC)) {
228 dst_hold(dst);
229 sk_dst_set(qp->sk->sk, dst);
230 }
231 }
232 rdma_put_gid_attr(attr);
249 return dst; 233 return dst;
250} 234}
251 235
@@ -418,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
418 prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, 402 prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
419 av->grh.traffic_class, av->grh.hop_limit, df, xnet); 403 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
420 404
421 if (qp_type(qp) == IB_QPT_RC) 405 dst_release(dst);
422 sk_dst_set(qp->sk->sk, dst);
423 else
424 dst_release(dst);
425
426 return 0; 406 return 0;
427} 407}
428 408
@@ -450,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
450 av->grh.traffic_class, 430 av->grh.traffic_class,
451 av->grh.hop_limit); 431 av->grh.hop_limit);
452 432
453 if (qp_type(qp) == IB_QPT_RC) 433 dst_release(dst);
454 sk_dst_set(qp->sk->sk, dst);
455 else
456 dst_release(dst);
457
458 return 0; 434 return 0;
459} 435}
460 436
@@ -536,9 +512,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
536 unsigned int hdr_len; 512 unsigned int hdr_len;
537 struct sk_buff *skb; 513 struct sk_buff *skb;
538 struct net_device *ndev; 514 struct net_device *ndev;
515 const struct ib_gid_attr *attr;
539 const int port_num = 1; 516 const int port_num = 1;
540 517
541 ndev = rxe_netdev_from_av(rxe, port_num, av); 518 attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
519 if (IS_ERR(attr))
520 return NULL;
521 ndev = attr->ndev;
542 522
543 if (av->network_type == RDMA_NETWORK_IPV4) 523 if (av->network_type == RDMA_NETWORK_IPV4)
544 hdr_len = ETH_HLEN + sizeof(struct udphdr) + 524 hdr_len = ETH_HLEN + sizeof(struct udphdr) +
@@ -550,10 +530,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
550 skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev), 530 skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
551 GFP_ATOMIC); 531 GFP_ATOMIC);
552 532
553 if (unlikely(!skb)) { 533 if (unlikely(!skb))
554 dev_put(ndev); 534 goto out;
555 return NULL;
556 }
557 535
558 skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev)); 536 skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
559 537
@@ -568,7 +546,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
568 pkt->hdr = skb_put_zero(skb, paylen); 546 pkt->hdr = skb_put_zero(skb, paylen);
569 pkt->mask |= RXE_GRH_MASK; 547 pkt->mask |= RXE_GRH_MASK;
570 548
571 dev_put(ndev); 549out:
550 rdma_put_gid_attr(attr);
572 return skb; 551 return skb;
573} 552}
574 553
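
The removed rxe_netdev_from_av() helper is replaced by the kref'd GID table API: rdma_get_gid_attr() returns a referenced entry (or an ERR_PTR) and rdma_put_gid_attr() releases it. A sketch of the lookup/put pattern around extracting the associated net_device (demo_ndev_from_gid is illustrative):

#include <linux/err.h>
#include <linux/netdevice.h>
#include <rdma/ib_cache.h>

/* Look up a GID table entry by index; the returned attr is reference
 * counted and must be released with rdma_put_gid_attr(). */
static struct net_device *demo_ndev_from_gid(struct ib_device *ibdev,
                                             u8 port, unsigned int index)
{
        const struct ib_gid_attr *attr;
        struct net_device *ndev = NULL;

        attr = rdma_get_gid_attr(ibdev, port, index);
        if (IS_ERR(attr))
                return NULL;

        if (attr->ndev) {
                ndev = attr->ndev;
                dev_hold(ndev);         /* keep it after the attr is put */
        }
        rdma_put_gid_attr(attr);
        return ndev;
}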
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 1b596fbbe251..4555510d86c4 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -83,7 +83,7 @@ enum rxe_device_param {
83 RXE_MAX_SGE_RD = 32, 83 RXE_MAX_SGE_RD = 32,
84 RXE_MAX_CQ = 16384, 84 RXE_MAX_CQ = 16384,
85 RXE_MAX_LOG_CQE = 15, 85 RXE_MAX_LOG_CQE = 15,
86 RXE_MAX_MR = 2 * 1024, 86 RXE_MAX_MR = 256 * 1024,
87 RXE_MAX_PD = 0x7ffc, 87 RXE_MAX_PD = 0x7ffc,
88 RXE_MAX_QP_RD_ATOM = 128, 88 RXE_MAX_QP_RD_ATOM = 128,
89 RXE_MAX_EE_RD_ATOM = 0, 89 RXE_MAX_EE_RD_ATOM = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9f7aa1114b2..c58452daffc7 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
49 goto err1; 49 goto err1;
50 } 50 }
51 51
52 if (cap->max_send_sge > rxe->attr.max_sge) { 52 if (cap->max_send_sge > rxe->attr.max_send_sge) {
53 pr_warn("invalid send sge = %d > %d\n", 53 pr_warn("invalid send sge = %d > %d\n",
54 cap->max_send_sge, rxe->attr.max_sge); 54 cap->max_send_sge, rxe->attr.max_send_sge);
55 goto err1; 55 goto err1;
56 } 56 }
57 57
@@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
62 goto err1; 62 goto err1;
63 } 63 }
64 64
65 if (cap->max_recv_sge > rxe->attr.max_sge) { 65 if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
66 pr_warn("invalid recv sge = %d > %d\n", 66 pr_warn("invalid recv sge = %d > %d\n",
67 cap->max_recv_sge, rxe->attr.max_sge); 67 cap->max_recv_sge, rxe->attr.max_recv_sge);
68 goto err1; 68 goto err1;
69 } 69 }
70 } 70 }
@@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
580 struct ib_udata *udata) 580 struct ib_udata *udata)
581{ 581{
582 int err; 582 int err;
583 struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
584 union ib_gid sgid;
585 struct ib_gid_attr sgid_attr;
586 583
587 if (mask & IB_QP_MAX_QP_RD_ATOMIC) { 584 if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
588 int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); 585 int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
@@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
623 qp->attr.qkey = attr->qkey; 620 qp->attr.qkey = attr->qkey;
624 621
625 if (mask & IB_QP_AV) { 622 if (mask & IB_QP_AV) {
626 ib_get_cached_gid(&rxe->ib_dev, 1,
627 rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
628 &sgid, &sgid_attr);
629 rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr); 623 rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
630 rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr, 624 rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
631 &sgid_attr, &sgid);
632 if (sgid_attr.ndev)
633 dev_put(sgid_attr.ndev);
634 } 625 }
635 626
636 if (mask & IB_QP_ALT_PATH) { 627 if (mask & IB_QP_ALT_PATH) {
637 u8 sgid_index =
638 rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
639
640 ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
641 &sgid, &sgid_attr);
642
643 rxe_av_from_attr(attr->alt_port_num, &qp->alt_av, 628 rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
644 &attr->alt_ah_attr); 629 &attr->alt_ah_attr);
645 rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr, 630 rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
646 &sgid_attr, &sgid);
647 if (sgid_attr.ndev)
648 dev_put(sgid_attr.ndev);
649
650 qp->attr.alt_port_num = attr->alt_port_num; 631 qp->attr.alt_port_num = attr->alt_port_num;
651 qp->attr.alt_pkey_index = attr->alt_pkey_index; 632 qp->attr.alt_pkey_index = attr->alt_pkey_index;
652 qp->attr.alt_timeout = attr->alt_timeout; 633 qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index dfba44a40f0b..d30dbac24583 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -225,9 +225,14 @@ static int hdr_check(struct rxe_pkt_info *pkt)
225 goto err1; 225 goto err1;
226 } 226 }
227 227
228 if (unlikely(qpn == 0)) {
229 pr_warn_once("QP 0 not supported");
230 goto err1;
231 }
232
228 if (qpn != IB_MULTICAST_QPN) { 233 if (qpn != IB_MULTICAST_QPN) {
229 index = (qpn == 0) ? port->qp_smi_index : 234 index = (qpn == 1) ? port->qp_gsi_index : qpn;
230 ((qpn == 1) ? port->qp_gsi_index : qpn); 235
231 qp = rxe_pool_get_index(&rxe->qp_pool, index); 236 qp = rxe_pool_get_index(&rxe->qp_pool, index);
232 if (unlikely(!qp)) { 237 if (unlikely(!qp)) {
233 pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); 238 pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
@@ -256,8 +261,7 @@ static int hdr_check(struct rxe_pkt_info *pkt)
256 return 0; 261 return 0;
257 262
258err2: 263err2:
259 if (qp) 264 rxe_drop_ref(qp);
260 rxe_drop_ref(qp);
261err1: 265err1:
262 return -EINVAL; 266 return -EINVAL;
263} 267}
@@ -328,6 +332,7 @@ err1:
328 332
329static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) 333static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
330{ 334{
335 const struct ib_gid_attr *gid_attr;
331 union ib_gid dgid; 336 union ib_gid dgid;
332 union ib_gid *pdgid; 337 union ib_gid *pdgid;
333 338
@@ -339,9 +344,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
339 pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; 344 pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
340 } 345 }
341 346
342 return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid, 347 gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
343 IB_GID_TYPE_ROCE_UDP_ENCAP, 348 IB_GID_TYPE_ROCE_UDP_ENCAP,
344 1, skb->dev, NULL); 349 1, skb->dev);
350 if (IS_ERR(gid_attr))
351 return PTR_ERR(gid_attr);
352
353 rdma_put_gid_attr(gid_attr);
354 return 0;
345} 355}
346 356
347/* rxe_rcv is called from the interface driver */ 357/* rxe_rcv is called from the interface driver */
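
rxe_match_dgid() now uses rdma_find_gid_by_port(), which searches by GID value and returns a referenced entry on a hit or an ERR_PTR on a miss; the hit itself answers the question and the reference can be dropped immediately. A sketch of that shape (demo_dgid_is_local is illustrative):

#include <linux/err.h>
#include <linux/types.h>
#include <rdma/ib_cache.h>

static bool demo_dgid_is_local(struct ib_device *ibdev,
                               const union ib_gid *dgid,
                               struct net_device *ndev)
{
        const struct ib_gid_attr *gid_attr;

        gid_attr = rdma_find_gid_by_port(ibdev, dgid,
                                         IB_GID_TYPE_ROCE_UDP_ENCAP,
                                         1, ndev);
        if (IS_ERR(gid_attr))
                return false;           /* not one of our GIDs on port 1 */

        rdma_put_gid_attr(gid_attr);
        return true;
}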
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5b57de30dee4..aa5833318372 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp,
884 else 884 else
885 wc->network_hdr_type = RDMA_NETWORK_IPV6; 885 wc->network_hdr_type = RDMA_NETWORK_IPV6;
886 886
887 if (is_vlan_dev(skb->dev)) {
888 wc->wc_flags |= IB_WC_WITH_VLAN;
889 wc->vlan_id = vlan_dev_vlan_id(skb->dev);
890 }
891
887 if (pkt->mask & RXE_IMMDT_MASK) { 892 if (pkt->mask & RXE_IMMDT_MASK) {
888 wc->wc_flags |= IB_WC_WITH_IMM; 893 wc->wc_flags |= IB_WC_WITH_IMM;
889 wc->ex.imm_data = immdt_imm(pkt); 894 wc->ex.imm_data = immdt_imm(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9deafc3aa6af..f5b1e0ad6142 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
222 return 0; 222 return 0;
223} 223}
224 224
225static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr, 225static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
226 struct rxe_av *av) 226 struct rxe_av *av)
227{ 227{
228 int err;
229 union ib_gid sgid;
230 struct ib_gid_attr sgid_attr;
231
232 err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
233 rdma_ah_read_grh(attr)->sgid_index, &sgid,
234 &sgid_attr);
235 if (err) {
236 pr_err("Failed to query sgid. err = %d\n", err);
237 return err;
238 }
239
240 rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr); 228 rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
241 rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid); 229 rxe_av_fill_ip_info(av, attr);
242 dev_put(sgid_attr.ndev);
243 return 0;
244} 230}
245 231
246static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, 232static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -255,28 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
255 241
256 err = rxe_av_chk_attr(rxe, attr); 242 err = rxe_av_chk_attr(rxe, attr);
257 if (err) 243 if (err)
258 goto err1; 244 return ERR_PTR(err);
259 245
260 ah = rxe_alloc(&rxe->ah_pool); 246 ah = rxe_alloc(&rxe->ah_pool);
261 if (!ah) { 247 if (!ah)
262 err = -ENOMEM; 248 return ERR_PTR(-ENOMEM);
263 goto err1;
264 }
265 249
266 rxe_add_ref(pd); 250 rxe_add_ref(pd);
267 ah->pd = pd; 251 ah->pd = pd;
268 252
269 err = rxe_init_av(rxe, attr, &ah->av); 253 rxe_init_av(rxe, attr, &ah->av);
270 if (err)
271 goto err2;
272
273 return &ah->ibah; 254 return &ah->ibah;
274
275err2:
276 rxe_drop_ref(pd);
277 rxe_drop_ref(ah);
278err1:
279 return ERR_PTR(err);
280} 255}
281 256
282static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) 257static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
@@ -289,10 +264,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
289 if (err) 264 if (err)
290 return err; 265 return err;
291 266
292 err = rxe_init_av(rxe, attr, &ah->av); 267 rxe_init_av(rxe, attr, &ah->av);
293 if (err)
294 return err;
295
296 return 0; 268 return 0;
297} 269}
298 270
@@ -315,7 +287,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah)
315 return 0; 287 return 0;
316} 288}
317 289
318static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr) 290static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
319{ 291{
320 int err; 292 int err;
321 int i; 293 int i;
@@ -466,8 +438,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq)
466 return 0; 438 return 0;
467} 439}
468 440
469static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, 441static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
470 struct ib_recv_wr **bad_wr) 442 const struct ib_recv_wr **bad_wr)
471{ 443{
472 int err = 0; 444 int err = 0;
473 unsigned long flags; 445 unsigned long flags;
@@ -582,7 +554,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp)
582 return 0; 554 return 0;
583} 555}
584 556
585static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr, 557static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
586 unsigned int mask, unsigned int length) 558 unsigned int mask, unsigned int length)
587{ 559{
588 int num_sge = ibwr->num_sge; 560 int num_sge = ibwr->num_sge;
@@ -610,7 +582,7 @@ err1:
610} 582}
611 583
612static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, 584static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
613 struct ib_send_wr *ibwr) 585 const struct ib_send_wr *ibwr)
614{ 586{
615 wr->wr_id = ibwr->wr_id; 587 wr->wr_id = ibwr->wr_id;
616 wr->num_sge = ibwr->num_sge; 588 wr->num_sge = ibwr->num_sge;
@@ -665,7 +637,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
665 } 637 }
666} 638}
667 639
668static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, 640static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
669 unsigned int mask, unsigned int length, 641 unsigned int mask, unsigned int length,
670 struct rxe_send_wqe *wqe) 642 struct rxe_send_wqe *wqe)
671{ 643{
@@ -713,7 +685,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
713 return 0; 685 return 0;
714} 686}
715 687
716static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, 688static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
717 unsigned int mask, u32 length) 689 unsigned int mask, u32 length)
718{ 690{
719 int err; 691 int err;
@@ -754,8 +726,8 @@ err1:
754 return err; 726 return err;
755} 727}
756 728
757static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, 729static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
758 struct ib_send_wr **bad_wr) 730 const struct ib_send_wr **bad_wr)
759{ 731{
760 int err = 0; 732 int err = 0;
761 unsigned int mask; 733 unsigned int mask;
@@ -797,8 +769,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
797 return err; 769 return err;
798} 770}
799 771
800static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 772static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
801 struct ib_send_wr **bad_wr) 773 const struct ib_send_wr **bad_wr)
802{ 774{
803 struct rxe_qp *qp = to_rqp(ibqp); 775 struct rxe_qp *qp = to_rqp(ibqp);
804 776
@@ -820,8 +792,8 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
820 return rxe_post_send_kernel(qp, wr, bad_wr); 792 return rxe_post_send_kernel(qp, wr, bad_wr);
821} 793}
822 794
823static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 795static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
824 struct ib_recv_wr **bad_wr) 796 const struct ib_recv_wr **bad_wr)
825{ 797{
826 int err = 0; 798 int err = 0;
827 struct rxe_qp *qp = to_rqp(ibqp); 799 struct rxe_qp *qp = to_rqp(ibqp);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a50b062ed13e..1abe3c62f106 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -91,11 +91,9 @@ enum {
91 IPOIB_STOP_REAPER = 7, 91 IPOIB_STOP_REAPER = 7,
92 IPOIB_FLAG_ADMIN_CM = 9, 92 IPOIB_FLAG_ADMIN_CM = 9,
93 IPOIB_FLAG_UMCAST = 10, 93 IPOIB_FLAG_UMCAST = 10,
94 IPOIB_STOP_NEIGH_GC = 11,
95 IPOIB_NEIGH_TBL_FLUSH = 12, 94 IPOIB_NEIGH_TBL_FLUSH = 12,
96 IPOIB_FLAG_DEV_ADDR_SET = 13, 95 IPOIB_FLAG_DEV_ADDR_SET = 13,
97 IPOIB_FLAG_DEV_ADDR_CTRL = 14, 96 IPOIB_FLAG_DEV_ADDR_CTRL = 14,
98 IPOIB_FLAG_GOING_DOWN = 15,
99 97
100 IPOIB_MAX_BACKOFF_SECONDS = 16, 98 IPOIB_MAX_BACKOFF_SECONDS = 16,
101 99
@@ -252,11 +250,11 @@ struct ipoib_cm_tx {
252 struct ipoib_neigh *neigh; 250 struct ipoib_neigh *neigh;
253 struct ipoib_path *path; 251 struct ipoib_path *path;
254 struct ipoib_tx_buf *tx_ring; 252 struct ipoib_tx_buf *tx_ring;
255 unsigned tx_head; 253 unsigned int tx_head;
256 unsigned tx_tail; 254 unsigned int tx_tail;
257 unsigned long flags; 255 unsigned long flags;
258 u32 mtu; 256 u32 mtu;
259 unsigned max_send_sge; 257 unsigned int max_send_sge;
260}; 258};
261 259
262struct ipoib_cm_rx_buf { 260struct ipoib_cm_rx_buf {
@@ -325,15 +323,22 @@ struct ipoib_dev_priv {
325 spinlock_t lock; 323 spinlock_t lock;
326 324
327 struct net_device *dev; 325 struct net_device *dev;
326 void (*next_priv_destructor)(struct net_device *dev);
328 327
329 struct napi_struct send_napi; 328 struct napi_struct send_napi;
330 struct napi_struct recv_napi; 329 struct napi_struct recv_napi;
331 330
332 unsigned long flags; 331 unsigned long flags;
333 332
333 /*
334 * This protects access to the child_intfs list.
335 * To READ from child_intfs the RTNL or vlan_rwsem read side must be
336 * held. To WRITE RTNL and the vlan_rwsem write side must be held (in
337 * that order) This lock exists because we have a few contexts where
338 * we need the child_intfs, but do not want to grab the RTNL.
339 */
334 struct rw_semaphore vlan_rwsem; 340 struct rw_semaphore vlan_rwsem;
335 struct mutex mcast_mutex; 341 struct mutex mcast_mutex;
336 struct mutex sysfs_mutex;
337 342
338 struct rb_root path_tree; 343 struct rb_root path_tree;
339 struct list_head path_list; 344 struct list_head path_list;
@@ -373,8 +378,8 @@ struct ipoib_dev_priv {
373 struct ipoib_rx_buf *rx_ring; 378 struct ipoib_rx_buf *rx_ring;
374 379
375 struct ipoib_tx_buf *tx_ring; 380 struct ipoib_tx_buf *tx_ring;
376 unsigned tx_head; 381 unsigned int tx_head;
377 unsigned tx_tail; 382 unsigned int tx_tail;
378 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; 383 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
379 struct ib_ud_wr tx_wr; 384 struct ib_ud_wr tx_wr;
380 struct ib_wc send_wc[MAX_SEND_CQE]; 385 struct ib_wc send_wc[MAX_SEND_CQE];
@@ -404,7 +409,7 @@ struct ipoib_dev_priv {
404#endif 409#endif
405 u64 hca_caps; 410 u64 hca_caps;
406 struct ipoib_ethtool_st ethtool; 411 struct ipoib_ethtool_st ethtool;
407 unsigned max_send_sge; 412 unsigned int max_send_sge;
408 bool sm_fullmember_sendonly_support; 413 bool sm_fullmember_sendonly_support;
409 const struct net_device_ops *rn_ops; 414 const struct net_device_ops *rn_ops;
410}; 415};
@@ -414,7 +419,7 @@ struct ipoib_ah {
414 struct ib_ah *ah; 419 struct ib_ah *ah;
415 struct list_head list; 420 struct list_head list;
416 struct kref ref; 421 struct kref ref;
417 unsigned last_send; 422 unsigned int last_send;
418 int valid; 423 int valid;
419}; 424};
420 425
@@ -483,6 +488,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
483 kref_put(&ah->ref, ipoib_free_ah); 488 kref_put(&ah->ref, ipoib_free_ah);
484} 489}
485int ipoib_open(struct net_device *dev); 490int ipoib_open(struct net_device *dev);
491void ipoib_intf_free(struct net_device *dev);
486int ipoib_add_pkey_attr(struct net_device *dev); 492int ipoib_add_pkey_attr(struct net_device *dev);
487int ipoib_add_umcast_attr(struct net_device *dev); 493int ipoib_add_umcast_attr(struct net_device *dev);
488 494
@@ -510,9 +516,6 @@ void ipoib_ib_dev_down(struct net_device *dev);
510int ipoib_ib_dev_stop_default(struct net_device *dev); 516int ipoib_ib_dev_stop_default(struct net_device *dev);
511void ipoib_pkey_dev_check_presence(struct net_device *dev); 517void ipoib_pkey_dev_check_presence(struct net_device *dev);
512 518
513int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
514void ipoib_dev_cleanup(struct net_device *dev);
515
516void ipoib_mcast_join_task(struct work_struct *work); 519void ipoib_mcast_join_task(struct work_struct *work);
517void ipoib_mcast_carrier_on_task(struct work_struct *work); 520void ipoib_mcast_carrier_on_task(struct work_struct *work);
518void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); 521void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
@@ -600,7 +603,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv);
600void ipoib_drain_cq(struct net_device *dev); 603void ipoib_drain_cq(struct net_device *dev);
601 604
602void ipoib_set_ethtool_ops(struct net_device *dev); 605void ipoib_set_ethtool_ops(struct net_device *dev);
603void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
604 606
605#define IPOIB_FLAGS_RC 0x80 607#define IPOIB_FLAGS_RC 0x80
606#define IPOIB_FLAGS_UC 0x40 608#define IPOIB_FLAGS_UC 0x40
@@ -729,7 +731,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
729static inline 731static inline
730int ipoib_cm_dev_init(struct net_device *dev) 732int ipoib_cm_dev_init(struct net_device *dev)
731{ 733{
732 return -ENOSYS; 734 return -EOPNOTSUPP;
733} 735}
734 736
735static inline 737static inline
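
The new comment above documents the child_intfs locking rule: readers need the RTNL or the vlan_rwsem read side, writers need both the RTNL and the write side. A sketch of a reader that deliberately avoids the RTNL, assuming the child_intfs and list members of struct ipoib_dev_priv as defined in this header:

#include "ipoib.h"

/* Walk the child interfaces without taking the RTNL: per the rule
 * documented above, the vlan_rwsem read side is enough to READ
 * child_intfs. */
static void demo_for_each_child(struct ipoib_dev_priv *priv,
                                void (*fn)(struct ipoib_dev_priv *cpriv))
{
        struct ipoib_dev_priv *cpriv;

        down_read(&priv->vlan_rwsem);
        list_for_each_entry(cpriv, &priv->child_intfs, list)
                fn(cpriv);
        up_read(&priv->vlan_rwsem);
}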
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6535d9beb24d..ea01b8dd2be6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = {
78}; 78};
79 79
80static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 80static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
81 struct ib_cm_event *event); 81 const struct ib_cm_event *event);
82 82
83static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, 83static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
84 u64 mapping[IPOIB_CM_RX_SG]) 84 u64 mapping[IPOIB_CM_RX_SG])
@@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
94static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) 94static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
95{ 95{
96 struct ipoib_dev_priv *priv = ipoib_priv(dev); 96 struct ipoib_dev_priv *priv = ipoib_priv(dev);
97 struct ib_recv_wr *bad_wr;
98 int i, ret; 97 int i, ret;
99 98
100 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 99 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
102 for (i = 0; i < priv->cm.num_frags; ++i) 101 for (i = 0; i < priv->cm.num_frags; ++i)
103 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; 102 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
104 103
105 ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); 104 ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
106 if (unlikely(ret)) { 105 if (unlikely(ret)) {
107 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); 106 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
108 ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, 107 ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
@@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
120 struct ib_sge *sge, int id) 119 struct ib_sge *sge, int id)
121{ 120{
122 struct ipoib_dev_priv *priv = ipoib_priv(dev); 121 struct ipoib_dev_priv *priv = ipoib_priv(dev);
123 struct ib_recv_wr *bad_wr;
124 int i, ret; 122 int i, ret;
125 123
126 wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 124 wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
128 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 126 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
129 sge[i].addr = rx->rx_ring[id].mapping[i]; 127 sge[i].addr = rx->rx_ring[id].mapping[i];
130 128
131 ret = ib_post_recv(rx->qp, wr, &bad_wr); 129 ret = ib_post_recv(rx->qp, wr, NULL);
132 if (unlikely(ret)) { 130 if (unlikely(ret)) {
133 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 131 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
134 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 132 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
212 210
213static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) 211static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
214{ 212{
215 struct ib_send_wr *bad_wr;
216 struct ipoib_cm_rx *p; 213 struct ipoib_cm_rx *p;
217 214
218 /* We only reserved 1 extra slot in CQ for drain WRs, so 215 /* We only reserved 1 extra slot in CQ for drain WRs, so
@@ -227,7 +224,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
227 */ 224 */
228 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 225 p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
229 ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; 226 ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
230 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) 227 if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
231 ipoib_warn(priv, "failed to post drain wr\n"); 228 ipoib_warn(priv, "failed to post drain wr\n");
232 229
233 list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); 230 list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
@@ -275,7 +272,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
275 272
276static int ipoib_cm_modify_rx_qp(struct net_device *dev, 273static int ipoib_cm_modify_rx_qp(struct net_device *dev,
277 struct ib_cm_id *cm_id, struct ib_qp *qp, 274 struct ib_cm_id *cm_id, struct ib_qp *qp,
278 unsigned psn) 275 unsigned int psn)
279{ 276{
280 struct ipoib_dev_priv *priv = ipoib_priv(dev); 277 struct ipoib_dev_priv *priv = ipoib_priv(dev);
281 struct ib_qp_attr qp_attr; 278 struct ib_qp_attr qp_attr;
@@ -363,7 +360,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
363 if (!rx->rx_ring) 360 if (!rx->rx_ring)
364 return -ENOMEM; 361 return -ENOMEM;
365 362
366 t = kmalloc(sizeof *t, GFP_KERNEL); 363 t = kmalloc(sizeof(*t), GFP_KERNEL);
367 if (!t) { 364 if (!t) {
368 ret = -ENOMEM; 365 ret = -ENOMEM;
369 goto err_free_1; 366 goto err_free_1;
@@ -421,8 +418,9 @@ err_free_1:
421} 418}
422 419
423static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, 420static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
424 struct ib_qp *qp, struct ib_cm_req_event_param *req, 421 struct ib_qp *qp,
425 unsigned psn) 422 const struct ib_cm_req_event_param *req,
423 unsigned int psn)
426{ 424{
427 struct ipoib_dev_priv *priv = ipoib_priv(dev); 425 struct ipoib_dev_priv *priv = ipoib_priv(dev);
428 struct ipoib_cm_data data = {}; 426 struct ipoib_cm_data data = {};
@@ -432,7 +430,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
432 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); 430 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
433 431
434 rep.private_data = &data; 432 rep.private_data = &data;
435 rep.private_data_len = sizeof data; 433 rep.private_data_len = sizeof(data);
436 rep.flow_control = 0; 434 rep.flow_control = 0;
437 rep.rnr_retry_count = req->rnr_retry_count; 435 rep.rnr_retry_count = req->rnr_retry_count;
438 rep.srq = ipoib_cm_has_srq(dev); 436 rep.srq = ipoib_cm_has_srq(dev);
@@ -441,16 +439,17 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
441 return ib_send_cm_rep(cm_id, &rep); 439 return ib_send_cm_rep(cm_id, &rep);
442} 440}
443 441
444static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 442static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
443 const struct ib_cm_event *event)
445{ 444{
446 struct net_device *dev = cm_id->context; 445 struct net_device *dev = cm_id->context;
447 struct ipoib_dev_priv *priv = ipoib_priv(dev); 446 struct ipoib_dev_priv *priv = ipoib_priv(dev);
448 struct ipoib_cm_rx *p; 447 struct ipoib_cm_rx *p;
449 unsigned psn; 448 unsigned int psn;
450 int ret; 449 int ret;
451 450
452 ipoib_dbg(priv, "REQ arrived\n"); 451 ipoib_dbg(priv, "REQ arrived\n");
453 p = kzalloc(sizeof *p, GFP_KERNEL); 452 p = kzalloc(sizeof(*p), GFP_KERNEL);
454 if (!p) 453 if (!p)
455 return -ENOMEM; 454 return -ENOMEM;
456 p->dev = dev; 455 p->dev = dev;
@@ -503,7 +502,7 @@ err_qp:
503} 502}
504 503
505static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, 504static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
506 struct ib_cm_event *event) 505 const struct ib_cm_event *event)
507{ 506{
508 struct ipoib_cm_rx *p; 507 struct ipoib_cm_rx *p;
509 struct ipoib_dev_priv *priv; 508 struct ipoib_dev_priv *priv;
@@ -547,7 +546,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
547 0, PAGE_SIZE); 546 0, PAGE_SIZE);
548 --skb_shinfo(skb)->nr_frags; 547 --skb_shinfo(skb)->nr_frags;
549 } else { 548 } else {
550 size = min(length, (unsigned) PAGE_SIZE); 549 size = min_t(unsigned int, length, PAGE_SIZE);
551 550
552 skb_frag_size_set(frag, size); 551 skb_frag_size_set(frag, size);
553 skb->data_len += size; 552 skb->data_len += size;
@@ -641,8 +640,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
641 } 640 }
642 } 641 }
643 642
644 frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, 643 frags = PAGE_ALIGN(wc->byte_len -
645 (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; 644 min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
645 PAGE_SIZE;
646 646
647 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, 647 newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
648 mapping, GFP_ATOMIC); 648 mapping, GFP_ATOMIC);
@@ -657,7 +657,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
657 } 657 }
658 658
659 ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); 659 ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
660 memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); 660 memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
661 661
662 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 662 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
663 wc->byte_len, wc->slid); 663 wc->byte_len, wc->slid);
@@ -698,13 +698,11 @@ static inline int post_send(struct ipoib_dev_priv *priv,
698 unsigned int wr_id, 698 unsigned int wr_id,
699 struct ipoib_tx_buf *tx_req) 699 struct ipoib_tx_buf *tx_req)
700{ 700{
701 struct ib_send_wr *bad_wr;
702
703 ipoib_build_sge(priv, tx_req); 701 ipoib_build_sge(priv, tx_req);
704 702
705 priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; 703 priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;
706 704
707 return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); 705 return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
708} 706}
709 707
710void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 708void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
@@ -712,7 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
712 struct ipoib_dev_priv *priv = ipoib_priv(dev); 710 struct ipoib_dev_priv *priv = ipoib_priv(dev);
713 struct ipoib_tx_buf *tx_req; 711 struct ipoib_tx_buf *tx_req;
714 int rc; 712 int rc;
715 unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); 713 unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
716 714
717 if (unlikely(skb->len > tx->mtu)) { 715 if (unlikely(skb->len > tx->mtu)) {
718 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 716 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -982,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev)
982 cancel_delayed_work(&priv->cm.stale_task); 980 cancel_delayed_work(&priv->cm.stale_task);
983} 981}
984 982
985static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 983static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
984 const struct ib_cm_event *event)
986{ 985{
987 struct ipoib_cm_tx *p = cm_id->context; 986 struct ipoib_cm_tx *p = cm_id->context;
988 struct ipoib_dev_priv *priv = ipoib_priv(p->dev); 987 struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
@@ -1068,8 +1067,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
1068 struct ib_qp *tx_qp; 1067 struct ib_qp *tx_qp;
1069 1068
1070 if (dev->features & NETIF_F_SG) 1069 if (dev->features & NETIF_F_SG)
1071 attr.cap.max_send_sge = 1070 attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
1072 min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); 1071 MAX_SKB_FRAGS + 1);
1073 1072
1074 tx_qp = ib_create_qp(priv->pd, &attr); 1073 tx_qp = ib_create_qp(priv->pd, &attr);
1075 tx->max_send_sge = attr.cap.max_send_sge; 1074 tx->max_send_sge = attr.cap.max_send_sge;
@@ -1094,7 +1093,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
1094 req.qp_num = qp->qp_num; 1093 req.qp_num = qp->qp_num;
1095 req.qp_type = qp->qp_type; 1094 req.qp_type = qp->qp_type;
1096 req.private_data = &data; 1095 req.private_data = &data;
1097 req.private_data_len = sizeof data; 1096 req.private_data_len = sizeof(data);
1098 req.flow_control = 0; 1097 req.flow_control = 0;
1099 1098
1100 req.starting_psn = 0; /* FIXME */ 1099 req.starting_psn = 0; /* FIXME */
@@ -1152,7 +1151,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
1152 ret = -ENOMEM; 1151 ret = -ENOMEM;
1153 goto err_tx; 1152 goto err_tx;
1154 } 1153 }
1155 memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); 1154 memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring));
1156 1155
1157 p->qp = ipoib_cm_create_tx_qp(p->dev, p); 1156 p->qp = ipoib_cm_create_tx_qp(p->dev, p);
1158 memalloc_noio_restore(noio_flag); 1157 memalloc_noio_restore(noio_flag);
@@ -1248,7 +1247,7 @@ timeout:
1248} 1247}
1249 1248
1250static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 1249static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
1251 struct ib_cm_event *event) 1250 const struct ib_cm_event *event)
1252{ 1251{
1253 struct ipoib_cm_tx *tx = cm_id->context; 1252 struct ipoib_cm_tx *tx = cm_id->context;
1254 struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); 1253 struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
@@ -1305,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
1305 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1304 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1306 struct ipoib_cm_tx *tx; 1305 struct ipoib_cm_tx *tx;
1307 1306
1308 tx = kzalloc(sizeof *tx, GFP_ATOMIC); 1307 tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
1309 if (!tx) 1308 if (!tx)
1310 return NULL; 1309 return NULL;
1311 1310
@@ -1370,7 +1369,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
1370 neigh->daddr + QPN_AND_OPTIONS_OFFSET); 1369 neigh->daddr + QPN_AND_OPTIONS_OFFSET);
1371 goto free_neigh; 1370 goto free_neigh;
1372 } 1371 }
1373 memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); 1372 memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
1374 1373
1375 spin_unlock_irqrestore(&priv->lock, flags); 1374 spin_unlock_irqrestore(&priv->lock, flags);
1376 netif_tx_unlock_bh(dev); 1375 netif_tx_unlock_bh(dev);
@@ -1428,7 +1427,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1428 struct net_device *dev = priv->dev; 1427 struct net_device *dev = priv->dev;
1429 struct sk_buff *skb; 1428 struct sk_buff *skb;
1430 unsigned long flags; 1429 unsigned long flags;
1431 unsigned mtu = priv->mcast_mtu; 1430 unsigned int mtu = priv->mcast_mtu;
1432 1431
1433 netif_tx_lock_bh(dev); 1432 netif_tx_lock_bh(dev);
1434 spin_lock_irqsave(&priv->lock, flags); 1433 spin_lock_irqsave(&priv->lock, flags);
@@ -1518,19 +1517,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1518{ 1517{
1519 struct net_device *dev = to_net_dev(d); 1518 struct net_device *dev = to_net_dev(d);
1520 int ret; 1519 int ret;
1521 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1522
1523 if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
1524 return -EPERM;
1525
1526 if (!mutex_trylock(&priv->sysfs_mutex))
1527 return restart_syscall();
1528 1520
1529 if (!rtnl_trylock()) { 1521 if (!rtnl_trylock()) {
1530 mutex_unlock(&priv->sysfs_mutex);
1531 return restart_syscall(); 1522 return restart_syscall();
1532 } 1523 }
1533 1524
1525 if (dev->reg_state != NETREG_REGISTERED) {
1526 rtnl_unlock();
1527 return -EPERM;
1528 }
1529
1534 ret = ipoib_set_mode(dev, buf); 1530 ret = ipoib_set_mode(dev, buf);
1535 1531
1536 /* The assumption is that the function ipoib_set_mode returned 1532 /* The assumption is that the function ipoib_set_mode returned
@@ -1539,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1539 */ 1535 */
1540 if (ret != -EBUSY) 1536 if (ret != -EBUSY)
1541 rtnl_unlock(); 1537 rtnl_unlock();
1542 mutex_unlock(&priv->sysfs_mutex);
1543 1538
1544 return (!ret || ret == -EBUSY) ? count : ret; 1539 return (!ret || ret == -EBUSY) ? count : ret;
1545} 1540}
@@ -1564,7 +1559,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
1564 1559
1565 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 1560 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
1566 if (IS_ERR(priv->cm.srq)) { 1561 if (IS_ERR(priv->cm.srq)) {
1567 if (PTR_ERR(priv->cm.srq) != -ENOSYS) 1562 if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
1568 pr_warn("%s: failed to allocate SRQ, error %ld\n", 1563 pr_warn("%s: failed to allocate SRQ, error %ld\n",
1569 priv->ca->name, PTR_ERR(priv->cm.srq)); 1564 priv->ca->name, PTR_ERR(priv->cm.srq));
1570 priv->cm.srq = NULL; 1565 priv->cm.srq = NULL;
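The ipoib_cm.c hunks above drop the local bad_wr variables and pass NULL as the last argument of ib_post_send()/ib_post_recv(). A minimal sketch of the two call styles under the reworked verbs signatures (an illustration, not code from this series):

    #include <linux/printk.h>
    #include <rdma/ib_verbs.h>

    /* Post a single WR; with only one WR there is nothing extra to learn
     * from bad_wr, so NULL is passed, as the IPoIB conversions above do. */
    static int post_one_send(struct ib_qp *qp, struct ib_send_wr *wr)
    {
    	return ib_post_send(qp, wr, NULL);
    }

    /* For a chained post the out-pointer still reports where posting stopped. */
    static int post_send_chain(struct ib_qp *qp, struct ib_send_wr *first)
    {
    	const struct ib_send_wr *bad_wr;
    	int ret;

    	ret = ib_post_send(qp, first, &bad_wr);
    	if (ret)
    		pr_warn("posting stopped at wr_id %llu (%d)\n",
    			bad_wr->wr_id, ret);
    	return ret;
    }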
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 2706bf26cbac..83429925dfc6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
102 ret = rdma_set_cq_moderation(priv->recv_cq, 102 ret = rdma_set_cq_moderation(priv->recv_cq,
103 coal->rx_max_coalesced_frames, 103 coal->rx_max_coalesced_frames,
104 coal->rx_coalesce_usecs); 104 coal->rx_coalesce_usecs);
105 if (ret && ret != -ENOSYS) { 105 if (ret && ret != -EOPNOTSUPP) {
106 ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); 106 ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
107 return ret; 107 return ret;
108 } 108 }
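The ethtool hunk changes the tolerated error from -ENOSYS to -EOPNOTSUPP. A sketch of the resulting pattern, assuming the rdma_set_cq_moderation() helper used above: treat "not implemented by this provider" as non-fatal and propagate anything else.

    #include <linux/errno.h>
    #include <rdma/ib_verbs.h>

    static int try_cq_moderation(struct ib_cq *cq, u16 frames, u16 usecs)
    {
    	int ret = rdma_set_cq_moderation(cq, frames, usecs);

    	if (ret == -EOPNOTSUPP)
    		return 0;	/* optional feature, ignore quietly */
    	return ret;
    }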
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index ea302b054601..178488028734 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = {
262void ipoib_create_debug_files(struct net_device *dev) 262void ipoib_create_debug_files(struct net_device *dev)
263{ 263{
264 struct ipoib_dev_priv *priv = ipoib_priv(dev); 264 struct ipoib_dev_priv *priv = ipoib_priv(dev);
265 char name[IFNAMSIZ + sizeof "_path"]; 265 char name[IFNAMSIZ + sizeof("_path")];
266 266
267 snprintf(name, sizeof name, "%s_mcg", dev->name); 267 snprintf(name, sizeof(name), "%s_mcg", dev->name);
268 priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, 268 priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
269 ipoib_root, dev, &ipoib_mcg_fops); 269 ipoib_root, dev, &ipoib_mcg_fops);
270 if (!priv->mcg_dentry) 270 if (!priv->mcg_dentry)
271 ipoib_warn(priv, "failed to create mcg debug file\n"); 271 ipoib_warn(priv, "failed to create mcg debug file\n");
272 272
273 snprintf(name, sizeof name, "%s_path", dev->name); 273 snprintf(name, sizeof(name), "%s_path", dev->name);
274 priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, 274 priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
275 ipoib_root, dev, &ipoib_path_fops); 275 ipoib_root, dev, &ipoib_path_fops);
276 if (!priv->path_dentry) 276 if (!priv->path_dentry)
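The debugfs hunk only switches to the parenthesised sizeof style, but the buffer sizing it relies on is worth spelling out: sizeof on a string literal counts the terminating NUL, so IFNAMSIZ + sizeof("_path") always holds the longest interface name plus the suffix. A standalone (userspace) illustration:

    #include <stdio.h>

    int main(void)
    {
    	/* 16 stands in for IFNAMSIZ; sizeof("_path") is 6 (5 chars + NUL) */
    	char name[16 + sizeof("_path")];

    	snprintf(name, sizeof(name), "%s_path", "ib0.8001");
    	printf("buffer %zu bytes, name \"%s\"\n", sizeof(name), name);
    	return 0;
    }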
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f47f9ace1f48..9006a13af1de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -40,6 +40,7 @@
40 40
41#include <linux/ip.h> 41#include <linux/ip.h>
42#include <linux/tcp.h> 42#include <linux/tcp.h>
43#include <rdma/ib_cache.h>
43 44
44#include "ipoib.h" 45#include "ipoib.h"
45 46
@@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
57 struct ipoib_ah *ah; 58 struct ipoib_ah *ah;
58 struct ib_ah *vah; 59 struct ib_ah *vah;
59 60
60 ah = kmalloc(sizeof *ah, GFP_KERNEL); 61 ah = kmalloc(sizeof(*ah), GFP_KERNEL);
61 if (!ah) 62 if (!ah)
62 return ERR_PTR(-ENOMEM); 63 return ERR_PTR(-ENOMEM);
63 64
@@ -100,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
100static int ipoib_ib_post_receive(struct net_device *dev, int id) 101static int ipoib_ib_post_receive(struct net_device *dev, int id)
101{ 102{
102 struct ipoib_dev_priv *priv = ipoib_priv(dev); 103 struct ipoib_dev_priv *priv = ipoib_priv(dev);
103 struct ib_recv_wr *bad_wr;
104 int ret; 104 int ret;
105 105
106 priv->rx_wr.wr_id = id | IPOIB_OP_RECV; 106 priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
@@ -108,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
108 priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; 108 priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
109 109
110 110
111 ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); 111 ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
112 if (unlikely(ret)) { 112 if (unlikely(ret)) {
113 ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); 113 ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
114 ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping); 114 ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
@@ -202,7 +202,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
202 } 202 }
203 203
204 memcpy(mapping, priv->rx_ring[wr_id].mapping, 204 memcpy(mapping, priv->rx_ring[wr_id].mapping,
205 IPOIB_UD_RX_SG * sizeof *mapping); 205 IPOIB_UD_RX_SG * sizeof(*mapping));
206 206
207 /* 207 /*
208 * If we can't allocate a new RX buffer, dump 208 * If we can't allocate a new RX buffer, dump
@@ -541,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv,
541 struct ipoib_tx_buf *tx_req, 541 struct ipoib_tx_buf *tx_req,
542 void *head, int hlen) 542 void *head, int hlen)
543{ 543{
544 struct ib_send_wr *bad_wr;
545 struct sk_buff *skb = tx_req->skb; 544 struct sk_buff *skb = tx_req->skb;
546 545
547 ipoib_build_sge(priv, tx_req); 546 ipoib_build_sge(priv, tx_req);
@@ -558,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
558 } else 557 } else
559 priv->tx_wr.wr.opcode = IB_WR_SEND; 558 priv->tx_wr.wr.opcode = IB_WR_SEND;
560 559
561 return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); 560 return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
562} 561}
563 562
564int ipoib_send(struct net_device *dev, struct sk_buff *skb, 563int ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -568,7 +567,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
568 struct ipoib_tx_buf *tx_req; 567 struct ipoib_tx_buf *tx_req;
569 int hlen, rc; 568 int hlen, rc;
570 void *phead; 569 void *phead;
571 unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); 570 unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
572 571
573 if (skb_is_gso(skb)) { 572 if (skb_is_gso(skb)) {
574 hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); 573 hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -1069,7 +1068,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
1069 bool ret = false; 1068 bool ret = false;
1070 1069
1071 netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); 1070 netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
1072 if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) 1071 if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
1073 return false; 1072 return false;
1074 1073
1075 netif_addr_lock_bh(priv->dev); 1074 netif_addr_lock_bh(priv->dev);
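The last ipoib_ib.c hunk swaps ib_query_gid() for rdma_query_gid() when reading GID index 0, and the earlier hunk adds the <rdma/ib_cache.h> include it needs. A hedged sketch of the same lookup in isolation:

    #include <linux/string.h>
    #include <rdma/ib_cache.h>
    #include <rdma/ib_verbs.h>

    /* Compare GID index 0 of a port against an expected value; returns
     * false if the query fails or the GIDs differ. */
    static bool port_gid0_matches(struct ib_device *ca, u8 port,
    			      const union ib_gid *expected)
    {
    	union ib_gid gid0;

    	if (rdma_query_gid(ca, port, 0, &gid0))
    		return false;
    	return !memcmp(&gid0, expected, sizeof(gid0));
    }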
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95bc0f3..e3d28f9ad9c0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev)
215 return 0; 215 return 0;
216} 216}
217 217
218static void ipoib_uninit(struct net_device *dev)
219{
220 ipoib_dev_cleanup(dev);
221}
222
223static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) 218static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
224{ 219{
225 struct ipoib_dev_priv *priv = ipoib_priv(dev); 220 struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -634,7 +629,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
634{ 629{
635 struct ipoib_path_iter *iter; 630 struct ipoib_path_iter *iter;
636 631
637 iter = kmalloc(sizeof *iter, GFP_KERNEL); 632 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
638 if (!iter) 633 if (!iter)
639 return NULL; 634 return NULL;
640 635
@@ -770,8 +765,10 @@ static void path_rec_completion(int status,
770 struct rdma_ah_attr av; 765 struct rdma_ah_attr av;
771 766
772 if (!ib_init_ah_attr_from_path(priv->ca, priv->port, 767 if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
773 pathrec, &av)) 768 pathrec, &av, NULL)) {
774 ah = ipoib_create_ah(dev, priv->pd, &av); 769 ah = ipoib_create_ah(dev, priv->pd, &av);
770 rdma_destroy_ah_attr(&av);
771 }
775 } 772 }
776 773
777 spin_lock_irqsave(&priv->lock, flags); 774 spin_lock_irqsave(&priv->lock, flags);
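The path_rec_completion() hunk passes a NULL sgid_attr to ib_init_ah_attr_from_path() and then releases the attribute with rdma_destroy_ah_attr() once the AH exists, since the initialised attribute can now hold a reference to a GID table entry. A sketch of that create/destroy pairing (hypothetical helper, assuming the 4.19-era rdma_create_ah() signature):

    #include <rdma/ib_sa.h>
    #include <rdma/ib_verbs.h>

    /* Build an AH from a resolved path record; the caller checks the
     * result with IS_ERR_OR_NULL(). */
    static struct ib_ah *ah_from_path(struct ib_pd *pd, struct ib_device *ca,
    				  u8 port, struct sa_path_rec *rec)
    {
    	struct rdma_ah_attr av;
    	struct ib_ah *ah = NULL;

    	if (!ib_init_ah_attr_from_path(ca, port, rec, &av, NULL)) {
    		ah = rdma_create_ah(pd, &av);
    		rdma_destroy_ah_attr(&av);	/* drop any sgid_attr reference */
    	}
    	return ah;
    }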
@@ -883,7 +880,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
883 if (!priv->broadcast) 880 if (!priv->broadcast)
884 return NULL; 881 return NULL;
885 882
886 path = kzalloc(sizeof *path, GFP_ATOMIC); 883 path = kzalloc(sizeof(*path), GFP_ATOMIC);
887 if (!path) 884 if (!path)
888 return NULL; 885 return NULL;
889 886
@@ -1199,11 +1196,13 @@ static void ipoib_timeout(struct net_device *dev)
1199static int ipoib_hard_header(struct sk_buff *skb, 1196static int ipoib_hard_header(struct sk_buff *skb,
1200 struct net_device *dev, 1197 struct net_device *dev,
1201 unsigned short type, 1198 unsigned short type,
1202 const void *daddr, const void *saddr, unsigned len) 1199 const void *daddr,
1200 const void *saddr,
1201 unsigned int len)
1203{ 1202{
1204 struct ipoib_header *header; 1203 struct ipoib_header *header;
1205 1204
1206 header = skb_push(skb, sizeof *header); 1205 header = skb_push(skb, sizeof(*header));
1207 1206
1208 header->proto = htons(type); 1207 header->proto = htons(type);
1209 header->reserved = 0; 1208 header->reserved = 0;
@@ -1306,9 +1305,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
1306 int i; 1305 int i;
1307 LIST_HEAD(remove_list); 1306 LIST_HEAD(remove_list);
1308 1307
1309 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
1310 return;
1311
1312 spin_lock_irqsave(&priv->lock, flags); 1308 spin_lock_irqsave(&priv->lock, flags);
1313 1309
1314 htbl = rcu_dereference_protected(ntbl->htbl, 1310 htbl = rcu_dereference_protected(ntbl->htbl,
@@ -1320,9 +1316,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
1320 /* neigh is obsolete if it was idle for two GC periods */ 1316 /* neigh is obsolete if it was idle for two GC periods */
1321 dt = 2 * arp_tbl.gc_interval; 1317 dt = 2 * arp_tbl.gc_interval;
1322 neigh_obsolete = jiffies - dt; 1318 neigh_obsolete = jiffies - dt;
1323 /* handle possible race condition */
1324 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
1325 goto out_unlock;
1326 1319
1327 for (i = 0; i < htbl->size; i++) { 1320 for (i = 0; i < htbl->size; i++) {
1328 struct ipoib_neigh *neigh; 1321 struct ipoib_neigh *neigh;
@@ -1360,9 +1353,8 @@ static void ipoib_reap_neigh(struct work_struct *work)
1360 1353
1361 __ipoib_reap_neigh(priv); 1354 __ipoib_reap_neigh(priv);
1362 1355
1363 if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 1356 queue_delayed_work(priv->wq, &priv->neigh_reap_task,
1364 queue_delayed_work(priv->wq, &priv->neigh_reap_task, 1357 arp_tbl.gc_interval);
1365 arp_tbl.gc_interval);
1366} 1358}
1367 1359
1368 1360
@@ -1371,7 +1363,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
1371{ 1363{
1372 struct ipoib_neigh *neigh; 1364 struct ipoib_neigh *neigh;
1373 1365
1374 neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); 1366 neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC);
1375 if (!neigh) 1367 if (!neigh)
1376 return NULL; 1368 return NULL;
1377 1369
@@ -1524,9 +1516,8 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
1524 htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); 1516 htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
1525 if (!htbl) 1517 if (!htbl)
1526 return -ENOMEM; 1518 return -ENOMEM;
1527 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1528 size = roundup_pow_of_two(arp_tbl.gc_thresh3); 1519 size = roundup_pow_of_two(arp_tbl.gc_thresh3);
1529 buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL); 1520 buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
1530 if (!buckets) { 1521 if (!buckets) {
1531 kfree(htbl); 1522 kfree(htbl);
1532 return -ENOMEM; 1523 return -ENOMEM;
@@ -1539,7 +1530,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
1539 atomic_set(&ntbl->entries, 0); 1530 atomic_set(&ntbl->entries, 0);
1540 1531
1541 /* start garbage collection */ 1532 /* start garbage collection */
1542 clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1543 queue_delayed_work(priv->wq, &priv->neigh_reap_task, 1533 queue_delayed_work(priv->wq, &priv->neigh_reap_task,
1544 arp_tbl.gc_interval); 1534 arp_tbl.gc_interval);
1545 1535
@@ -1554,7 +1544,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
1554 struct ipoib_neigh __rcu **buckets = htbl->buckets; 1544 struct ipoib_neigh __rcu **buckets = htbl->buckets;
1555 struct ipoib_neigh_table *ntbl = htbl->ntbl; 1545 struct ipoib_neigh_table *ntbl = htbl->ntbl;
1556 1546
1557 kfree(buckets); 1547 kvfree(buckets);
1558 kfree(htbl); 1548 kfree(htbl);
1559 complete(&ntbl->deleted); 1549 complete(&ntbl->deleted);
1560} 1550}
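The neighbour-table hunks above allocate the bucket array with kvcalloc() and release it with kvfree(); a large table may be vmalloc-backed, so a plain kfree() would no longer be correct. Minimal sketch of the pairing:

    #include <linux/mm.h>
    #include <linux/slab.h>

    static void **alloc_bucket_array(unsigned int nr)
    {
    	/* may come from vmalloc space when nr is large; pair with kvfree() */
    	return kvcalloc(nr, sizeof(void *), GFP_KERNEL);
    }

    static void free_bucket_array(void **buckets)
    {
    	kvfree(buckets);
    }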
@@ -1649,15 +1639,11 @@ out_unlock:
1649static void ipoib_neigh_hash_uninit(struct net_device *dev) 1639static void ipoib_neigh_hash_uninit(struct net_device *dev)
1650{ 1640{
1651 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1641 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1652 int stopped;
1653 1642
1654 ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); 1643 ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
1655 init_completion(&priv->ntbl.deleted); 1644 init_completion(&priv->ntbl.deleted);
1656 1645
1657 /* Stop GC if called at init fail need to cancel work */ 1646 cancel_delayed_work_sync(&priv->neigh_reap_task);
1658 stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1659 if (!stopped)
1660 cancel_delayed_work(&priv->neigh_reap_task);
1661 1647
1662 ipoib_flush_neighs(priv); 1648 ipoib_flush_neighs(priv);
1663 1649
@@ -1755,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
1755 return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); 1741 return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
1756} 1742}
1757 1743
1758int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) 1744static int ipoib_dev_init(struct net_device *dev)
1759{ 1745{
1760 struct ipoib_dev_priv *priv = ipoib_priv(dev); 1746 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1761 int ret = -ENOMEM; 1747 int ret = -ENOMEM;
1762 1748
1763 priv->ca = ca;
1764 priv->port = port;
1765 priv->qp = NULL; 1749 priv->qp = NULL;
1766 1750
1767 /* 1751 /*
@@ -1777,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1777 /* create pd, which used both for control and datapath*/ 1761 /* create pd, which used both for control and datapath*/
1778 priv->pd = ib_alloc_pd(priv->ca, 0); 1762 priv->pd = ib_alloc_pd(priv->ca, 0);
1779 if (IS_ERR(priv->pd)) { 1763 if (IS_ERR(priv->pd)) {
1780 pr_warn("%s: failed to allocate PD\n", ca->name); 1764 pr_warn("%s: failed to allocate PD\n", priv->ca->name);
1781 goto clean_wq; 1765 goto clean_wq;
1782 } 1766 }
1783 1767
@@ -1787,7 +1771,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1787 goto out_free_pd; 1771 goto out_free_pd;
1788 } 1772 }
1789 1773
1790 if (ipoib_neigh_hash_init(priv) < 0) { 1774 ret = ipoib_neigh_hash_init(priv);
1775 if (ret) {
1791 pr_warn("%s failed to init neigh hash\n", dev->name); 1776 pr_warn("%s failed to init neigh hash\n", dev->name);
1792 goto out_dev_uninit; 1777 goto out_dev_uninit;
1793 } 1778 }
@@ -1796,12 +1781,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1796 if (ipoib_ib_dev_open(dev)) { 1781 if (ipoib_ib_dev_open(dev)) {
1797 pr_warn("%s failed to open device\n", dev->name); 1782 pr_warn("%s failed to open device\n", dev->name);
1798 ret = -ENODEV; 1783 ret = -ENODEV;
1799 goto out_dev_uninit; 1784 goto out_hash_uninit;
1800 } 1785 }
1801 } 1786 }
1802 1787
1803 return 0; 1788 return 0;
1804 1789
1790out_hash_uninit:
1791 ipoib_neigh_hash_uninit(dev);
1792
1805out_dev_uninit: 1793out_dev_uninit:
1806 ipoib_ib_dev_cleanup(dev); 1794 ipoib_ib_dev_cleanup(dev);
1807 1795
@@ -1821,21 +1809,151 @@ out:
1821 return ret; 1809 return ret;
1822} 1810}
1823 1811
1824void ipoib_dev_cleanup(struct net_device *dev) 1812/*
1813 * This must be called before doing an unregister_netdev on a parent device to
1814 * shutdown the IB event handler.
1815 */
1816static void ipoib_parent_unregister_pre(struct net_device *ndev)
1825{ 1817{
1826 struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; 1818 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
1827 LIST_HEAD(head);
1828 1819
1829 ASSERT_RTNL(); 1820 /*
1821 * ipoib_set_mac checks netif_running before pushing work, clearing
1822 * running ensures it will not add more work.

1823 */
1824 rtnl_lock();
1825 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
1826 rtnl_unlock();
1830 1827
1831 /* Delete any child interfaces first */ 1828 /* ipoib_event() cannot be running once this returns */
1832 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 1829 ib_unregister_event_handler(&priv->event_handler);
1833 /* Stop GC on child */ 1830
1834 set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); 1831 /*
1835 cancel_delayed_work(&cpriv->neigh_reap_task); 1832 * Work on the queue grabs the rtnl lock, so this cannot be done while
1836 unregister_netdevice_queue(cpriv->dev, &head); 1833 * also holding it.
1834 */
1835 flush_workqueue(ipoib_workqueue);
1836}
1837
1838static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
1839{
1840 priv->hca_caps = priv->ca->attrs.device_cap_flags;
1841
1842 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
1843 priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
1844
1845 if (priv->hca_caps & IB_DEVICE_UD_TSO)
1846 priv->dev->hw_features |= NETIF_F_TSO;
1847
1848 priv->dev->features |= priv->dev->hw_features;
1849 }
1850}
1851
1852static int ipoib_parent_init(struct net_device *ndev)
1853{
1854 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
1855 struct ib_port_attr attr;
1856 int result;
1857
1858 result = ib_query_port(priv->ca, priv->port, &attr);
1859 if (result) {
1860 pr_warn("%s: ib_query_port %d failed\n", priv->ca->name,
1861 priv->port);
1862 return result;
1863 }
1864 priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
1865
1866 result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
1867 if (result) {
1868 pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
1869 priv->ca->name, priv->port, result);
1870 return result;
1837 } 1871 }
1838 unregister_netdevice_many(&head); 1872
1873 result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid);
1874 if (result) {
1875 pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
1876 priv->ca->name, priv->port, result);
1877 return result;
1878 }
1879 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
1880 sizeof(union ib_gid));
1881
1882 SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
1883 priv->dev->dev_id = priv->port - 1;
1884
1885 return 0;
1886}
1887
1888static void ipoib_child_init(struct net_device *ndev)
1889{
1890 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
1891 struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
1892
1893 dev_hold(priv->parent);
1894
1895 down_write(&ppriv->vlan_rwsem);
1896 list_add_tail(&priv->list, &ppriv->child_intfs);
1897 up_write(&ppriv->vlan_rwsem);
1898
1899 priv->max_ib_mtu = ppriv->max_ib_mtu;
1900 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
1901 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
1902 memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
1903}
1904
1905static int ipoib_ndo_init(struct net_device *ndev)
1906{
1907 struct ipoib_dev_priv *priv = ipoib_priv(ndev);
1908 int rc;
1909
1910 if (priv->parent) {
1911 ipoib_child_init(ndev);
1912 } else {
1913 rc = ipoib_parent_init(ndev);
1914 if (rc)
1915 return rc;
1916 }
1917
1918 /* MTU will be reset when mcast join happens */
1919 ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
1920 priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
1921 ndev->max_mtu = IPOIB_CM_MTU;
1922
1923 ndev->neigh_priv_len = sizeof(struct ipoib_neigh);
1924
1925 /*
1926 * Set the full membership bit, so that we join the right
1927 * broadcast group, etc.
1928 */
1929 priv->pkey |= 0x8000;
1930
1931 ndev->broadcast[8] = priv->pkey >> 8;
1932 ndev->broadcast[9] = priv->pkey & 0xff;
1933 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
1934
1935 ipoib_set_dev_features(priv);
1936
1937 rc = ipoib_dev_init(ndev);
1938 if (rc) {
1939 pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
1940 priv->ca->name, priv->dev->name, priv->port, rc);
1941 }
1942
1943 return 0;
1944}
1945
1946static void ipoib_ndo_uninit(struct net_device *dev)
1947{
1948 struct ipoib_dev_priv *priv = ipoib_priv(dev);
1949
1950 ASSERT_RTNL();
1951
1952 /*
1953 * ipoib_remove_one guarantees the children are removed before the
1954 * parent, and that is the only place where a parent can be removed.
1955 */
1956 WARN_ON(!list_empty(&priv->child_intfs));
1839 1957
1840 ipoib_neigh_hash_uninit(dev); 1958 ipoib_neigh_hash_uninit(dev);
1841 1959
@@ -1847,6 +1965,16 @@ void ipoib_dev_cleanup(struct net_device *dev)
1847 destroy_workqueue(priv->wq); 1965 destroy_workqueue(priv->wq);
1848 priv->wq = NULL; 1966 priv->wq = NULL;
1849 } 1967 }
1968
1969 if (priv->parent) {
1970 struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
1971
1972 down_write(&ppriv->vlan_rwsem);
1973 list_del(&priv->list);
1974 up_write(&ppriv->vlan_rwsem);
1975
1976 dev_put(priv->parent);
1977 }
1850} 1978}
1851 1979
1852static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) 1980static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
@@ -1894,7 +2022,8 @@ static const struct header_ops ipoib_header_ops = {
1894}; 2022};
1895 2023
1896static const struct net_device_ops ipoib_netdev_ops_pf = { 2024static const struct net_device_ops ipoib_netdev_ops_pf = {
1897 .ndo_uninit = ipoib_uninit, 2025 .ndo_init = ipoib_ndo_init,
2026 .ndo_uninit = ipoib_ndo_uninit,
1898 .ndo_open = ipoib_open, 2027 .ndo_open = ipoib_open,
1899 .ndo_stop = ipoib_stop, 2028 .ndo_stop = ipoib_stop,
1900 .ndo_change_mtu = ipoib_change_mtu, 2029 .ndo_change_mtu = ipoib_change_mtu,
@@ -1913,7 +2042,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
1913}; 2042};
1914 2043
1915static const struct net_device_ops ipoib_netdev_ops_vf = { 2044static const struct net_device_ops ipoib_netdev_ops_vf = {
1916 .ndo_uninit = ipoib_uninit, 2045 .ndo_init = ipoib_ndo_init,
2046 .ndo_uninit = ipoib_ndo_uninit,
1917 .ndo_open = ipoib_open, 2047 .ndo_open = ipoib_open,
1918 .ndo_stop = ipoib_stop, 2048 .ndo_stop = ipoib_stop,
1919 .ndo_change_mtu = ipoib_change_mtu, 2049 .ndo_change_mtu = ipoib_change_mtu,
@@ -1945,6 +2075,13 @@ void ipoib_setup_common(struct net_device *dev)
1945 netif_keep_dst(dev); 2075 netif_keep_dst(dev);
1946 2076
1947 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); 2077 memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
2078
2079 /*
2080 * unregister_netdev always frees the netdev; we use this mode
2081 * consistently to unify all the various unregister paths, including
2082 * those connected to rtnl_link_ops which require it.
2083 */
2084 dev->needs_free_netdev = true;
1948} 2085}
1949 2086
1950static void ipoib_build_priv(struct net_device *dev) 2087static void ipoib_build_priv(struct net_device *dev)
@@ -1955,7 +2092,6 @@ static void ipoib_build_priv(struct net_device *dev)
1955 spin_lock_init(&priv->lock); 2092 spin_lock_init(&priv->lock);
1956 init_rwsem(&priv->vlan_rwsem); 2093 init_rwsem(&priv->vlan_rwsem);
1957 mutex_init(&priv->mcast_mutex); 2094 mutex_init(&priv->mcast_mutex);
1958 mutex_init(&priv->sysfs_mutex);
1959 2095
1960 INIT_LIST_HEAD(&priv->path_list); 2096 INIT_LIST_HEAD(&priv->path_list);
1961 INIT_LIST_HEAD(&priv->child_intfs); 2097 INIT_LIST_HEAD(&priv->child_intfs);
@@ -1999,9 +2135,7 @@ static struct net_device
1999 rn->send = ipoib_send; 2135 rn->send = ipoib_send;
2000 rn->attach_mcast = ipoib_mcast_attach; 2136 rn->attach_mcast = ipoib_mcast_attach;
2001 rn->detach_mcast = ipoib_mcast_detach; 2137 rn->detach_mcast = ipoib_mcast_detach;
2002 rn->free_rdma_netdev = free_netdev;
2003 rn->hca = hca; 2138 rn->hca = hca;
2004
2005 dev->netdev_ops = &ipoib_netdev_default_pf; 2139 dev->netdev_ops = &ipoib_netdev_default_pf;
2006 2140
2007 return dev; 2141 return dev;
@@ -2039,6 +2173,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
2039 if (!priv) 2173 if (!priv)
2040 return NULL; 2174 return NULL;
2041 2175
2176 priv->ca = hca;
2177 priv->port = port;
2178
2042 dev = ipoib_get_netdev(hca, port, name); 2179 dev = ipoib_get_netdev(hca, port, name);
2043 if (!dev) 2180 if (!dev)
2044 goto free_priv; 2181 goto free_priv;
@@ -2053,6 +2190,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
2053 2190
2054 rn = netdev_priv(dev); 2191 rn = netdev_priv(dev);
2055 rn->clnt_priv = priv; 2192 rn->clnt_priv = priv;
2193
2194 /*
2195 * Only the child register_netdev flows can handle priv_destructor
2196 * being set, so we force it to NULL here and handle manually until it
2197 * is safe to turn on.
2198 */
2199 priv->next_priv_destructor = dev->priv_destructor;
2200 dev->priv_destructor = NULL;
2201
2056 ipoib_build_priv(dev); 2202 ipoib_build_priv(dev);
2057 2203
2058 return priv; 2204 return priv;
@@ -2061,6 +2207,27 @@ free_priv:
2061 return NULL; 2207 return NULL;
2062} 2208}
2063 2209
2210void ipoib_intf_free(struct net_device *dev)
2211{
2212 struct ipoib_dev_priv *priv = ipoib_priv(dev);
2213 struct rdma_netdev *rn = netdev_priv(dev);
2214
2215 dev->priv_destructor = priv->next_priv_destructor;
2216 if (dev->priv_destructor)
2217 dev->priv_destructor(dev);
2218
2219 /*
2220 * There are some error flows around register_netdev failing that may
2221 * attempt to call priv_destructor twice, prevent that from happening.
2222 */
2223 dev->priv_destructor = NULL;
2224
2225 /* unregister/destroy is very complicated. Make bugs more obvious. */
2226 rn->clnt_priv = NULL;
2227
2228 kfree(priv);
2229}
2230
2064static ssize_t show_pkey(struct device *dev, 2231static ssize_t show_pkey(struct device *dev,
2065 struct device_attribute *attr, char *buf) 2232 struct device_attribute *attr, char *buf)
2066{ 2233{
@@ -2186,12 +2353,6 @@ static ssize_t create_child(struct device *dev,
2186 if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000) 2353 if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
2187 return -EINVAL; 2354 return -EINVAL;
2188 2355
2189 /*
2190 * Set the full membership bit, so that we join the right
2191 * broadcast group, etc.
2192 */
2193 pkey |= 0x8000;
2194
2195 ret = ipoib_vlan_add(to_net_dev(dev), pkey); 2356 ret = ipoib_vlan_add(to_net_dev(dev), pkey);
2196 2357
2197 return ret ? ret : count; 2358 return ret ? ret : count;
@@ -2223,87 +2384,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
2223 return device_create_file(&dev->dev, &dev_attr_pkey); 2384 return device_create_file(&dev->dev, &dev_attr_pkey);
2224} 2385}
2225 2386
2226void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
2227{
2228 priv->hca_caps = hca->attrs.device_cap_flags;
2229
2230 if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
2231 priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
2232
2233 if (priv->hca_caps & IB_DEVICE_UD_TSO)
2234 priv->dev->hw_features |= NETIF_F_TSO;
2235
2236 priv->dev->features |= priv->dev->hw_features;
2237 }
2238}
2239
2240static struct net_device *ipoib_add_port(const char *format, 2387static struct net_device *ipoib_add_port(const char *format,
2241 struct ib_device *hca, u8 port) 2388 struct ib_device *hca, u8 port)
2242{ 2389{
2243 struct ipoib_dev_priv *priv; 2390 struct ipoib_dev_priv *priv;
2244 struct ib_port_attr attr; 2391 struct net_device *ndev;
2245 struct rdma_netdev *rn; 2392 int result;
2246 int result = -ENOMEM;
2247 2393
2248 priv = ipoib_intf_alloc(hca, port, format); 2394 priv = ipoib_intf_alloc(hca, port, format);
2249 if (!priv) { 2395 if (!priv) {
2250 pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); 2396 pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
2251 goto alloc_mem_failed; 2397 return ERR_PTR(-ENOMEM);
2252 }
2253
2254 SET_NETDEV_DEV(priv->dev, hca->dev.parent);
2255 priv->dev->dev_id = port - 1;
2256
2257 result = ib_query_port(hca, port, &attr);
2258 if (result) {
2259 pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
2260 goto device_init_failed;
2261 }
2262
2263 priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
2264
2265 /* MTU will be reset when mcast join happens */
2266 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
2267 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
2268 priv->dev->max_mtu = IPOIB_CM_MTU;
2269
2270 priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
2271
2272 result = ib_query_pkey(hca, port, 0, &priv->pkey);
2273 if (result) {
2274 pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
2275 hca->name, port, result);
2276 goto device_init_failed;
2277 }
2278
2279 ipoib_set_dev_features(priv, hca);
2280
2281 /*
2282 * Set the full membership bit, so that we join the right
2283 * broadcast group, etc.
2284 */
2285 priv->pkey |= 0x8000;
2286
2287 priv->dev->broadcast[8] = priv->pkey >> 8;
2288 priv->dev->broadcast[9] = priv->pkey & 0xff;
2289
2290 result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
2291 if (result) {
2292 pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
2293 hca->name, port, result);
2294 goto device_init_failed;
2295 }
2296
2297 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
2298 sizeof(union ib_gid));
2299 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
2300
2301 result = ipoib_dev_init(priv->dev, hca, port);
2302 if (result) {
2303 pr_warn("%s: failed to initialize port %d (ret = %d)\n",
2304 hca->name, port, result);
2305 goto device_init_failed;
2306 } 2398 }
2399 ndev = priv->dev;
2307 2400
2308 INIT_IB_EVENT_HANDLER(&priv->event_handler, 2401 INIT_IB_EVENT_HANDLER(&priv->event_handler,
2309 priv->ca, ipoib_event); 2402 priv->ca, ipoib_event);
@@ -2312,46 +2405,43 @@ static struct net_device *ipoib_add_port(const char *format,
2312 /* call event handler to ensure pkey in sync */ 2405 /* call event handler to ensure pkey in sync */
2313 queue_work(ipoib_workqueue, &priv->flush_heavy); 2406 queue_work(ipoib_workqueue, &priv->flush_heavy);
2314 2407
2315 result = register_netdev(priv->dev); 2408 result = register_netdev(ndev);
2316 if (result) { 2409 if (result) {
2317 pr_warn("%s: couldn't register ipoib port %d; error %d\n", 2410 pr_warn("%s: couldn't register ipoib port %d; error %d\n",
2318 hca->name, port, result); 2411 hca->name, port, result);
2319 goto register_failed; 2412
2413 ipoib_parent_unregister_pre(ndev);
2414 ipoib_intf_free(ndev);
2415 free_netdev(ndev);
2416
2417 return ERR_PTR(result);
2320 } 2418 }
2321 2419
2322 result = -ENOMEM; 2420 /*
2323 if (ipoib_cm_add_mode_attr(priv->dev)) 2421 * We cannot set priv_destructor before register_netdev because we
2422 * need priv to be always valid during the error flow to execute
2423 * ipoib_parent_unregister_pre(). Instead handle it manually and only
2424 * enter priv_destructor mode once we are completely registered.
2425 */
2426 ndev->priv_destructor = ipoib_intf_free;
2427
2428 if (ipoib_cm_add_mode_attr(ndev))
2324 goto sysfs_failed; 2429 goto sysfs_failed;
2325 if (ipoib_add_pkey_attr(priv->dev)) 2430 if (ipoib_add_pkey_attr(ndev))
2326 goto sysfs_failed; 2431 goto sysfs_failed;
2327 if (ipoib_add_umcast_attr(priv->dev)) 2432 if (ipoib_add_umcast_attr(ndev))
2328 goto sysfs_failed; 2433 goto sysfs_failed;
2329 if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) 2434 if (device_create_file(&ndev->dev, &dev_attr_create_child))
2330 goto sysfs_failed; 2435 goto sysfs_failed;
2331 if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) 2436 if (device_create_file(&ndev->dev, &dev_attr_delete_child))
2332 goto sysfs_failed; 2437 goto sysfs_failed;
2333 2438
2334 return priv->dev; 2439 return ndev;
2335 2440
2336sysfs_failed: 2441sysfs_failed:
2337 unregister_netdev(priv->dev); 2442 ipoib_parent_unregister_pre(ndev);
2338 2443 unregister_netdev(ndev);
2339register_failed: 2444 return ERR_PTR(-ENOMEM);
2340 ib_unregister_event_handler(&priv->event_handler);
2341 flush_workqueue(ipoib_workqueue);
2342 /* Stop GC if started before flush */
2343 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
2344 cancel_delayed_work(&priv->neigh_reap_task);
2345 flush_workqueue(priv->wq);
2346 ipoib_dev_cleanup(priv->dev);
2347
2348device_init_failed:
2349 rn = netdev_priv(priv->dev);
2350 rn->free_rdma_netdev(priv->dev);
2351 kfree(priv);
2352
2353alloc_mem_failed:
2354 return ERR_PTR(result);
2355} 2445}
2356 2446
2357static void ipoib_add_one(struct ib_device *device) 2447static void ipoib_add_one(struct ib_device *device)
@@ -2362,7 +2452,7 @@ static void ipoib_add_one(struct ib_device *device)
2362 int p; 2452 int p;
2363 int count = 0; 2453 int count = 0;
2364 2454
2365 dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); 2455 dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
2366 if (!dev_list) 2456 if (!dev_list)
2367 return; 2457 return;
2368 2458
@@ -2396,39 +2486,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
2396 return; 2486 return;
2397 2487
2398 list_for_each_entry_safe(priv, tmp, dev_list, list) { 2488 list_for_each_entry_safe(priv, tmp, dev_list, list) {
2399 struct rdma_netdev *parent_rn = netdev_priv(priv->dev); 2489 LIST_HEAD(head);
2400 2490 ipoib_parent_unregister_pre(priv->dev);
2401 ib_unregister_event_handler(&priv->event_handler);
2402 flush_workqueue(ipoib_workqueue);
2403
2404 /* mark interface in the middle of destruction */
2405 set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags);
2406 2491
2407 rtnl_lock(); 2492 rtnl_lock();
2408 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
2409 rtnl_unlock();
2410
2411 /* Stop GC */
2412 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
2413 cancel_delayed_work(&priv->neigh_reap_task);
2414 flush_workqueue(priv->wq);
2415
2416 /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
2417 mutex_lock(&priv->sysfs_mutex);
2418 unregister_netdev(priv->dev);
2419 mutex_unlock(&priv->sysfs_mutex);
2420
2421 parent_rn->free_rdma_netdev(priv->dev);
2422 2493
2423 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 2494 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs,
2424 struct rdma_netdev *child_rn; 2495 list)
2496 unregister_netdevice_queue(cpriv->dev, &head);
2497 unregister_netdevice_queue(priv->dev, &head);
2498 unregister_netdevice_many(&head);
2425 2499
2426 child_rn = netdev_priv(cpriv->dev); 2500 rtnl_unlock();
2427 child_rn->free_rdma_netdev(cpriv->dev);
2428 kfree(cpriv);
2429 }
2430
2431 kfree(priv);
2432 } 2501 }
2433 2502
2434 kfree(dev_list); 2503 kfree(dev_list);
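ipoib_remove_one() above now queues the children and then the parent onto one list and tears them all down with a single unregister_netdevice_many() call inside one RTNL section. The batching pattern in isolation:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    /* Tear down a child and its parent together; ordering on the list
     * makes the child go away before the parent. */
    static void unregister_child_then_parent(struct net_device *child,
    					 struct net_device *parent)
    {
    	LIST_HEAD(head);

    	rtnl_lock();
    	unregister_netdevice_queue(child, &head);
    	unregister_netdevice_queue(parent, &head);
    	unregister_netdevice_many(&head);
    	rtnl_unlock();
    }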
@@ -2476,8 +2545,7 @@ static int __init ipoib_init_module(void)
2476 * its private workqueue, and we only queue up flush events 2545 * its private workqueue, and we only queue up flush events
2477 * on our global flush workqueue. This avoids the deadlocks. 2546 * on our global flush workqueue. This avoids the deadlocks.
2478 */ 2547 */
2479 ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 2548 ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
2480 WQ_MEM_RECLAIM);
2481 if (!ipoib_workqueue) { 2549 if (!ipoib_workqueue) {
2482 ret = -ENOMEM; 2550 ret = -ENOMEM;
2483 goto err_fs; 2551 goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6709328d90f8..b9e9562f5034 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
140{ 140{
141 struct ipoib_mcast *mcast; 141 struct ipoib_mcast *mcast;
142 142
143 mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); 143 mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
144 if (!mcast) 144 if (!mcast)
145 return NULL; 145 return NULL;
146 146
@@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
822 if (neigh && list_empty(&neigh->list)) { 822 if (neigh && list_empty(&neigh->list)) {
823 kref_get(&mcast->ah->ref); 823 kref_get(&mcast->ah->ref);
824 neigh->ah = mcast->ah; 824 neigh->ah = mcast->ah;
825 neigh->ah->valid = 1;
825 list_add_tail(&neigh->list, &mcast->neigh_list); 826 list_add_tail(&neigh->list, &mcast->neigh_list);
826 } 827 }
827 } 828 }
@@ -917,7 +918,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
917 if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) 918 if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
918 continue; 919 continue;
919 920
920 memcpy(mgid.raw, ha->addr + 4, sizeof mgid); 921 memcpy(mgid.raw, ha->addr + 4, sizeof(mgid));
921 922
922 mcast = __ipoib_mcast_find(dev, &mgid); 923 mcast = __ipoib_mcast_find(dev, &mgid);
923 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 924 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -997,7 +998,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
997{ 998{
998 struct ipoib_mcast_iter *iter; 999 struct ipoib_mcast_iter *iter;
999 1000
1000 iter = kmalloc(sizeof *iter, GFP_KERNEL); 1001 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1001 if (!iter) 1002 if (!iter)
1002 return NULL; 1003 return NULL;
1003 1004
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 3e44087935ae..d4d553a51fa9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -122,15 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
122 } else 122 } else
123 child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); 123 child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
124 124
125 if (child_pkey == 0 || child_pkey == 0x8000)
126 return -EINVAL;
127
128 /*
129 * Set the full membership bit, so that we join the right
130 * broadcast group, etc.
131 */
132 child_pkey |= 0x8000;
133
134 err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), 125 err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
135 child_pkey, IPOIB_RTNL_CHILD); 126 child_pkey, IPOIB_RTNL_CHILD);
136 127
@@ -139,19 +130,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
139 return err; 130 return err;
140} 131}
141 132
142static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
143{
144 struct ipoib_dev_priv *priv, *ppriv;
145
146 priv = ipoib_priv(dev);
147 ppriv = ipoib_priv(priv->parent);
148
149 down_write(&ppriv->vlan_rwsem);
150 unregister_netdevice_queue(dev, head);
151 list_del(&priv->list);
152 up_write(&ppriv->vlan_rwsem);
153}
154
155static size_t ipoib_get_size(const struct net_device *dev) 133static size_t ipoib_get_size(const struct net_device *dev)
156{ 134{
157 return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ 135 return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
@@ -167,7 +145,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
167 .setup = ipoib_setup_common, 145 .setup = ipoib_setup_common,
168 .newlink = ipoib_new_child_link, 146 .newlink = ipoib_new_child_link,
169 .changelink = ipoib_changelink, 147 .changelink = ipoib_changelink,
170 .dellink = ipoib_unregister_child_dev,
171 .get_size = ipoib_get_size, 148 .get_size = ipoib_get_size,
172 .fill_info = ipoib_fill_info, 149 .fill_info = ipoib_fill_info,
173}; 150};
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 984a88096f39..9f36ca786df8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
52 52
53 if (set_qkey) { 53 if (set_qkey) {
54 ret = -ENOMEM; 54 ret = -ENOMEM;
55 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); 55 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
56 if (!qp_attr) 56 if (!qp_attr)
57 goto out; 57 goto out;
58 58
@@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
147 .cap = { 147 .cap = {
148 .max_send_wr = ipoib_sendq_size, 148 .max_send_wr = ipoib_sendq_size,
149 .max_recv_wr = ipoib_recvq_size, 149 .max_recv_wr = ipoib_recvq_size,
150 .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, 150 .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
151 MAX_SKB_FRAGS + 1), 151 MAX_SKB_FRAGS + 1),
152 .max_recv_sge = IPOIB_UD_RX_SG 152 .max_recv_sge = IPOIB_UD_RX_SG
153 }, 153 },
@@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
168 else 168 else
169 size += ipoib_recvq_size * ipoib_max_conn_qp; 169 size += ipoib_recvq_size * ipoib_max_conn_qp;
170 } else 170 } else
171 if (ret != -ENOSYS) 171 if (ret != -EOPNOTSUPP)
172 return -ENODEV; 172 return ret;
173 173
174 req_vec = (priv->port - 1) * 2; 174 req_vec = (priv->port - 1) * 2;
175 175
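ipoib_transport_dev_init() above now sizes max_send_sge from the device's max_send_sge attribute rather than the old combined max_sge, and it propagates errors other than -EOPNOTSUPP instead of collapsing them to -ENODEV. A one-line sketch of the clamp, assuming the split send/recv SGE attributes:

    #include <linux/kernel.h>
    #include <linux/skbuff.h>
    #include <rdma/ib_verbs.h>

    static u32 cap_send_sge(struct ib_device *ca)
    {
    	/* never ask for more SGEs than the HCA supports on the send side */
    	return min_t(u32, ca->attrs.max_send_sge, MAX_SKB_FRAGS + 1);
    }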
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 55a9b71ed05a..341753fbda54 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
50} 50}
51static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); 51static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
52 52
53static bool is_child_unique(struct ipoib_dev_priv *ppriv,
54 struct ipoib_dev_priv *priv)
55{
56 struct ipoib_dev_priv *tpriv;
57
58 ASSERT_RTNL();
59
60 /*
61 * Since the legacy sysfs interface uses pkey for deletion, it cannot
62 * support more than one interface with the same pkey; that would be
63 * ambiguous. The RTNL interface deletes by netdev, so it has no
64 * problem supporting duplicate pkeys.
65 */
66 if (priv->child_type != IPOIB_LEGACY_CHILD)
67 return true;
68
69 /*
70 * First ensure this isn't a duplicate. We check the parent device and
71 * then all of the legacy child interfaces to make sure the Pkey
72 * doesn't match.
73 */
74 if (ppriv->pkey == priv->pkey)
75 return false;
76
77 list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
78 if (tpriv->pkey == priv->pkey &&
79 tpriv->child_type == IPOIB_LEGACY_CHILD)
80 return false;
81 }
82
83 return true;
84}
85
86/*
87 * NOTE: If this function fails then the priv->dev will remain valid, however
88 * priv can have been freed and must not be touched by caller in the error
89 * case.
90 *
91 * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
92 * free the net_device (just as rtnl_newlink does) otherwise the net_device
93 * will be freed when the rtnl is unlocked.
94 */
53int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, 95int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
54 u16 pkey, int type) 96 u16 pkey, int type)
55{ 97{
98 struct net_device *ndev = priv->dev;
56 int result; 99 int result;
57 100
58 priv->max_ib_mtu = ppriv->max_ib_mtu; 101 ASSERT_RTNL();
59 /* MTU will be reset when mcast join happens */ 102
60 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 103 /*
61 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; 104 * Racing with unregister of the parent must be prevented by the
62 priv->parent = ppriv->dev; 105 * caller.
63 set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); 106 */
107 WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED);
64 108
65 ipoib_set_dev_features(priv, ppriv->ca); 109 if (pkey == 0 || pkey == 0x8000) {
110 result = -EINVAL;
111 goto out_early;
112 }
66 113
114 priv->parent = ppriv->dev;
67 priv->pkey = pkey; 115 priv->pkey = pkey;
116 priv->child_type = type;
68 117
69 memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); 118 if (!is_child_unique(ppriv, priv)) {
70 memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); 119 result = -ENOTUNIQ;
71 set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); 120 goto out_early;
72 priv->dev->broadcast[8] = pkey >> 8;
73 priv->dev->broadcast[9] = pkey & 0xff;
74
75 result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
76 if (result < 0) {
77 ipoib_warn(ppriv, "failed to initialize subinterface: "
78 "device %s, port %d",
79 ppriv->ca->name, ppriv->port);
80 goto err;
81 } 121 }
82 122
83 result = register_netdevice(priv->dev); 123 /* We do not need to touch priv if register_netdevice fails */
124 ndev->priv_destructor = ipoib_intf_free;
125
126 result = register_netdevice(ndev);
84 if (result) { 127 if (result) {
85 ipoib_warn(priv, "failed to initialize; error %i", result); 128 ipoib_warn(priv, "failed to initialize; error %i", result);
86 goto register_failed; 129
130 /*
131 * register_netdevice sometimes calls priv_destructor,
132 * sometimes not. Make sure it was done.
133 */
134 goto out_early;
87 } 135 }
88 136
89 /* RTNL children don't need proprietary sysfs entries */ 137 /* RTNL children don't need proprietary sysfs entries */
90 if (type == IPOIB_LEGACY_CHILD) { 138 if (type == IPOIB_LEGACY_CHILD) {
91 if (ipoib_cm_add_mode_attr(priv->dev)) 139 if (ipoib_cm_add_mode_attr(ndev))
92 goto sysfs_failed; 140 goto sysfs_failed;
93 if (ipoib_add_pkey_attr(priv->dev)) 141 if (ipoib_add_pkey_attr(ndev))
94 goto sysfs_failed; 142 goto sysfs_failed;
95 if (ipoib_add_umcast_attr(priv->dev)) 143 if (ipoib_add_umcast_attr(ndev))
96 goto sysfs_failed; 144 goto sysfs_failed;
97 145
98 if (device_create_file(&priv->dev->dev, &dev_attr_parent)) 146 if (device_create_file(&ndev->dev, &dev_attr_parent))
99 goto sysfs_failed; 147 goto sysfs_failed;
100 } 148 }
101 149
102 priv->child_type = type;
103 list_add_tail(&priv->list, &ppriv->child_intfs);
104
105 return 0; 150 return 0;
106 151
107sysfs_failed: 152sysfs_failed:
108 result = -ENOMEM;
109 unregister_netdevice(priv->dev); 153 unregister_netdevice(priv->dev);
154 return -ENOMEM;
110 155
111register_failed: 156out_early:
112 ipoib_dev_cleanup(priv->dev); 157 if (ndev->priv_destructor)
113 158 ndev->priv_destructor(ndev);
114err:
115 return result; 159 return result;
116} 160}
117 161
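The __ipoib_vlan_add() rewrite above assumes the caller already holds the RTNL and checks the parent's reg_state rather than a private GOING_DOWN flag. In the sysfs entry points that follow, the same idea appears as a trylock: a store callback must not block on the RTNL, because unregister_netdev() removes sysfs files while holding it. Sketch of that handler pattern (hypothetical attribute; the header for restart_syscall() is an assumption):

    #include <linux/device.h>
    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>
    #include <linux/sched/signal.h>	/* restart_syscall() */

    static ssize_t example_store(struct device *d, struct device_attribute *attr,
    			     const char *buf, size_t count)
    {
    	struct net_device *ndev = to_net_dev(d);
    	ssize_t ret = count;

    	if (!rtnl_trylock())
    		return restart_syscall();	/* back off, retry the syscall */

    	if (ndev->reg_state != NETREG_REGISTERED) {
    		rtnl_unlock();
    		return -EPERM;		/* device is being unregistered */
    	}

    	/* ... apply the requested change while holding the RTNL ... */

    	rtnl_unlock();
    	return ret;
    }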
@@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
119{ 163{
120 struct ipoib_dev_priv *ppriv, *priv; 164 struct ipoib_dev_priv *ppriv, *priv;
121 char intf_name[IFNAMSIZ]; 165 char intf_name[IFNAMSIZ];
122 struct ipoib_dev_priv *tpriv; 166 struct net_device *ndev;
123 int result; 167 int result;
124 168
125 if (!capable(CAP_NET_ADMIN)) 169 if (!capable(CAP_NET_ADMIN))
126 return -EPERM; 170 return -EPERM;
127 171
128 ppriv = ipoib_priv(pdev); 172 if (!rtnl_trylock())
129
130 if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
131 return -EPERM;
132
133 snprintf(intf_name, sizeof intf_name, "%s.%04x",
134 ppriv->dev->name, pkey);
135
136 if (!mutex_trylock(&ppriv->sysfs_mutex))
137 return restart_syscall(); 173 return restart_syscall();
138 174
139 if (!rtnl_trylock()) { 175 if (pdev->reg_state != NETREG_REGISTERED) {
140 mutex_unlock(&ppriv->sysfs_mutex);
141 return restart_syscall();
142 }
143
144 if (!down_write_trylock(&ppriv->vlan_rwsem)) {
145 rtnl_unlock(); 176 rtnl_unlock();
146 mutex_unlock(&ppriv->sysfs_mutex); 177 return -EPERM;
147 return restart_syscall();
148 } 178 }
149 179
180 ppriv = ipoib_priv(pdev);
181
182 snprintf(intf_name, sizeof(intf_name), "%s.%04x",
183 ppriv->dev->name, pkey);
184
150 priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); 185 priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
151 if (!priv) { 186 if (!priv) {
152 result = -ENOMEM; 187 result = -ENOMEM;
153 goto out; 188 goto out;
154 } 189 }
155 190 ndev = priv->dev;
156 /*
157 * First ensure this isn't a duplicate. We check the parent device and
158 * then all of the legacy child interfaces to make sure the Pkey
159 * doesn't match.
160 */
161 if (ppriv->pkey == pkey) {
162 result = -ENOTUNIQ;
163 goto out;
164 }
165
166 list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
167 if (tpriv->pkey == pkey &&
168 tpriv->child_type == IPOIB_LEGACY_CHILD) {
169 result = -ENOTUNIQ;
170 goto out;
171 }
172 }
173 191
174 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); 192 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
175 193
194 if (result && ndev->reg_state == NETREG_UNINITIALIZED)
195 free_netdev(ndev);
196
176out: 197out:
177 up_write(&ppriv->vlan_rwsem);
178 rtnl_unlock(); 198 rtnl_unlock();
179 mutex_unlock(&ppriv->sysfs_mutex);
180 199
181 if (result && priv) { 200 return result;
182 struct rdma_netdev *rn; 201}
202
203struct ipoib_vlan_delete_work {
204 struct work_struct work;
205 struct net_device *dev;
206};
207
208/*
209 * sysfs callbacks of a netdevice cannot obtain the rtnl lock as
210 * unregister_netdev ultimately deletes the sysfs files while holding the rtnl
211 * lock. This deadlocks the system.
212 *
213 * A callback can use rtnl_trylock to avoid the deadlock but it cannot call
214 * unregister_netdev as that internally takes and releases the rtnl_lock. So
215 * instead we find the netdev to unregister and then do the actual unregister
216 * from the global work queue where we can obtain the rtnl_lock safely.
217 */
218static void ipoib_vlan_delete_task(struct work_struct *work)
219{
220 struct ipoib_vlan_delete_work *pwork =
221 container_of(work, struct ipoib_vlan_delete_work, work);
222 struct net_device *dev = pwork->dev;
223
224 rtnl_lock();
225
226 /* Unregistering tasks can race with another task or parent removal */
227 if (dev->reg_state == NETREG_REGISTERED) {
228 struct ipoib_dev_priv *priv = ipoib_priv(dev);
229 struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
183 230
184 rn = netdev_priv(priv->dev); 231 ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
185 rn->free_rdma_netdev(priv->dev); 232 unregister_netdevice(dev);
186 kfree(priv);
187 } 233 }
188 234
189 return result; 235 rtnl_unlock();
236
237 kfree(pwork);
190} 238}
191 239
192int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) 240int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
193{ 241{
194 struct ipoib_dev_priv *ppriv, *priv, *tpriv; 242 struct ipoib_dev_priv *ppriv, *priv, *tpriv;
195 struct net_device *dev = NULL; 243 int rc;
196 244
197 if (!capable(CAP_NET_ADMIN)) 245 if (!capable(CAP_NET_ADMIN))
198 return -EPERM; 246 return -EPERM;
199 247
200 ppriv = ipoib_priv(pdev); 248 if (!rtnl_trylock())
201
202 if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
203 return -EPERM;
204
205 if (!mutex_trylock(&ppriv->sysfs_mutex))
206 return restart_syscall(); 249 return restart_syscall();
207 250
208 if (!rtnl_trylock()) { 251 if (pdev->reg_state != NETREG_REGISTERED) {
209 mutex_unlock(&ppriv->sysfs_mutex);
210 return restart_syscall();
211 }
212
213 if (!down_write_trylock(&ppriv->vlan_rwsem)) {
214 rtnl_unlock(); 252 rtnl_unlock();
215 mutex_unlock(&ppriv->sysfs_mutex); 253 return -EPERM;
216 return restart_syscall();
217 } 254 }
218 255
256 ppriv = ipoib_priv(pdev);
257
258 rc = -ENODEV;
219 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 259 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
220 if (priv->pkey == pkey && 260 if (priv->pkey == pkey &&
221 priv->child_type == IPOIB_LEGACY_CHILD) { 261 priv->child_type == IPOIB_LEGACY_CHILD) {
222 list_del(&priv->list); 262 struct ipoib_vlan_delete_work *work;
223 dev = priv->dev; 263
264 work = kmalloc(sizeof(*work), GFP_KERNEL);
265 if (!work) {
266 rc = -ENOMEM;
267 goto out;
268 }
269
270 down_write(&ppriv->vlan_rwsem);
271 list_del_init(&priv->list);
272 up_write(&ppriv->vlan_rwsem);
273 work->dev = priv->dev;
274 INIT_WORK(&work->work, ipoib_vlan_delete_task);
275 queue_work(ipoib_workqueue, &work->work);
276
277 rc = 0;
224 break; 278 break;
225 } 279 }
226 } 280 }
227 up_write(&ppriv->vlan_rwsem);
228
229 if (dev) {
230 ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
231 unregister_netdevice(dev);
232 }
233 281
282out:
234 rtnl_unlock(); 283 rtnl_unlock();
235 mutex_unlock(&ppriv->sysfs_mutex);
236
237 if (dev) {
238 struct rdma_netdev *rn;
239
240 rn = netdev_priv(dev);
241 rn->free_rdma_netdev(priv->dev);
242 kfree(priv);
243 return 0;
244 }
245 284
246 return -ENODEV; 285 return rc;
247} 286}
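
The ipoib_vlan.c hunks above move child teardown off the sysfs path: the delete callback only records which netdev to drop and queues a small work item, and the work handler takes the RTNL lock and calls unregister_netdevice. What follows is a minimal userspace analogue of that deferral pattern only, not the kernel code: it assumes nothing beyond libc and pthreads (build with cc -pthread), a thread stands in for the kernel workqueue, and the names big_lock, delete_work and delete_worker are illustrative stand-ins.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the lock the teardown needs (rtnl in the hunks above). */
static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

struct delete_work {
	char name[32];
};

/* Runs in a context that may block on big_lock, like the workqueue task. */
static void *delete_worker(void *arg)
{
	struct delete_work *w = arg;

	pthread_mutex_lock(&big_lock);
	printf("worker: unregistering %s under the lock\n", w->name);
	pthread_mutex_unlock(&big_lock);

	free(w);
	return NULL;
}

/* Called from a context that must not block on big_lock (the sysfs store
 * callback in the hunks above): record what to delete and hand it off. */
static int delete_callback(const char *name, pthread_t *tid)
{
	struct delete_work *w = malloc(sizeof(*w));

	if (!w)
		return -1;
	snprintf(w->name, sizeof(w->name), "%s", name);
	return pthread_create(tid, NULL, delete_worker, w);
}

int main(void)
{
	pthread_t tid;

	if (delete_callback("ib0.8001", &tid))
		return 1;
	pthread_join(tid, NULL);
	return 0;
}

The indirection exists because the context that queues the work may not block on the lock that the teardown requires, so the blocking part runs elsewhere.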
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9a6434c31db2..3fecd87c9f2b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
610 uint32_t initial_cmdsn) 610 uint32_t initial_cmdsn)
611{ 611{
612 struct iscsi_cls_session *cls_session; 612 struct iscsi_cls_session *cls_session;
613 struct iscsi_session *session;
614 struct Scsi_Host *shost; 613 struct Scsi_Host *shost;
615 struct iser_conn *iser_conn = NULL; 614 struct iser_conn *iser_conn = NULL;
616 struct ib_conn *ib_conn; 615 struct ib_conn *ib_conn;
617 u32 max_fr_sectors; 616 u32 max_fr_sectors;
618 u16 max_cmds;
619 617
620 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); 618 shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
621 if (!shost) 619 if (!shost)
@@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
633 */ 631 */
634 if (ep) { 632 if (ep) {
635 iser_conn = ep->dd_data; 633 iser_conn = ep->dd_data;
636 max_cmds = iser_conn->max_cmds;
637 shost->sg_tablesize = iser_conn->scsi_sg_tablesize; 634 shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
635 shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
638 636
639 mutex_lock(&iser_conn->state_mutex); 637 mutex_lock(&iser_conn->state_mutex);
640 if (iser_conn->state != ISER_CONN_UP) { 638 if (iser_conn->state != ISER_CONN_UP) {
@@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
660 } 658 }
661 mutex_unlock(&iser_conn->state_mutex); 659 mutex_unlock(&iser_conn->state_mutex);
662 } else { 660 } else {
663 max_cmds = ISER_DEF_XMIT_CMDS_MAX; 661 shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
664 if (iscsi_host_add(shost, NULL)) 662 if (iscsi_host_add(shost, NULL))
665 goto free_host; 663 goto free_host;
666 } 664 }
@@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
676 iser_warn("max_sectors was reduced from %u to %u\n", 674 iser_warn("max_sectors was reduced from %u to %u\n",
677 iser_max_sectors, shost->max_sectors); 675 iser_max_sectors, shost->max_sectors);
678 676
679 if (cmds_max > max_cmds) {
680 iser_info("cmds_max changed from %u to %u\n",
681 cmds_max, max_cmds);
682 cmds_max = max_cmds;
683 }
684
685 cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, 677 cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
686 cmds_max, 0, 678 shost->can_queue, 0,
687 sizeof(struct iscsi_iser_task), 679 sizeof(struct iscsi_iser_task),
688 initial_cmdsn, 0); 680 initial_cmdsn, 0);
689 if (!cls_session) 681 if (!cls_session)
690 goto remove_host; 682 goto remove_host;
691 session = cls_session->dd_data;
692 683
693 shost->can_queue = session->scsi_cmds_max;
694 return cls_session; 684 return cls_session;
695 685
696remove_host: 686remove_host:
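
The iscsi_iser.c hunk above drops the open-coded "if (cmds_max > max_cmds)" adjustment and clamps shost->can_queue directly with min_t(). A tiny sketch of that typed-clamp idiom, assuming only libc; the variable names are illustrative and min_t is re-defined locally rather than taken from kernel headers.

#include <stdint.h>
#include <stdio.h>

/* Typed minimum in the spirit of the kernel's min_t(). */
#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	uint16_t requested = 512;	/* what the caller asked for (cmds_max) */
	uint16_t hw_limit  = 128;	/* what the connection can actually take */
	uint16_t can_queue = min_t(uint16_t, requested, hw_limit);

	printf("can_queue = %u\n", can_queue);
	return 0;
}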
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 130bf163f066..009be8889d71 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -405,7 +405,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
405 405
406 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); 406 ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
407 407
408 wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); 408 wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr,
409 wr);
409 wr->wr.opcode = IB_WR_REG_SIG_MR; 410 wr->wr.opcode = IB_WR_REG_SIG_MR;
410 wr->wr.wr_cqe = cqe; 411 wr->wr.wr_cqe = cqe;
411 wr->wr.sg_list = &data_reg->sge; 412 wr->wr.sg_list = &data_reg->sge;
@@ -457,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
457 return n < 0 ? n : -EINVAL; 458 return n < 0 ? n : -EINVAL;
458 } 459 }
459 460
460 wr = reg_wr(iser_tx_next_wr(tx_desc)); 461 wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr);
461 wr->wr.opcode = IB_WR_REG_MR; 462 wr->wr.opcode = IB_WR_REG_MR;
462 wr->wr.wr_cqe = cqe; 463 wr->wr.wr_cqe = cqe;
463 wr->wr.send_flags = 0; 464 wr->wr.send_flags = 0;
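
The iser_memory.c hunks above replace the sig_handover_wr()/reg_wr() wrapper helpers with open-coded container_of() on the embedded ib_send_wr. A minimal userspace sketch of that pattern, assuming only libc; struct send_wr and struct reg_wr here are toy stand-ins, not the verbs structures.

#include <stddef.h>
#include <stdio.h>

/* Classic container_of: step back from a member to its enclosing struct. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct send_wr { int opcode; };			/* generic work request */
struct reg_wr  { struct send_wr wr; int key; };	/* wrapper that embeds it */

int main(void)
{
	struct reg_wr rwr = { .wr = { .opcode = 7 }, .key = 42 };
	struct send_wr *inner = &rwr.wr;	/* what a generic helper returns */
	struct reg_wr *outer = container_of(inner, struct reg_wr, wr);

	printf("key=%d opcode=%d\n", outer->key, outer->wr.opcode);
	return 0;
}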
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 616d978cbf2b..b686a4aaffe8 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1022,7 +1022,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
1022{ 1022{
1023 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1023 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1024 struct iser_login_desc *desc = &iser_conn->login_desc; 1024 struct iser_login_desc *desc = &iser_conn->login_desc;
1025 struct ib_recv_wr wr, *wr_failed; 1025 struct ib_recv_wr wr;
1026 int ib_ret; 1026 int ib_ret;
1027 1027
1028 desc->sge.addr = desc->rsp_dma; 1028 desc->sge.addr = desc->rsp_dma;
@@ -1036,7 +1036,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
1036 wr.next = NULL; 1036 wr.next = NULL;
1037 1037
1038 ib_conn->post_recv_buf_count++; 1038 ib_conn->post_recv_buf_count++;
1039 ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); 1039 ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL);
1040 if (ib_ret) { 1040 if (ib_ret) {
1041 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1041 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1042 ib_conn->post_recv_buf_count--; 1042 ib_conn->post_recv_buf_count--;
@@ -1050,7 +1050,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
1050 struct ib_conn *ib_conn = &iser_conn->ib_conn; 1050 struct ib_conn *ib_conn = &iser_conn->ib_conn;
1051 unsigned int my_rx_head = iser_conn->rx_desc_head; 1051 unsigned int my_rx_head = iser_conn->rx_desc_head;
1052 struct iser_rx_desc *rx_desc; 1052 struct iser_rx_desc *rx_desc;
1053 struct ib_recv_wr *wr, *wr_failed; 1053 struct ib_recv_wr *wr;
1054 int i, ib_ret; 1054 int i, ib_ret;
1055 1055
1056 for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { 1056 for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
@@ -1067,7 +1067,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
1067 wr->next = NULL; /* mark end of work requests list */ 1067 wr->next = NULL; /* mark end of work requests list */
1068 1068
1069 ib_conn->post_recv_buf_count += count; 1069 ib_conn->post_recv_buf_count += count;
1070 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); 1070 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL);
1071 if (ib_ret) { 1071 if (ib_ret) {
1072 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1072 iser_err("ib_post_recv failed ret=%d\n", ib_ret);
1073 ib_conn->post_recv_buf_count -= count; 1073 ib_conn->post_recv_buf_count -= count;
@@ -1086,7 +1086,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
1086int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, 1086int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1087 bool signal) 1087 bool signal)
1088{ 1088{
1089 struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc); 1089 struct ib_send_wr *wr = iser_tx_next_wr(tx_desc);
1090 int ib_ret; 1090 int ib_ret;
1091 1091
1092 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 1092 ib_dma_sync_single_for_device(ib_conn->device->ib_device,
@@ -1100,10 +1100,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1100 wr->opcode = IB_WR_SEND; 1100 wr->opcode = IB_WR_SEND;
1101 wr->send_flags = signal ? IB_SEND_SIGNALED : 0; 1101 wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
1102 1102
1103 ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); 1103 ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL);
1104 if (ib_ret) 1104 if (ib_ret)
1105 iser_err("ib_post_send failed, ret:%d opcode:%d\n", 1105 iser_err("ib_post_send failed, ret:%d opcode:%d\n",
1106 ib_ret, bad_wr->opcode); 1106 ib_ret, wr->opcode);
1107 1107
1108 return ib_ret; 1108 return ib_ret;
1109} 1109}
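
The iser_verbs.c hunks above follow the series-wide change that lets callers pass NULL instead of a bad_wr out-parameter to ib_post_send()/ib_post_recv(), and report the work request itself on error. Below is a small userspace sketch of an API with an optional out-parameter, under the assumption that the caller only wants the failing element when it asks for it; all names are illustrative.

#include <stddef.h>
#include <stdio.h>

struct send_wr {
	int opcode;
	struct send_wr *next;
};

/* Post a chain of work requests; on failure, report the offending one only
 * if the caller asked for it by passing a non-NULL out-parameter. */
static int post_send(struct send_wr *first, struct send_wr **bad_wr)
{
	struct send_wr *wr;

	for (wr = first; wr; wr = wr->next) {
		if (wr->opcode < 0) {		/* pretend this WR is rejected */
			if (bad_wr)
				*bad_wr = wr;
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct send_wr ok  = { .opcode = 1,  .next = NULL };
	struct send_wr bad = { .opcode = -1, .next = &ok };
	struct send_wr *failed = NULL;

	printf("ok chain:  %d\n", post_send(&ok, NULL));	/* bad_wr ignored */
	printf("bad chain: %d (failed opcode %d)\n",
	       post_send(&bad, &failed), failed ? failed->opcode : 0);
	return 0;
}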
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index cccbcf0eb035..f39670c5c25c 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn,
136 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; 136 attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
137 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; 137 attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
138 attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; 138 attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
139 attr.cap.max_send_sge = device->ib_device->attrs.max_sge; 139 attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
140 attr.cap.max_recv_sge = 1; 140 attr.cap.max_recv_sge = 1;
141 attr.sq_sig_type = IB_SIGNAL_REQ_WR; 141 attr.sq_sig_type = IB_SIGNAL_REQ_WR;
142 attr.qp_type = IB_QPT_RC; 142 attr.qp_type = IB_QPT_RC;
@@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device)
299 struct ib_device *ib_dev = device->ib_device; 299 struct ib_device *ib_dev = device->ib_device;
300 int ret; 300 int ret;
301 301
302 isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); 302 isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n",
303 ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge);
303 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); 304 isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
304 305
305 ret = isert_alloc_comps(device); 306 ret = isert_alloc_comps(device);
@@ -809,7 +810,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
809static int 810static int
810isert_post_recvm(struct isert_conn *isert_conn, u32 count) 811isert_post_recvm(struct isert_conn *isert_conn, u32 count)
811{ 812{
812 struct ib_recv_wr *rx_wr, *rx_wr_failed; 813 struct ib_recv_wr *rx_wr;
813 int i, ret; 814 int i, ret;
814 struct iser_rx_desc *rx_desc; 815 struct iser_rx_desc *rx_desc;
815 816
@@ -825,8 +826,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
825 rx_wr--; 826 rx_wr--;
826 rx_wr->next = NULL; /* mark end of work requests list */ 827 rx_wr->next = NULL; /* mark end of work requests list */
827 828
828 ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, 829 ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, NULL);
829 &rx_wr_failed);
830 if (ret) 830 if (ret)
831 isert_err("ib_post_recv() failed with ret: %d\n", ret); 831 isert_err("ib_post_recv() failed with ret: %d\n", ret);
832 832
@@ -836,7 +836,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
836static int 836static int
837isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) 837isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
838{ 838{
839 struct ib_recv_wr *rx_wr_failed, rx_wr; 839 struct ib_recv_wr rx_wr;
840 int ret; 840 int ret;
841 841
842 if (!rx_desc->in_use) { 842 if (!rx_desc->in_use) {
@@ -853,7 +853,7 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
853 rx_wr.num_sge = 1; 853 rx_wr.num_sge = 1;
854 rx_wr.next = NULL; 854 rx_wr.next = NULL;
855 855
856 ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); 856 ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL);
857 if (ret) 857 if (ret)
858 isert_err("ib_post_recv() failed with ret: %d\n", ret); 858 isert_err("ib_post_recv() failed with ret: %d\n", ret);
859 859
@@ -864,7 +864,7 @@ static int
864isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc) 864isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
865{ 865{
866 struct ib_device *ib_dev = isert_conn->cm_id->device; 866 struct ib_device *ib_dev = isert_conn->cm_id->device;
867 struct ib_send_wr send_wr, *send_wr_failed; 867 struct ib_send_wr send_wr;
868 int ret; 868 int ret;
869 869
870 ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr, 870 ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr,
@@ -879,7 +879,7 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des
879 send_wr.opcode = IB_WR_SEND; 879 send_wr.opcode = IB_WR_SEND;
880 send_wr.send_flags = IB_SEND_SIGNALED; 880 send_wr.send_flags = IB_SEND_SIGNALED;
881 881
882 ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed); 882 ret = ib_post_send(isert_conn->qp, &send_wr, NULL);
883 if (ret) 883 if (ret)
884 isert_err("ib_post_send() failed, ret: %d\n", ret); 884 isert_err("ib_post_send() failed, ret: %d\n", ret);
885 885
@@ -967,7 +967,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
967static int 967static int
968isert_login_post_recv(struct isert_conn *isert_conn) 968isert_login_post_recv(struct isert_conn *isert_conn)
969{ 969{
970 struct ib_recv_wr rx_wr, *rx_wr_fail; 970 struct ib_recv_wr rx_wr;
971 struct ib_sge sge; 971 struct ib_sge sge;
972 int ret; 972 int ret;
973 973
@@ -986,7 +986,7 @@ isert_login_post_recv(struct isert_conn *isert_conn)
986 rx_wr.sg_list = &sge; 986 rx_wr.sg_list = &sge;
987 rx_wr.num_sge = 1; 987 rx_wr.num_sge = 1;
988 988
989 ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail); 989 ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL);
990 if (ret) 990 if (ret)
991 isert_err("ib_post_recv() failed: %d\n", ret); 991 isert_err("ib_post_recv() failed: %d\n", ret);
992 992
@@ -1829,7 +1829,6 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc)
1829static int 1829static int
1830isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) 1830isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
1831{ 1831{
1832 struct ib_send_wr *wr_failed;
1833 int ret; 1832 int ret;
1834 1833
1835 ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); 1834 ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
@@ -1838,8 +1837,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
1838 return ret; 1837 return ret;
1839 } 1838 }
1840 1839
1841 ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, 1840 ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL);
1842 &wr_failed);
1843 if (ret) { 1841 if (ret) {
1844 isert_err("ib_post_send failed with %d\n", ret); 1842 isert_err("ib_post_send failed with %d\n", ret);
1845 return ret; 1843 return ret;
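
The ib_isert.c hunks above switch from the single attrs.max_sge to the split max_send_sge/max_recv_sge attributes, matching hardware where the two limits differ. A toy sketch of clamping each direction against its own cap, assuming only libc; dev_attrs and qp_caps are illustrative stand-ins for the verbs structures.

#include <stdio.h>

#define min(a, b) ((a) < (b) ? (a) : (b))

struct dev_attrs { int max_send_sge; int max_recv_sge; };
struct qp_caps   { int max_send_sge; int max_recv_sge; };

int main(void)
{
	struct dev_attrs attrs = { .max_send_sge = 30, .max_recv_sge = 4 };
	struct qp_caps cap;
	int wanted = 16;

	/* Send and receive limits may differ, so clamp each one separately. */
	cap.max_send_sge = min(wanted, attrs.max_send_sge);
	cap.max_recv_sge = min(wanted, attrs.max_recv_sge);

	printf("send_sge=%d recv_sge=%d\n", cap.max_send_sge, cap.max_recv_sge);
	return 0;
}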
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9786b24b956f..444d16520506 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -57,13 +57,10 @@
57 57
58#define DRV_NAME "ib_srp" 58#define DRV_NAME "ib_srp"
59#define PFX DRV_NAME ": " 59#define PFX DRV_NAME ": "
60#define DRV_VERSION "2.0"
61#define DRV_RELDATE "July 26, 2015"
62 60
63MODULE_AUTHOR("Roland Dreier"); 61MODULE_AUTHOR("Roland Dreier");
64MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); 62MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
65MODULE_LICENSE("Dual BSD/GPL"); 63MODULE_LICENSE("Dual BSD/GPL");
66MODULE_INFO(release_date, DRV_RELDATE);
67 64
68#if !defined(CONFIG_DYNAMIC_DEBUG) 65#if !defined(CONFIG_DYNAMIC_DEBUG)
69#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) 66#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
@@ -145,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data);
145static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 142static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
146static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 143static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 const char *opname); 144 const char *opname);
148static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 145static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
146 const struct ib_cm_event *event);
149static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 147static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
150 struct rdma_cm_event *event); 148 struct rdma_cm_event *event);
151 149
@@ -1211,7 +1209,6 @@ static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1211static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1209static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1212 u32 rkey) 1210 u32 rkey)
1213{ 1211{
1214 struct ib_send_wr *bad_wr;
1215 struct ib_send_wr wr = { 1212 struct ib_send_wr wr = {
1216 .opcode = IB_WR_LOCAL_INV, 1213 .opcode = IB_WR_LOCAL_INV,
1217 .next = NULL, 1214 .next = NULL,
@@ -1222,7 +1219,7 @@ static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1222 1219
1223 wr.wr_cqe = &req->reg_cqe; 1220 wr.wr_cqe = &req->reg_cqe;
1224 req->reg_cqe.done = srp_inv_rkey_err_done; 1221 req->reg_cqe.done = srp_inv_rkey_err_done;
1225 return ib_post_send(ch->qp, &wr, &bad_wr); 1222 return ib_post_send(ch->qp, &wr, NULL);
1226} 1223}
1227 1224
1228static void srp_unmap_data(struct scsi_cmnd *scmnd, 1225static void srp_unmap_data(struct scsi_cmnd *scmnd,
@@ -1503,7 +1500,6 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1503{ 1500{
1504 struct srp_target_port *target = ch->target; 1501 struct srp_target_port *target = ch->target;
1505 struct srp_device *dev = target->srp_host->srp_dev; 1502 struct srp_device *dev = target->srp_host->srp_dev;
1506 struct ib_send_wr *bad_wr;
1507 struct ib_reg_wr wr; 1503 struct ib_reg_wr wr;
1508 struct srp_fr_desc *desc; 1504 struct srp_fr_desc *desc;
1509 u32 rkey; 1505 u32 rkey;
@@ -1567,7 +1563,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1567 srp_map_desc(state, desc->mr->iova, 1563 srp_map_desc(state, desc->mr->iova,
1568 desc->mr->length, desc->mr->rkey); 1564 desc->mr->length, desc->mr->rkey);
1569 1565
1570 err = ib_post_send(ch->qp, &wr.wr, &bad_wr); 1566 err = ib_post_send(ch->qp, &wr.wr, NULL);
1571 if (unlikely(err)) { 1567 if (unlikely(err)) {
1572 WARN_ON_ONCE(err == -ENOMEM); 1568 WARN_ON_ONCE(err == -ENOMEM);
1573 return err; 1569 return err;
@@ -2018,7 +2014,7 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2018{ 2014{
2019 struct srp_target_port *target = ch->target; 2015 struct srp_target_port *target = ch->target;
2020 struct ib_sge list; 2016 struct ib_sge list;
2021 struct ib_send_wr wr, *bad_wr; 2017 struct ib_send_wr wr;
2022 2018
2023 list.addr = iu->dma; 2019 list.addr = iu->dma;
2024 list.length = len; 2020 list.length = len;
@@ -2033,13 +2029,13 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2033 wr.opcode = IB_WR_SEND; 2029 wr.opcode = IB_WR_SEND;
2034 wr.send_flags = IB_SEND_SIGNALED; 2030 wr.send_flags = IB_SEND_SIGNALED;
2035 2031
2036 return ib_post_send(ch->qp, &wr, &bad_wr); 2032 return ib_post_send(ch->qp, &wr, NULL);
2037} 2033}
2038 2034
2039static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 2035static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2040{ 2036{
2041 struct srp_target_port *target = ch->target; 2037 struct srp_target_port *target = ch->target;
2042 struct ib_recv_wr wr, *bad_wr; 2038 struct ib_recv_wr wr;
2043 struct ib_sge list; 2039 struct ib_sge list;
2044 2040
2045 list.addr = iu->dma; 2041 list.addr = iu->dma;
@@ -2053,7 +2049,7 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2053 wr.sg_list = &list; 2049 wr.sg_list = &list;
2054 wr.num_sge = 1; 2050 wr.num_sge = 1;
2055 2051
2056 return ib_post_recv(ch->qp, &wr, &bad_wr); 2052 return ib_post_recv(ch->qp, &wr, NULL);
2057} 2053}
2058 2054
2059static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 2055static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
@@ -2558,7 +2554,7 @@ error:
2558} 2554}
2559 2555
2560static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, 2556static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2561 struct ib_cm_event *event, 2557 const struct ib_cm_event *event,
2562 struct srp_rdma_ch *ch) 2558 struct srp_rdma_ch *ch)
2563{ 2559{
2564 struct srp_target_port *target = ch->target; 2560 struct srp_target_port *target = ch->target;
@@ -2643,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2643 } 2639 }
2644} 2640}
2645 2641
2646static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2642static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2643 const struct ib_cm_event *event)
2647{ 2644{
2648 struct srp_rdma_ch *ch = cm_id->context; 2645 struct srp_rdma_ch *ch = cm_id->context;
2649 struct srp_target_port *target = ch->target; 2646 struct srp_target_port *target = ch->target;
@@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev,
3843 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3840 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3844 INIT_WORK(&target->remove_work, srp_remove_work); 3841 INIT_WORK(&target->remove_work, srp_remove_work);
3845 spin_lock_init(&target->lock); 3842 spin_lock_init(&target->lock);
3846 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); 3843 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3847 if (ret) 3844 if (ret)
3848 goto out; 3845 goto out;
3849 3846
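
Besides switching to rdma_query_gid() and dropping the bad_wr locals, the ib_srp.c hunks above const-qualify the ib_cm_event passed to the CM handlers. A minimal sketch of that aspect alone, assuming only libc; cm_event and cm_handler are toy names, not the IB CM types.

#include <stdio.h>

struct cm_event { int type; };

/* The handler takes the event as const: it may inspect the fields, but the
 * compiler rejects any attempt to modify what the core handed over. */
static int cm_handler(const struct cm_event *event)
{
	return event->type == 0 ? 0 : -1;
}

int main(void)
{
	struct cm_event ev = { .type = 0 };

	printf("handler -> %d\n", cm_handler(&ev));
	return 0;
}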
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1ae638b58b63..f37cbad022a2 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
575 sport->sm_lid = port_attr.sm_lid; 575 sport->sm_lid = port_attr.sm_lid;
576 sport->lid = port_attr.lid; 576 sport->lid = port_attr.lid;
577 577
578 ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, 578 ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
579 NULL);
580 if (ret) 579 if (ret)
581 goto err_query_port; 580 goto err_query_port;
582 581
@@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
720 WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) 719 WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
721 && ioctx_size != sizeof(struct srpt_send_ioctx)); 720 && ioctx_size != sizeof(struct srpt_send_ioctx));
722 721
723 ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); 722 ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
724 if (!ring) 723 if (!ring)
725 goto out; 724 goto out;
726 for (i = 0; i < ring_size; ++i) { 725 for (i = 0; i < ring_size; ++i) {
@@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
734err: 733err:
735 while (--i >= 0) 734 while (--i >= 0)
736 srpt_free_ioctx(sdev, ring[i], dma_size, dir); 735 srpt_free_ioctx(sdev, ring[i], dma_size, dir);
737 kfree(ring); 736 kvfree(ring);
738 ring = NULL; 737 ring = NULL;
739out: 738out:
740 return ring; 739 return ring;
@@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
759 758
760 for (i = 0; i < ring_size; ++i) 759 for (i = 0; i < ring_size; ++i)
761 srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); 760 srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
762 kfree(ioctx_ring); 761 kvfree(ioctx_ring);
763} 762}
764 763
765/** 764/**
@@ -817,7 +816,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
817 struct srpt_recv_ioctx *ioctx) 816 struct srpt_recv_ioctx *ioctx)
818{ 817{
819 struct ib_sge list; 818 struct ib_sge list;
820 struct ib_recv_wr wr, *bad_wr; 819 struct ib_recv_wr wr;
821 820
822 BUG_ON(!sdev); 821 BUG_ON(!sdev);
823 list.addr = ioctx->ioctx.dma; 822 list.addr = ioctx->ioctx.dma;
@@ -831,9 +830,9 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
831 wr.num_sge = 1; 830 wr.num_sge = 1;
832 831
833 if (sdev->use_srq) 832 if (sdev->use_srq)
834 return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); 833 return ib_post_srq_recv(sdev->srq, &wr, NULL);
835 else 834 else
836 return ib_post_recv(ch->qp, &wr, &bad_wr); 835 return ib_post_recv(ch->qp, &wr, NULL);
837} 836}
838 837
839/** 838/**
@@ -847,7 +846,6 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
847 */ 846 */
848static int srpt_zerolength_write(struct srpt_rdma_ch *ch) 847static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
849{ 848{
850 struct ib_send_wr *bad_wr;
851 struct ib_rdma_wr wr = { 849 struct ib_rdma_wr wr = {
852 .wr = { 850 .wr = {
853 .next = NULL, 851 .next = NULL,
@@ -860,7 +858,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
860 pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, 858 pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
861 ch->qp->qp_num); 859 ch->qp->qp_num);
862 860
863 return ib_post_send(ch->qp, &wr.wr, &bad_wr); 861 return ib_post_send(ch->qp, &wr.wr, NULL);
864} 862}
865 863
866static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) 864static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1754,13 +1752,15 @@ retry:
1754 */ 1752 */
1755 qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); 1753 qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
1756 qp_init->cap.max_rdma_ctxs = sq_size / 2; 1754 qp_init->cap.max_rdma_ctxs = sq_size / 2;
1757 qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); 1755 qp_init->cap.max_send_sge = min(attrs->max_send_sge,
1756 SRPT_MAX_SG_PER_WQE);
1758 qp_init->port_num = ch->sport->port; 1757 qp_init->port_num = ch->sport->port;
1759 if (sdev->use_srq) { 1758 if (sdev->use_srq) {
1760 qp_init->srq = sdev->srq; 1759 qp_init->srq = sdev->srq;
1761 } else { 1760 } else {
1762 qp_init->cap.max_recv_wr = ch->rq_size; 1761 qp_init->cap.max_recv_wr = ch->rq_size;
1763 qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; 1762 qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
1763 SRPT_MAX_SG_PER_WQE);
1764 } 1764 }
1765 1765
1766 if (ch->using_rdma_cm) { 1766 if (ch->using_rdma_cm) {
@@ -1833,8 +1833,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch)
1833 int ret; 1833 int ret;
1834 1834
1835 if (!srpt_set_ch_state(ch, CH_DRAINING)) { 1835 if (!srpt_set_ch_state(ch, CH_DRAINING)) {
1836 pr_debug("%s-%d: already closed\n", ch->sess_name, 1836 pr_debug("%s: already closed\n", ch->sess_name);
1837 ch->qp->qp_num);
1838 return false; 1837 return false;
1839 } 1838 }
1840 1839
@@ -1940,8 +1939,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
1940 list_for_each_entry(nexus, &sport->nexus_list, entry) { 1939 list_for_each_entry(nexus, &sport->nexus_list, entry) {
1941 list_for_each_entry(ch, &nexus->ch_list, list) { 1940 list_for_each_entry(ch, &nexus->ch_list, list) {
1942 if (srpt_disconnect_ch(ch) >= 0) 1941 if (srpt_disconnect_ch(ch) >= 0)
1943 pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", 1942 pr_info("Closing channel %s because target %s_%d has been disabled\n",
1944 ch->sess_name, ch->qp->qp_num, 1943 ch->sess_name,
1945 sport->sdev->device->name, sport->port); 1944 sport->sdev->device->name, sport->port);
1946 srpt_close_ch(ch); 1945 srpt_close_ch(ch);
1947 } 1946 }
@@ -2086,7 +2085,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
2086 struct rdma_conn_param rdma_cm; 2085 struct rdma_conn_param rdma_cm;
2087 struct ib_cm_rep_param ib_cm; 2086 struct ib_cm_rep_param ib_cm;
2088 } *rep_param = NULL; 2087 } *rep_param = NULL;
2089 struct srpt_rdma_ch *ch; 2088 struct srpt_rdma_ch *ch = NULL;
2090 char i_port_id[36]; 2089 char i_port_id[36];
2091 u32 it_iu_len; 2090 u32 it_iu_len;
2092 int i, ret; 2091 int i, ret;
@@ -2233,13 +2232,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
2233 TARGET_PROT_NORMAL, 2232 TARGET_PROT_NORMAL,
2234 i_port_id + 2, ch, NULL); 2233 i_port_id + 2, ch, NULL);
2235 if (IS_ERR_OR_NULL(ch->sess)) { 2234 if (IS_ERR_OR_NULL(ch->sess)) {
2235 WARN_ON_ONCE(ch->sess == NULL);
2236 ret = PTR_ERR(ch->sess); 2236 ret = PTR_ERR(ch->sess);
2237 ch->sess = NULL;
2237 pr_info("Rejected login for initiator %s: ret = %d.\n", 2238 pr_info("Rejected login for initiator %s: ret = %d.\n",
2238 ch->sess_name, ret); 2239 ch->sess_name, ret);
2239 rej->reason = cpu_to_be32(ret == -ENOMEM ? 2240 rej->reason = cpu_to_be32(ret == -ENOMEM ?
2240 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : 2241 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
2241 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); 2242 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
2242 goto reject; 2243 goto destroy_ib;
2243 } 2244 }
2244 2245
2245 mutex_lock(&sport->mutex); 2246 mutex_lock(&sport->mutex);
@@ -2278,7 +2279,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
2278 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2279 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2279 pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", 2280 pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
2280 ret); 2281 ret);
2281 goto destroy_ib; 2282 goto reject;
2282 } 2283 }
2283 2284
2284 pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, 2285 pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess,
@@ -2357,8 +2358,11 @@ free_ring:
2357 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, 2358 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
2358 ch->sport->sdev, ch->rq_size, 2359 ch->sport->sdev, ch->rq_size,
2359 ch->max_rsp_size, DMA_TO_DEVICE); 2360 ch->max_rsp_size, DMA_TO_DEVICE);
2361
2360free_ch: 2362free_ch:
2361 if (ib_cm_id) 2363 if (rdma_cm_id)
2364 rdma_cm_id->context = NULL;
2365 else
2362 ib_cm_id->context = NULL; 2366 ib_cm_id->context = NULL;
2363 kfree(ch); 2367 kfree(ch);
2364 ch = NULL; 2368 ch = NULL;
@@ -2378,6 +2382,15 @@ reject:
2378 ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, 2382 ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2379 rej, sizeof(*rej)); 2383 rej, sizeof(*rej));
2380 2384
2385 if (ch && ch->sess) {
2386 srpt_close_ch(ch);
2387 /*
2388 * Tell the caller not to free cm_id since
2389 * srpt_release_channel_work() will do that.
2390 */
2391 ret = 0;
2392 }
2393
2381out: 2394out:
2382 kfree(rep_param); 2395 kfree(rep_param);
2383 kfree(rsp); 2396 kfree(rsp);
@@ -2387,7 +2400,7 @@ out:
2387} 2400}
2388 2401
2389static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, 2402static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
2390 struct ib_cm_req_event_param *param, 2403 const struct ib_cm_req_event_param *param,
2391 void *private_data) 2404 void *private_data)
2392{ 2405{
2393 char sguid[40]; 2406 char sguid[40];
@@ -2499,7 +2512,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
2499 * a non-zero value in any other case will trigger a race with the 2512 * a non-zero value in any other case will trigger a race with the
2500 * ib_destroy_cm_id() call in srpt_release_channel(). 2513 * ib_destroy_cm_id() call in srpt_release_channel().
2501 */ 2514 */
2502static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2515static int srpt_cm_handler(struct ib_cm_id *cm_id,
2516 const struct ib_cm_event *event)
2503{ 2517{
2504 struct srpt_rdma_ch *ch = cm_id->context; 2518 struct srpt_rdma_ch *ch = cm_id->context;
2505 int ret; 2519 int ret;
@@ -2609,7 +2623,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
2609 struct srpt_send_ioctx *ioctx = 2623 struct srpt_send_ioctx *ioctx =
2610 container_of(se_cmd, struct srpt_send_ioctx, cmd); 2624 container_of(se_cmd, struct srpt_send_ioctx, cmd);
2611 struct srpt_rdma_ch *ch = ioctx->ch; 2625 struct srpt_rdma_ch *ch = ioctx->ch;
2612 struct ib_send_wr *first_wr = NULL, *bad_wr; 2626 struct ib_send_wr *first_wr = NULL;
2613 struct ib_cqe *cqe = &ioctx->rdma_cqe; 2627 struct ib_cqe *cqe = &ioctx->rdma_cqe;
2614 enum srpt_command_state new_state; 2628 enum srpt_command_state new_state;
2615 int ret, i; 2629 int ret, i;
@@ -2633,7 +2647,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
2633 cqe = NULL; 2647 cqe = NULL;
2634 } 2648 }
2635 2649
2636 ret = ib_post_send(ch->qp, first_wr, &bad_wr); 2650 ret = ib_post_send(ch->qp, first_wr, NULL);
2637 if (ret) { 2651 if (ret) {
2638 pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", 2652 pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n",
2639 __func__, ret, ioctx->n_rdma, 2653 __func__, ret, ioctx->n_rdma,
@@ -2671,7 +2685,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
2671 container_of(cmd, struct srpt_send_ioctx, cmd); 2685 container_of(cmd, struct srpt_send_ioctx, cmd);
2672 struct srpt_rdma_ch *ch = ioctx->ch; 2686 struct srpt_rdma_ch *ch = ioctx->ch;
2673 struct srpt_device *sdev = ch->sport->sdev; 2687 struct srpt_device *sdev = ch->sport->sdev;
2674 struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; 2688 struct ib_send_wr send_wr, *first_wr = &send_wr;
2675 struct ib_sge sge; 2689 struct ib_sge sge;
2676 enum srpt_command_state state; 2690 enum srpt_command_state state;
2677 int resp_len, ret, i; 2691 int resp_len, ret, i;
@@ -2744,7 +2758,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
2744 send_wr.opcode = IB_WR_SEND; 2758 send_wr.opcode = IB_WR_SEND;
2745 send_wr.send_flags = IB_SEND_SIGNALED; 2759 send_wr.send_flags = IB_SEND_SIGNALED;
2746 2760
2747 ret = ib_post_send(ch->qp, first_wr, &bad_wr); 2761 ret = ib_post_send(ch->qp, first_wr, NULL);
2748 if (ret < 0) { 2762 if (ret < 0) {
2749 pr_err("%s: sending cmd response failed for tag %llu (%d)\n", 2763 pr_err("%s: sending cmd response failed for tag %llu (%d)\n",
2750 __func__, ioctx->cmd.tag, ret); 2764 __func__, ioctx->cmd.tag, ret);
@@ -2968,7 +2982,8 @@ static void srpt_add_one(struct ib_device *device)
2968 2982
2969 pr_debug("device = %p\n", device); 2983 pr_debug("device = %p\n", device);
2970 2984
2971 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); 2985 sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt),
2986 GFP_KERNEL);
2972 if (!sdev) 2987 if (!sdev)
2973 goto err; 2988 goto err;
2974 2989
@@ -3022,8 +3037,6 @@ static void srpt_add_one(struct ib_device *device)
3022 srpt_event_handler); 3037 srpt_event_handler);
3023 ib_register_event_handler(&sdev->event_handler); 3038 ib_register_event_handler(&sdev->event_handler);
3024 3039
3025 WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
3026
3027 for (i = 1; i <= sdev->device->phys_port_cnt; i++) { 3040 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3028 sport = &sdev->port[i - 1]; 3041 sport = &sdev->port[i - 1];
3029 INIT_LIST_HEAD(&sport->nexus_list); 3042 INIT_LIST_HEAD(&sport->nexus_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 2361483476a0..444dfd7281b5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -396,9 +396,9 @@ struct srpt_port {
396 * @sdev_mutex: Serializes use_srq changes. 396 * @sdev_mutex: Serializes use_srq changes.
397 * @use_srq: Whether or not to use SRQ. 397 * @use_srq: Whether or not to use SRQ.
398 * @ioctx_ring: Per-HCA SRQ. 398 * @ioctx_ring: Per-HCA SRQ.
399 * @port: Information about the ports owned by this HCA.
400 * @event_handler: Per-HCA asynchronous IB event handler. 399 * @event_handler: Per-HCA asynchronous IB event handler.
401 * @list: Node in srpt_dev_list. 400 * @list: Node in srpt_dev_list.
401 * @port: Information about the ports owned by this HCA.
402 */ 402 */
403struct srpt_device { 403struct srpt_device {
404 struct ib_device *device; 404 struct ib_device *device;
@@ -410,9 +410,9 @@ struct srpt_device {
410 struct mutex sdev_mutex; 410 struct mutex sdev_mutex;
411 bool use_srq; 411 bool use_srq;
412 struct srpt_recv_ioctx **ioctx_ring; 412 struct srpt_recv_ioctx **ioctx_ring;
413 struct srpt_port port[2];
414 struct ib_event_handler event_handler; 413 struct ib_event_handler event_handler;
415 struct list_head list; 414 struct list_head list;
415 struct srpt_port port[];
416}; 416};
417 417
418#endif /* IB_SRPT_H */ 418#endif /* IB_SRPT_H */
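
The srpt hunks above replace the fixed srpt_port port[2] array with a flexible array member sized per HCA, allocated with struct_size() so the per-port count no longer needs the WARN_ON check. A minimal userspace sketch of the flexible-array layout and the equivalent size computation, assuming only libc; struct port and struct dev_table are illustrative names, and the kernel's struct_size() additionally guards the multiplication against overflow.

#include <stdio.h>
#include <stdlib.h>

struct port { int lid; };

struct dev_table {
	int nports;
	struct port port[];	/* flexible array member instead of port[2] */
};

int main(void)
{
	int nports = 4;
	/* Header plus nports trailing elements, as struct_size() would compute. */
	struct dev_table *tbl = malloc(sizeof(*tbl) + nports * sizeof(tbl->port[0]));

	if (!tbl)
		return 1;
	tbl->nports = nports;
	for (int i = 0; i < nports; i++)
		tbl->port[i].lid = 100 + i;
	printf("port[3].lid = %d\n", tbl->port[3].lid);
	free(tbl);
	return 0;
}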
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 09e38f0733bd..b8f75a22fb6c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -755,7 +755,7 @@ struct cpl_abort_req_rss {
755struct cpl_abort_req_rss6 { 755struct cpl_abort_req_rss6 {
756 WR_HDR; 756 WR_HDR;
757 union opcode_tid ot; 757 union opcode_tid ot;
758 __u32 srqidx_status; 758 __be32 srqidx_status;
759}; 759};
760 760
761#define ABORT_RSS_STATUS_S 0 761#define ABORT_RSS_STATUS_S 0
@@ -785,7 +785,7 @@ struct cpl_abort_rpl_rss {
785 785
786struct cpl_abort_rpl_rss6 { 786struct cpl_abort_rpl_rss6 {
787 union opcode_tid ot; 787 union opcode_tid ot;
788 __u32 srqidx_status; 788 __be32 srqidx_status;
789}; 789};
790 790
791struct cpl_abort_rpl { 791struct cpl_abort_rpl {
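
The t4_msg.h hunk above re-annotates srqidx_status from __u32 to __be32, which lets sparse flag any access that forgets the byte-order conversion. A small userspace analogue of handling a big-endian field explicitly, assuming only libc and the POSIX htonl/ntohl helpers; struct abort_req here is a toy stand-in.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* A field that carries wire/hardware byte order is stored big-endian and
 * converted explicitly before use; htonl/ntohl play the role of the kernel's
 * cpu_to_be32/be32_to_cpu on __be32 fields. */
struct abort_req {
	uint32_t srqidx_status;		/* big-endian on the wire */
};

int main(void)
{
	struct abort_req req;

	req.srqidx_status = htonl(0x12345678u);		/* store in wire order */
	printf("host value: 0x%08x\n", (unsigned)ntohl(req.srqidx_status));
	return 0;
}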
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index fe4ac40dbade..3ce14d42ddc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -433,6 +433,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
433 case MLX5_CMD_OP_FPGA_QUERY_QP: 433 case MLX5_CMD_OP_FPGA_QUERY_QP:
434 case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS: 434 case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
435 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: 435 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
436 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
437 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
436 *status = MLX5_DRIVER_STATUS_ABORTED; 438 *status = MLX5_DRIVER_STATUS_ABORTED;
437 *synd = MLX5_DRIVER_SYND; 439 *synd = MLX5_DRIVER_SYND;
438 return -EIO; 440 return -EIO;
@@ -612,6 +614,9 @@ const char *mlx5_command_str(int command)
612 MLX5_COMMAND_STR_CASE(ARM_XRQ); 614 MLX5_COMMAND_STR_CASE(ARM_XRQ);
613 MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT); 615 MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
614 MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT); 616 MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
617 MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
618 MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
619 MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
615 default: return "unknown command opcode"; 620 default: return "unknown command opcode";
616 } 621 }
617} 622}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index b3820a34e773..0f11fff32a9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p,
240 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: 240 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
241 trace_seq_printf(p, "ft=%p\n", dst->ft); 241 trace_seq_printf(p, "ft=%p\n", dst->ft);
242 break; 242 break;
243 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
244 trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
245 break;
243 case MLX5_FLOW_DESTINATION_TYPE_TIR: 246 case MLX5_FLOW_DESTINATION_TYPE_TIR:
244 trace_seq_printf(p, "tir=%u\n", dst->tir_num); 247 trace_seq_printf(p, "tir=%u\n", dst->tir_num);
245 break; 248 break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 6a62b84e57f4..8e01f818021b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
368 int list_size = 0; 368 int list_size = 0;
369 369
370 list_for_each_entry(dst, &fte->node.children, node.list) { 370 list_for_each_entry(dst, &fte->node.children, node.list) {
371 unsigned int id; 371 unsigned int id, type = dst->dest_attr.type;
372 372
373 if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) 373 if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
374 continue; 374 continue;
375 375
376 MLX5_SET(dest_format_struct, in_dests, destination_type, 376 switch (type) {
377 dst->dest_attr.type); 377 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
378 if (dst->dest_attr.type == 378 id = dst->dest_attr.ft_num;
379 MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { 379 type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
380 break;
381 case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
380 id = dst->dest_attr.ft->id; 382 id = dst->dest_attr.ft->id;
381 } else if (dst->dest_attr.type == 383 break;
382 MLX5_FLOW_DESTINATION_TYPE_VPORT) { 384 case MLX5_FLOW_DESTINATION_TYPE_VPORT:
383 id = dst->dest_attr.vport.num; 385 id = dst->dest_attr.vport.num;
384 MLX5_SET(dest_format_struct, in_dests, 386 MLX5_SET(dest_format_struct, in_dests,
385 destination_eswitch_owner_vhca_id_valid, 387 destination_eswitch_owner_vhca_id_valid,
@@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
387 MLX5_SET(dest_format_struct, in_dests, 389 MLX5_SET(dest_format_struct, in_dests,
388 destination_eswitch_owner_vhca_id, 390 destination_eswitch_owner_vhca_id,
389 dst->dest_attr.vport.vhca_id); 391 dst->dest_attr.vport.vhca_id);
390 } else { 392 break;
393 default:
391 id = dst->dest_attr.tir_num; 394 id = dst->dest_attr.tir_num;
392 } 395 }
396
397 MLX5_SET(dest_format_struct, in_dests, destination_type,
398 type);
393 MLX5_SET(dest_format_struct, in_dests, destination_id, id); 399 MLX5_SET(dest_format_struct, in_dests, destination_id, id);
394 in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); 400 in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
395 list_size++; 401 list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 261cb6aacf12..f418541af7cf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -310,89 +310,17 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
310 return NULL; 310 return NULL;
311} 311}
312 312
313static bool check_last_reserved(const u32 *match_criteria)
314{
315 char *match_criteria_reserved =
316 MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED);
317
318 return !match_criteria_reserved[0] &&
319 !memcmp(match_criteria_reserved, match_criteria_reserved + 1,
320 MLX5_FLD_SZ_BYTES(fte_match_param,
321 MLX5_FTE_MATCH_PARAM_RESERVED) - 1);
322}
323
324static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria)
325{
326 if (match_criteria_enable & ~(
327 (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) |
328 (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
329 (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) |
330 (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)))
331 return false;
332
333 if (!(match_criteria_enable &
334 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) {
335 char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
336 match_criteria, outer_headers);
337
338 if (fg_type_mask[0] ||
339 memcmp(fg_type_mask, fg_type_mask + 1,
340 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
341 return false;
342 }
343
344 if (!(match_criteria_enable &
345 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) {
346 char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
347 match_criteria, misc_parameters);
348
349 if (fg_type_mask[0] ||
350 memcmp(fg_type_mask, fg_type_mask + 1,
351 MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
352 return false;
353 }
354
355 if (!(match_criteria_enable &
356 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) {
357 char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
358 match_criteria, inner_headers);
359
360 if (fg_type_mask[0] ||
361 memcmp(fg_type_mask, fg_type_mask + 1,
362 MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
363 return false;
364 }
365
366 if (!(match_criteria_enable &
367 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) {
368 char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
369 match_criteria, misc_parameters_2);
370
371 if (fg_type_mask[0] ||
372 memcmp(fg_type_mask, fg_type_mask + 1,
373 MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1))
374 return false;
375 }
376
377 return check_last_reserved(match_criteria);
378}
379
380static bool check_valid_spec(const struct mlx5_flow_spec *spec) 313static bool check_valid_spec(const struct mlx5_flow_spec *spec)
381{ 314{
382 int i; 315 int i;
383 316
384 if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) {
385 pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n");
386 return false;
387 }
388
389 for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) 317 for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
390 if (spec->match_value[i] & ~spec->match_criteria[i]) { 318 if (spec->match_value[i] & ~spec->match_criteria[i]) {
391 pr_warn("mlx5_core: match_value differs from match_criteria\n"); 319 pr_warn("mlx5_core: match_value differs from match_criteria\n");
392 return false; 320 return false;
393 } 321 }
394 322
395 return check_last_reserved(spec->match_value); 323 return true;
396} 324}
397 325
398static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) 326static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
@@ -1159,9 +1087,6 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
1159 struct mlx5_flow_group *fg; 1087 struct mlx5_flow_group *fg;
1160 int err; 1088 int err;
1161 1089
1162 if (!check_valid_mask(match_criteria_enable, match_criteria))
1163 return ERR_PTR(-EINVAL);
1164
1165 if (ft->autogroup.active) 1090 if (ft->autogroup.active)
1166 return ERR_PTR(-EPERM); 1091 return ERR_PTR(-EPERM);
1167 1092
@@ -1432,7 +1357,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
1432 (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE && 1357 (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1433 d1->ft == d2->ft) || 1358 d1->ft == d2->ft) ||
1434 (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR && 1359 (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
1435 d1->tir_num == d2->tir_num)) 1360 d1->tir_num == d2->tir_num) ||
1361 (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
1362 d1->ft_num == d2->ft_num))
1436 return true; 1363 return true;
1437 } 1364 }
1438 1365
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index b7c21eb21a21..e3797a44e074 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -584,6 +584,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
584 return 0; 584 return 0;
585} 585}
586 586
587static void mlx5_rdma_netdev_free(struct net_device *netdev)
588{
589 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
590 struct mlx5i_priv *ipriv = priv->ppriv;
591 const struct mlx5e_profile *profile = priv->profile;
592
593 mlx5e_detach_netdev(priv);
594 profile->cleanup(priv);
595 destroy_workqueue(priv->wq);
596
597 if (!ipriv->sub_interface) {
598 mlx5i_pkey_qpn_ht_cleanup(netdev);
599 mlx5e_destroy_mdev_resources(priv->mdev);
600 }
601}
602
587struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, 603struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
588 struct ib_device *ibdev, 604 struct ib_device *ibdev,
589 const char *name, 605 const char *name,
@@ -657,6 +673,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
657 rn->detach_mcast = mlx5i_detach_mcast; 673 rn->detach_mcast = mlx5i_detach_mcast;
658 rn->set_id = mlx5i_set_pkey_index; 674 rn->set_id = mlx5i_set_pkey_index;
659 675
676 netdev->priv_destructor = mlx5_rdma_netdev_free;
677 netdev->needs_free_netdev = 1;
678
660 return netdev; 679 return netdev;
661 680
662destroy_ht: 681destroy_ht:
@@ -669,21 +688,3 @@ err_free_netdev:
669 return NULL; 688 return NULL;
670} 689}
671EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); 690EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
672
673void mlx5_rdma_netdev_free(struct net_device *netdev)
674{
675 struct mlx5e_priv *priv = mlx5i_epriv(netdev);
676 struct mlx5i_priv *ipriv = priv->ppriv;
677 const struct mlx5e_profile *profile = priv->profile;
678
679 mlx5e_detach_netdev(priv);
680 profile->cleanup(priv);
681 destroy_workqueue(priv->wq);
682
683 if (!ipriv->sub_interface) {
684 mlx5i_pkey_qpn_ht_cleanup(netdev);
685 mlx5e_destroy_mdev_resources(priv->mdev);
686 }
687 free_netdev(netdev);
688}
689EXPORT_SYMBOL(mlx5_rdma_netdev_free);
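
The mlx5 ipoib.c hunks above stop exporting mlx5_rdma_netdev_free() and instead register the cleanup as netdev->priv_destructor with needs_free_netdev set, so the netdev core runs the driver teardown and the final free. Below is a toy userspace model of that ownership transfer, assuming only libc; struct netdev, core_destroy and my_destructor are illustrative names, not the net core API.

#include <stdio.h>
#include <stdlib.h>

/* Toy model: the core runs a destructor the driver registered, instead of the
 * driver exporting a matching free routine that every caller must remember. */
struct netdev {
	void (*priv_destructor)(struct netdev *dev);
	int needs_free;
	int priv;
};

static void core_destroy(struct netdev *dev)
{
	if (dev->priv_destructor)
		dev->priv_destructor(dev);	/* driver-private teardown */
	if (dev->needs_free)
		free(dev);			/* the core owns the final free */
}

static void my_destructor(struct netdev *dev)
{
	printf("cleaning up priv=%d\n", dev->priv);
}

int main(void)
{
	struct netdev *dev = calloc(1, sizeof(*dev));

	if (!dev)
		return 1;
	dev->priv = 7;
	dev->priv_destructor = my_destructor;
	dev->needs_free = 1;
	core_destroy(dev);
	return 0;
}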
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0805fa6215ee..dc042017c293 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -378,7 +378,7 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
378 } 378 }
379 379
380 ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS, 380 ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
381 ndev->dev->attrs.max_sge - 1); 381 ndev->dev->attrs.max_send_sge - 1);
382 list_add(&ndev->entry, &device_list); 382 list_add(&ndev->entry, &device_list);
383out_unlock: 383out_unlock:
384 mutex_unlock(&device_list_mutex); 384 mutex_unlock(&device_list_mutex);
@@ -1093,7 +1093,6 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
1093static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, 1093static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
1094 struct nvme_rdma_request *req) 1094 struct nvme_rdma_request *req)
1095{ 1095{
1096 struct ib_send_wr *bad_wr;
1097 struct ib_send_wr wr = { 1096 struct ib_send_wr wr = {
1098 .opcode = IB_WR_LOCAL_INV, 1097 .opcode = IB_WR_LOCAL_INV,
1099 .next = NULL, 1098 .next = NULL,
@@ -1105,7 +1104,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
1105 req->reg_cqe.done = nvme_rdma_inv_rkey_done; 1104 req->reg_cqe.done = nvme_rdma_inv_rkey_done;
1106 wr.wr_cqe = &req->reg_cqe; 1105 wr.wr_cqe = &req->reg_cqe;
1107 1106
1108 return ib_post_send(queue->qp, &wr, &bad_wr); 1107 return ib_post_send(queue->qp, &wr, NULL);
1109} 1108}
1110 1109
1111static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, 1110static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
@@ -1308,7 +1307,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
1308 struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, 1307 struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
1309 struct ib_send_wr *first) 1308 struct ib_send_wr *first)
1310{ 1309{
1311 struct ib_send_wr wr, *bad_wr; 1310 struct ib_send_wr wr;
1312 int ret; 1311 int ret;
1313 1312
1314 sge->addr = qe->dma; 1313 sge->addr = qe->dma;
@@ -1327,7 +1326,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
1327 else 1326 else
1328 first = &wr; 1327 first = &wr;
1329 1328
1330 ret = ib_post_send(queue->qp, first, &bad_wr); 1329 ret = ib_post_send(queue->qp, first, NULL);
1331 if (unlikely(ret)) { 1330 if (unlikely(ret)) {
1332 dev_err(queue->ctrl->ctrl.device, 1331 dev_err(queue->ctrl->ctrl.device,
1333 "%s failed with error code %d\n", __func__, ret); 1332 "%s failed with error code %d\n", __func__, ret);
@@ -1338,7 +1337,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
1338static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, 1337static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
1339 struct nvme_rdma_qe *qe) 1338 struct nvme_rdma_qe *qe)
1340{ 1339{
1341 struct ib_recv_wr wr, *bad_wr; 1340 struct ib_recv_wr wr;
1342 struct ib_sge list; 1341 struct ib_sge list;
1343 int ret; 1342 int ret;
1344 1343
@@ -1353,7 +1352,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
1353 wr.sg_list = &list; 1352 wr.sg_list = &list;
1354 wr.num_sge = 1; 1353 wr.num_sge = 1;
1355 1354
1356 ret = ib_post_recv(queue->qp, &wr, &bad_wr); 1355 ret = ib_post_recv(queue->qp, &wr, NULL);
1357 if (unlikely(ret)) { 1356 if (unlikely(ret)) {
1358 dev_err(queue->ctrl->ctrl.device, 1357 dev_err(queue->ctrl->ctrl.device,
1359 "%s failed with error code %d\n", __func__, ret); 1358 "%s failed with error code %d\n", __func__, ret);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e7f43d1e1779..3533e918ea37 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -435,7 +435,6 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
435static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, 435static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
436 struct nvmet_rdma_cmd *cmd) 436 struct nvmet_rdma_cmd *cmd)
437{ 437{
438 struct ib_recv_wr *bad_wr;
439 int ret; 438 int ret;
440 439
441 ib_dma_sync_single_for_device(ndev->device, 440 ib_dma_sync_single_for_device(ndev->device,
@@ -443,9 +442,9 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
443 DMA_FROM_DEVICE); 442 DMA_FROM_DEVICE);
444 443
445 if (ndev->srq) 444 if (ndev->srq)
446 ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); 445 ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
447 else 446 else
448 ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); 447 ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
449 448
450 if (unlikely(ret)) 449 if (unlikely(ret))
451 pr_err("post_recv cmd failed\n"); 450 pr_err("post_recv cmd failed\n");
@@ -532,7 +531,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
532 struct nvmet_rdma_rsp *rsp = 531 struct nvmet_rdma_rsp *rsp =
533 container_of(req, struct nvmet_rdma_rsp, req); 532 container_of(req, struct nvmet_rdma_rsp, req);
534 struct rdma_cm_id *cm_id = rsp->queue->cm_id; 533 struct rdma_cm_id *cm_id = rsp->queue->cm_id;
535 struct ib_send_wr *first_wr, *bad_wr; 534 struct ib_send_wr *first_wr;
536 535
537 if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) { 536 if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
538 rsp->send_wr.opcode = IB_WR_SEND_WITH_INV; 537 rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
@@ -553,7 +552,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
553 rsp->send_sge.addr, rsp->send_sge.length, 552 rsp->send_sge.addr, rsp->send_sge.length,
554 DMA_TO_DEVICE); 553 DMA_TO_DEVICE);
555 554
556 if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) { 555 if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
557 pr_err("sending cmd response failed\n"); 556 pr_err("sending cmd response failed\n");
558 nvmet_rdma_release_rsp(rsp); 557 nvmet_rdma_release_rsp(rsp);
559 } 558 }
@@ -892,7 +891,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
892 891
893 inline_page_count = num_pages(port->inline_data_size); 892 inline_page_count = num_pages(port->inline_data_size);
894 inline_sge_count = max(cm_id->device->attrs.max_sge_rd, 893 inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
895 cm_id->device->attrs.max_sge) - 1; 894 cm_id->device->attrs.max_recv_sge) - 1;
896 if (inline_page_count > inline_sge_count) { 895 if (inline_page_count > inline_sge_count) {
897 pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", 896 pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
898 port->inline_data_size, cm_id->device->name, 897 port->inline_data_size, cm_id->device->name,
@@ -969,7 +968,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
969 qp_attr.cap.max_send_wr = queue->send_queue_size + 1; 968 qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
970 qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; 969 qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
971 qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, 970 qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
972 ndev->device->attrs.max_sge); 971 ndev->device->attrs.max_send_sge);
973 972
974 if (ndev->srq) { 973 if (ndev->srq) {
975 qp_attr.srq = ndev->srq; 974 qp_attr.srq = ndev->srq;
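
[Editor's sketch, not part of this patch] The old single attrs.max_sge field is split into max_send_sge and max_recv_sge, so callers now size each direction separately, as the hunks above do. A hedged illustration of clamping requested QP capabilities against the split limits (the helper name is invented):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

static void example_clamp_qp_caps(struct ib_device *dev,
				  struct ib_qp_init_attr *init)
{
	/* Send and receive SGE limits may differ on asymmetric hardware. */
	init->cap.max_send_sge = min_t(u32, init->cap.max_send_sge,
				       dev->attrs.max_send_sge);
	init->cap.max_recv_sge = min_t(u32, init->cap.max_recv_sge,
				       dev->attrs.max_recv_sge);
}
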
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index c55ea4e6201b..5fdb9a509a97 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -802,7 +802,7 @@ out1:
802 */ 802 */
803static int smbd_post_send_negotiate_req(struct smbd_connection *info) 803static int smbd_post_send_negotiate_req(struct smbd_connection *info)
804{ 804{
805 struct ib_send_wr send_wr, *send_wr_fail; 805 struct ib_send_wr send_wr;
806 int rc = -ENOMEM; 806 int rc = -ENOMEM;
807 struct smbd_request *request; 807 struct smbd_request *request;
808 struct smbd_negotiate_req *packet; 808 struct smbd_negotiate_req *packet;
@@ -854,7 +854,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
854 854
855 request->has_payload = false; 855 request->has_payload = false;
856 atomic_inc(&info->send_pending); 856 atomic_inc(&info->send_pending);
857 rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); 857 rc = ib_post_send(info->id->qp, &send_wr, NULL);
858 if (!rc) 858 if (!rc)
859 return 0; 859 return 0;
860 860
@@ -1024,7 +1024,7 @@ static void smbd_destroy_header(struct smbd_connection *info,
1024static int smbd_post_send(struct smbd_connection *info, 1024static int smbd_post_send(struct smbd_connection *info,
1025 struct smbd_request *request, bool has_payload) 1025 struct smbd_request *request, bool has_payload)
1026{ 1026{
1027 struct ib_send_wr send_wr, *send_wr_fail; 1027 struct ib_send_wr send_wr;
1028 int rc, i; 1028 int rc, i;
1029 1029
1030 for (i = 0; i < request->num_sge; i++) { 1030 for (i = 0; i < request->num_sge; i++) {
@@ -1055,7 +1055,7 @@ static int smbd_post_send(struct smbd_connection *info,
1055 atomic_inc(&info->send_pending); 1055 atomic_inc(&info->send_pending);
1056 } 1056 }
1057 1057
1058 rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail); 1058 rc = ib_post_send(info->id->qp, &send_wr, NULL);
1059 if (rc) { 1059 if (rc) {
1060 log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); 1060 log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
1061 if (has_payload) { 1061 if (has_payload) {
@@ -1184,7 +1184,7 @@ static int smbd_post_send_data(
1184static int smbd_post_recv( 1184static int smbd_post_recv(
1185 struct smbd_connection *info, struct smbd_response *response) 1185 struct smbd_connection *info, struct smbd_response *response)
1186{ 1186{
1187 struct ib_recv_wr recv_wr, *recv_wr_fail = NULL; 1187 struct ib_recv_wr recv_wr;
1188 int rc = -EIO; 1188 int rc = -EIO;
1189 1189
1190 response->sge.addr = ib_dma_map_single( 1190 response->sge.addr = ib_dma_map_single(
@@ -1203,7 +1203,7 @@ static int smbd_post_recv(
1203 recv_wr.sg_list = &response->sge; 1203 recv_wr.sg_list = &response->sge;
1204 recv_wr.num_sge = 1; 1204 recv_wr.num_sge = 1;
1205 1205
1206 rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail); 1206 rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
1207 if (rc) { 1207 if (rc) {
1208 ib_dma_unmap_single(info->id->device, response->sge.addr, 1208 ib_dma_unmap_single(info->id->device, response->sge.addr,
1209 response->sge.length, DMA_FROM_DEVICE); 1209 response->sge.length, DMA_FROM_DEVICE);
@@ -1662,9 +1662,16 @@ static struct smbd_connection *_smbd_get_connection(
1662 info->max_receive_size = smbd_max_receive_size; 1662 info->max_receive_size = smbd_max_receive_size;
1663 info->keep_alive_interval = smbd_keep_alive_interval; 1663 info->keep_alive_interval = smbd_keep_alive_interval;
1664 1664
1665 if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) { 1665 if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
1666 log_rdma_event(ERR, "warning: device max_sge = %d too small\n", 1666 log_rdma_event(ERR,
1667 info->id->device->attrs.max_sge); 1667 "warning: device max_send_sge = %d too small\n",
1668 info->id->device->attrs.max_send_sge);
1669 log_rdma_event(ERR, "Queue Pair creation may fail\n");
1670 }
1671 if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
1672 log_rdma_event(ERR,
1673 "warning: device max_recv_sge = %d too small\n",
1674 info->id->device->attrs.max_recv_sge);
1668 log_rdma_event(ERR, "Queue Pair creation may fail\n"); 1675 log_rdma_event(ERR, "Queue Pair creation may fail\n");
1669 } 1676 }
1670 1677
@@ -2473,7 +2480,6 @@ struct smbd_mr *smbd_register_mr(
2473 int rc, i; 2480 int rc, i;
2474 enum dma_data_direction dir; 2481 enum dma_data_direction dir;
2475 struct ib_reg_wr *reg_wr; 2482 struct ib_reg_wr *reg_wr;
2476 struct ib_send_wr *bad_wr;
2477 2483
2478 if (num_pages > info->max_frmr_depth) { 2484 if (num_pages > info->max_frmr_depth) {
2479 log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", 2485 log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
@@ -2547,7 +2553,7 @@ skip_multiple_pages:
2547 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution 2553 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
2548 * on the next ib_post_send when we actually send I/O to remote peer 2554 * on the next ib_post_send when we actually send I/O to remote peer
2549 */ 2555 */
2550 rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr); 2556 rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
2551 if (!rc) 2557 if (!rc)
2552 return smbdirect_mr; 2558 return smbdirect_mr;
2553 2559
@@ -2592,7 +2598,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
2592 */ 2598 */
2593int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) 2599int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
2594{ 2600{
2595 struct ib_send_wr *wr, *bad_wr; 2601 struct ib_send_wr *wr;
2596 struct smbd_connection *info = smbdirect_mr->conn; 2602 struct smbd_connection *info = smbdirect_mr->conn;
2597 int rc = 0; 2603 int rc = 0;
2598 2604
@@ -2607,7 +2613,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
2607 wr->send_flags = IB_SEND_SIGNALED; 2613 wr->send_flags = IB_SEND_SIGNALED;
2608 2614
2609 init_completion(&smbdirect_mr->invalidate_done); 2615 init_completion(&smbdirect_mr->invalidate_done);
2610 rc = ib_post_send(info->id->qp, wr, &bad_wr); 2616 rc = ib_post_send(info->id->qp, wr, NULL);
2611 if (rc) { 2617 if (rc) {
2612 log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); 2618 log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
2613 smbd_disconnect_rdma_connection(info); 2619 smbd_disconnect_rdma_connection(info);
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..3e8215b2c371 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
98 * period). 98 * period).
99 */ 99 */
100 100
101#define idr_lock(idr) xa_lock(&(idr)->idr_rt)
102#define idr_unlock(idr) xa_unlock(&(idr)->idr_rt)
103#define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt)
104#define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt)
105#define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt)
106#define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt)
107#define idr_lock_irqsave(idr, flags) \
108 xa_lock_irqsave(&(idr)->idr_rt, flags)
109#define idr_unlock_irqrestore(idr, flags) \
110 xa_unlock_irqrestore(&(idr)->idr_rt, flags)
111
101void idr_preload(gfp_t gfp_mask); 112void idr_preload(gfp_t gfp_mask);
102 113
103int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t); 114int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t);
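
[Editor's sketch, not part of this patch] The new idr_lock()/idr_unlock() wrappers expose the xarray spinlock behind an IDR. A minimal illustration of a locked lookup (the idr instance and id are illustrative):

#include <linux/idr.h>

static void *example_idr_find_locked(struct idr *idr, unsigned long id)
{
	void *entry;

	idr_lock(idr);
	entry = idr_find(idr, id);	/* lookup under the xa_lock */
	idr_unlock(idr);

	return entry;
}
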
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 54f385cc8811..7a452716de4b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1235,14 +1235,11 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
1235{ 1235{
1236 return ERR_PTR(-EOPNOTSUPP); 1236 return ERR_PTR(-EOPNOTSUPP);
1237} 1237}
1238
1239static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
1240#else 1238#else
1241struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, 1239struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
1242 struct ib_device *ibdev, 1240 struct ib_device *ibdev,
1243 const char *name, 1241 const char *name,
1244 void (*setup)(struct net_device *)); 1242 void (*setup)(struct net_device *));
1245void mlx5_rdma_netdev_free(struct net_device *netdev);
1246#endif /* CONFIG_MLX5_CORE_IPOIB */ 1243#endif /* CONFIG_MLX5_CORE_IPOIB */
1247 1244
1248struct mlx5_profile { 1245struct mlx5_profile {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 71fb503b2b52..804516e4f483 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -89,6 +89,7 @@ struct mlx5_flow_destination {
89 enum mlx5_flow_destination_type type; 89 enum mlx5_flow_destination_type type;
90 union { 90 union {
91 u32 tir_num; 91 u32 tir_num;
92 u32 ft_num;
92 struct mlx5_flow_table *ft; 93 struct mlx5_flow_table *ft;
93 struct mlx5_fc *counter; 94 struct mlx5_fc *counter;
94 struct { 95 struct {
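
[Editor's sketch, not part of this patch] The new ft_num union member lets a flow destination name a flow table by number rather than by pointer; the matching MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM value is added to mlx5_ifc.h further down. A hedged illustration (the helper name and table number are invented):

#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc.h>

static void example_set_ft_num_dest(struct mlx5_flow_destination *dest,
				    u32 table_num)
{
	dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
	dest->ft_num = table_num;	/* table referenced by number, not pointer */
}
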
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6ead9c1a5396..f043d65b9bac 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -82,6 +82,7 @@ enum {
82 82
83enum { 83enum {
84 MLX5_OBJ_TYPE_UCTX = 0x0004, 84 MLX5_OBJ_TYPE_UCTX = 0x0004,
85 MLX5_OBJ_TYPE_UMEM = 0x0005,
85}; 86};
86 87
87enum { 88enum {
@@ -246,12 +247,15 @@ enum {
246 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, 247 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e,
247 MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, 248 MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
248 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, 249 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
250 MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
249 MLX5_CMD_OP_FPGA_CREATE_QP = 0x960, 251 MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
250 MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961, 252 MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
251 MLX5_CMD_OP_FPGA_QUERY_QP = 0x962, 253 MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
252 MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963, 254 MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963,
253 MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964, 255 MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964,
254 MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00, 256 MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00,
257 MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01,
258 MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02,
255 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03, 259 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03,
256 MLX5_CMD_OP_MAX 260 MLX5_CMD_OP_MAX
257}; 261};
@@ -1185,6 +1189,7 @@ enum mlx5_flow_destination_type {
1185 1189
1186 MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99, 1190 MLX5_FLOW_DESTINATION_TYPE_PORT = 0x99,
1187 MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100, 1191 MLX5_FLOW_DESTINATION_TYPE_COUNTER = 0x100,
1192 MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
1188}; 1193};
1189 1194
1190struct mlx5_ifc_dest_format_struct_bits { 1195struct mlx5_ifc_dest_format_struct_bits {
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 8712ff70995f..40b48e2133cb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -202,6 +202,37 @@
202 202
203#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ 203#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
204 204
205/** check_shl_overflow() - Calculate a left-shifted value and check overflow
206 *
207 * @a: Value to be shifted
208 * @s: How many bits left to shift
209 * @d: Pointer to where to store the result
210 *
211 * Computes *@d = (@a << @s)
212 *
213 * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
214 * make sense. Example conditions:
215 * - 'a << s' causes bits to be lost when stored in *d.
216 * - 's' is garbage (e.g. negative) or so large that the result of
217 * 'a << s' is guaranteed to be 0.
218 * - 'a' is negative.
219 * - 'a << s' sets the sign bit, if any, in '*d'.
220 *
221 * '*d' will hold the results of the attempted shift, but is not
222 * considered "safe for use" if true is returned.
223 */
224#define check_shl_overflow(a, s, d) ({ \
225 typeof(a) _a = a; \
226 typeof(s) _s = s; \
227 typeof(d) _d = d; \
228 u64 _a_full = _a; \
229 unsigned int _to_shift = \
230 _s >= 0 && _s < 8 * sizeof(*d) ? _s : 0; \
231 *_d = (_a_full << _to_shift); \
232 (_to_shift != _s || *_d < 0 || _a < 0 || \
233 (*_d >> _to_shift) != _a); \
234})
235
205/** 236/**
206 * array_size() - Calculate size of 2-dimensional array. 237 * array_size() - Calculate size of 2-dimensional array.
207 * 238 *
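
[Editor's sketch, not part of this patch] How the new check_shl_overflow() helper is intended to be used: compute a left shift and fail cleanly instead of silently truncating (the conversion shown is illustrative):

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/overflow.h>

static int example_pages_to_bytes(unsigned int npages, size_t *bytes)
{
	/* *bytes = npages << PAGE_SHIFT, rejecting any lost bits. */
	if (check_shl_overflow(npages, PAGE_SHIFT, bytes))
		return -EOVERFLOW;

	return 0;
}
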
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 66dbed0c146d..4f385ec54f80 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -53,12 +53,12 @@ struct ib_addr {
53#define sib_interface_id ib_u.uib_addr64[1] 53#define sib_interface_id ib_u.uib_addr64[1]
54}; 54};
55 55
56static inline int ib_addr_any(const struct ib_addr *a) 56static inline bool ib_addr_any(const struct ib_addr *a)
57{ 57{
58 return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); 58 return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
59} 59}
60 60
61static inline int ib_addr_loopback(const struct ib_addr *a) 61static inline bool ib_addr_loopback(const struct ib_addr *a)
62{ 62{
63 return ((a->sib_addr32[0] | a->sib_addr32[1] | 63 return ((a->sib_addr32[0] | a->sib_addr32[1] |
64 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); 64 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c2c8b1fdeead..77c7908b7d73 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -58,6 +58,7 @@
58 * @bound_dev_if: An optional device interface index. 58 * @bound_dev_if: An optional device interface index.
59 * @transport: The transport type used. 59 * @transport: The transport type used.
60 * @net: Network namespace containing the bound_dev_if net_dev. 60 * @net: Network namespace containing the bound_dev_if net_dev.
61 * @sgid_attr: GID attribute to use for identified SGID
61 */ 62 */
62struct rdma_dev_addr { 63struct rdma_dev_addr {
63 unsigned char src_dev_addr[MAX_ADDR_LEN]; 64 unsigned char src_dev_addr[MAX_ADDR_LEN];
@@ -67,6 +68,7 @@ struct rdma_dev_addr {
67 int bound_dev_if; 68 int bound_dev_if;
68 enum rdma_transport_type transport; 69 enum rdma_transport_type transport;
69 struct net *net; 70 struct net *net;
71 const struct ib_gid_attr *sgid_attr;
70 enum rdma_network_type network; 72 enum rdma_network_type network;
71 int hoplimit; 73 int hoplimit;
72}; 74};
@@ -95,7 +97,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
95 * or been canceled. A status of 0 indicates success. 97 * or been canceled. A status of 0 indicates success.
96 * @context: User-specified context associated with the call. 98 * @context: User-specified context associated with the call.
97 */ 99 */
98int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, 100int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
99 struct rdma_dev_addr *addr, int timeout_ms, 101 struct rdma_dev_addr *addr, int timeout_ms,
100 void (*callback)(int status, struct sockaddr *src_addr, 102 void (*callback)(int status, struct sockaddr *src_addr,
101 struct rdma_dev_addr *addr, void *context), 103 struct rdma_dev_addr *addr, void *context),
@@ -107,7 +109,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
107 const struct net_device *dev, 109 const struct net_device *dev,
108 const unsigned char *dst_dev_addr); 110 const unsigned char *dst_dev_addr);
109 111
110int rdma_addr_size(struct sockaddr *addr); 112int rdma_addr_size(const struct sockaddr *addr);
111int rdma_addr_size_in6(struct sockaddr_in6 *addr); 113int rdma_addr_size_in6(struct sockaddr_in6 *addr);
112int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); 114int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
113 115
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index a5f249828115..3e11e7cc60b7 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -37,45 +37,23 @@
37 37
38#include <rdma/ib_verbs.h> 38#include <rdma/ib_verbs.h>
39 39
40/** 40int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
41 * ib_get_cached_gid - Returns a cached GID table entry 41 union ib_gid *gid);
42 * @device: The device to query. 42const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
43 * @port_num: The port number of the device to query. 43 const union ib_gid *gid,
44 * @index: The index into the cached GID table to query. 44 enum ib_gid_type gid_type,
45 * @gid: The GID value found at the specified index. 45 struct net_device *ndev);
46 * @attr: The GID attribute found at the specified index (only in RoCE). 46const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
47 * NULL means ignore (output parameter). 47 const union ib_gid *gid,
48 * 48 enum ib_gid_type gid_type,
49 * ib_get_cached_gid() fetches the specified GID table entry stored in 49 u8 port,
50 * the local software cache. 50 struct net_device *ndev);
51 */ 51const struct ib_gid_attr *rdma_find_gid_by_filter(
52int ib_get_cached_gid(struct ib_device *device, 52 struct ib_device *device, const union ib_gid *gid, u8 port_num,
53 u8 port_num, 53 bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
54 int index, 54 void *),
55 union ib_gid *gid, 55 void *context);
56 struct ib_gid_attr *attr);
57
58int ib_find_cached_gid(struct ib_device *device,
59 const union ib_gid *gid,
60 enum ib_gid_type gid_type,
61 struct net_device *ndev,
62 u8 *port_num,
63 u16 *index);
64
65int ib_find_cached_gid_by_port(struct ib_device *device,
66 const union ib_gid *gid,
67 enum ib_gid_type gid_type,
68 u8 port_num,
69 struct net_device *ndev,
70 u16 *index);
71 56
72int ib_find_gid_by_filter(struct ib_device *device,
73 const union ib_gid *gid,
74 u8 port_num,
75 bool (*filter)(const union ib_gid *gid,
76 const struct ib_gid_attr *,
77 void *),
78 void *context, u16 *index);
79/** 57/**
80 * ib_get_cached_pkey - Returns a cached PKey table entry 58 * ib_get_cached_pkey - Returns a cached PKey table entry
81 * @device: The device to query. 59 * @device: The device to query.
@@ -150,4 +128,33 @@ int ib_get_cached_port_state(struct ib_device *device,
150 enum ib_port_state *port_active); 128 enum ib_port_state *port_active);
151 129
152bool rdma_is_zero_gid(const union ib_gid *gid); 130bool rdma_is_zero_gid(const union ib_gid *gid);
131const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
132 u8 port_num, int index);
133void rdma_put_gid_attr(const struct ib_gid_attr *attr);
134void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
135
136/*
137 * This is to be removed. It only exists to make merging rdma and smc simpler.
138 */
139static inline __deprecated int ib_query_gid(struct ib_device *device,
140 u8 port_num, int index,
141 union ib_gid *gid,
142 struct ib_gid_attr *attr_out)
143{
144 const struct ib_gid_attr *attr;
145
146 memset(attr_out, 0, sizeof(*attr_out));
147 attr = rdma_get_gid_attr(device, port_num, index);
148 if (IS_ERR(attr))
149 return PTR_ERR(attr);
150
151 if (attr->ndev)
152 dev_hold(attr->ndev);
153 *attr_out = *attr;
154
155 rdma_put_gid_attr(attr);
156
157 return 0;
158}
159
153#endif /* _IB_CACHE_H */ 160#endif /* _IB_CACHE_H */
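
[Editor's sketch, not part of this patch] The cache API now hands out kref'd struct ib_gid_attr pointers instead of copying entries, so lookups follow a get/use/put pattern. A minimal illustration reading a GID through rdma_get_gid_attr() (the function name and indices are illustrative):

#include <linux/err.h>
#include <rdma/ib_cache.h>

static int example_read_gid(struct ib_device *device, u8 port_num,
			    int index, union ib_gid *gid)
{
	const struct ib_gid_attr *attr;

	attr = rdma_get_gid_attr(device, port_num, index);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	*gid = attr->gid;		/* the GID now lives inside the attribute */
	rdma_put_gid_attr(attr);	/* drop the reference when done */

	return 0;
}
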
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 7979cb04f529..c10f4b5ea8ab 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -120,6 +120,13 @@ struct ib_cm_req_event_param {
120 struct sa_path_rec *primary_path; 120 struct sa_path_rec *primary_path;
121 struct sa_path_rec *alternate_path; 121 struct sa_path_rec *alternate_path;
122 122
123 /*
124 * SGID attribute of the primary path. Currently only
125 * useful for RoCE. Alternate path GID attributes
126 * are not yet supported.
127 */
128 const struct ib_gid_attr *ppath_sgid_attr;
129
123 __be64 remote_ca_guid; 130 __be64 remote_ca_guid;
124 u32 remote_qkey; 131 u32 remote_qkey;
125 u32 remote_qpn; 132 u32 remote_qpn;
@@ -226,6 +233,12 @@ struct ib_cm_apr_event_param {
226struct ib_cm_sidr_req_event_param { 233struct ib_cm_sidr_req_event_param {
227 struct ib_cm_id *listen_id; 234 struct ib_cm_id *listen_id;
228 __be64 service_id; 235 __be64 service_id;
236
237 /*
238 * SGID attribute of the request. Currently only
239 * useful for RoCE.
240 */
241 const struct ib_gid_attr *sgid_attr;
229 /* P_Key that was used by the GMP's BTH header */ 242 /* P_Key that was used by the GMP's BTH header */
230 u16 bth_pkey; 243 u16 bth_pkey;
231 u8 port; 244 u8 port;
@@ -246,6 +259,7 @@ struct ib_cm_sidr_rep_event_param {
246 u32 qkey; 259 u32 qkey;
247 u32 qpn; 260 u32 qpn;
248 void *info; 261 void *info;
262 const struct ib_gid_attr *sgid_attr;
249 u8 info_len; 263 u8 info_len;
250}; 264};
251 265
@@ -297,7 +311,7 @@ struct ib_cm_event {
297 * destroy the @cm_id after the callback completes. 311 * destroy the @cm_id after the callback completes.
298 */ 312 */
299typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, 313typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
300 struct ib_cm_event *event); 314 const struct ib_cm_event *event);
301 315
302struct ib_cm_id { 316struct ib_cm_id {
303 ib_cm_handler cm_handler; 317 ib_cm_handler cm_handler;
@@ -365,6 +379,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
365struct ib_cm_req_param { 379struct ib_cm_req_param {
366 struct sa_path_rec *primary_path; 380 struct sa_path_rec *primary_path;
367 struct sa_path_rec *alternate_path; 381 struct sa_path_rec *alternate_path;
382 const struct ib_gid_attr *ppath_sgid_attr;
368 __be64 service_id; 383 __be64 service_id;
369 u32 qp_num; 384 u32 qp_num;
370 enum ib_qp_type qp_type; 385 enum ib_qp_type qp_type;
@@ -566,6 +581,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
566 581
567struct ib_cm_sidr_req_param { 582struct ib_cm_sidr_req_param {
568 struct sa_path_rec *path; 583 struct sa_path_rec *path;
584 const struct ib_gid_attr *sgid_attr;
569 __be64 service_id; 585 __be64 service_id;
570 int timeout_ms; 586 int timeout_ms;
571 const void *private_data; 587 const void *private_data;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2f4f1768ded4..f6ba366051c7 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,39 @@ struct ib_class_port_info {
262 __be32 trap_qkey; 262 __be32 trap_qkey;
263}; 263};
264 264
265/* PortInfo CapabilityMask */
266enum ib_port_capability_mask_bits {
267 IB_PORT_SM = 1 << 1,
268 IB_PORT_NOTICE_SUP = 1 << 2,
269 IB_PORT_TRAP_SUP = 1 << 3,
270 IB_PORT_OPT_IPD_SUP = 1 << 4,
271 IB_PORT_AUTO_MIGR_SUP = 1 << 5,
272 IB_PORT_SL_MAP_SUP = 1 << 6,
273 IB_PORT_MKEY_NVRAM = 1 << 7,
274 IB_PORT_PKEY_NVRAM = 1 << 8,
275 IB_PORT_LED_INFO_SUP = 1 << 9,
276 IB_PORT_SM_DISABLED = 1 << 10,
277 IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
278 IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
279 IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
280 IB_PORT_CM_SUP = 1 << 16,
281 IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
282 IB_PORT_REINIT_SUP = 1 << 18,
283 IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
284 IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
285 IB_PORT_DR_NOTICE_SUP = 1 << 21,
286 IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
287 IB_PORT_BOOT_MGMT_SUP = 1 << 23,
288 IB_PORT_LINK_LATENCY_SUP = 1 << 24,
289 IB_PORT_CLIENT_REG_SUP = 1 << 25,
290 IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
291 IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
292 IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
293 IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
294 IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
295 IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
296};
297
265#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26) 298#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
266 299
267struct opa_class_port_info { 300struct opa_class_port_info {
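
[Editor's sketch, not part of this patch] The PortInfo CapabilityMask bits now live in ib_mad.h, and ib_port_attr.port_cap_flags (see the ib_verbs.h hunks below) carries the raw IBA value they apply to. A hedged illustration testing one bit (the helper name is invented):

#include <rdma/ib_mad.h>
#include <rdma/ib_verbs.h>

static bool example_port_supports_cm(struct ib_device *device, u8 port_num)
{
	struct ib_port_attr attr;

	if (ib_query_port(device, port_num, &attr))
		return false;

	return !!(attr.port_cap_flags & IB_PORT_CM_SUP);
}
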
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index bacb144f7780..b6ddf2a1b9d8 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -172,12 +172,7 @@ struct sa_path_rec_ib {
172 */ 172 */
173struct sa_path_rec_roce { 173struct sa_path_rec_roce {
174 bool route_resolved; 174 bool route_resolved;
175 u8 dmac[ETH_ALEN]; 175 u8 dmac[ETH_ALEN];
176 /* ignored in IB */
177 int ifindex;
178 /* ignored in IB */
179 struct net *net;
180
181}; 176};
182 177
183struct sa_path_rec_opa { 178struct sa_path_rec_opa {
@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
556 enum ib_gid_type gid_type, 551 enum ib_gid_type gid_type,
557 struct rdma_ah_attr *ah_attr); 552 struct rdma_ah_attr *ah_attr);
558 553
559/**
560 * ib_init_ah_attr_from_path - Initialize address handle attributes based on
561 * an SA path record.
562 */
563int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, 554int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
564 struct sa_path_rec *rec, 555 struct sa_path_rec *rec,
565 struct rdma_ah_attr *ah_attr); 556 struct rdma_ah_attr *ah_attr,
557 const struct ib_gid_attr *sgid_attr);
566 558
567/** 559/**
568 * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec 560 * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec
@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
667 eth_zero_addr(rec->roce.dmac); 659 eth_zero_addr(rec->roce.dmac);
668} 660}
669 661
670static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
671{
672 if (sa_path_is_roce(rec))
673 rec->roce.ifindex = ifindex;
674}
675
676static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
677{
678 if (sa_path_is_roce(rec))
679 rec->roce.net = net;
680}
681
682static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec) 662static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
683{ 663{
684 if (sa_path_is_roce(rec)) 664 if (sa_path_is_roce(rec))
685 return rec->roce.dmac; 665 return rec->roce.dmac;
686 return NULL; 666 return NULL;
687} 667}
688
689static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
690{
691 if (sa_path_is_roce(rec))
692 return rec->roce.ifindex;
693 return 0;
694}
695
696static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
697{
698 if (sa_path_is_roce(rec))
699 return rec->roce.net;
700 return NULL;
701}
702
703static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
704{
705 return sa_path_get_ndev(rec) ?
706 dev_get_by_index(sa_path_get_ndev(rec),
707 sa_path_get_ifindex(rec))
708 : NULL;
709}
710
711#endif /* IB_SA_H */ 668#endif /* IB_SA_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6c003995347a..e950c2a68f06 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,6 +94,7 @@ enum ib_gid_type {
94struct ib_gid_attr { 94struct ib_gid_attr {
95 struct net_device *ndev; 95 struct net_device *ndev;
96 struct ib_device *device; 96 struct ib_device *device;
97 union ib_gid gid;
97 enum ib_gid_type gid_type; 98 enum ib_gid_type gid_type;
98 u16 index; 99 u16 index;
99 u8 port_num; 100 u8 port_num;
@@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
148 return IB_GID_TYPE_IB; 149 return IB_GID_TYPE_IB;
149} 150}
150 151
151static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, 152static inline enum rdma_network_type
152 union ib_gid *gid) 153rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
153{ 154{
154 if (gid_type == IB_GID_TYPE_IB) 155 if (attr->gid_type == IB_GID_TYPE_IB)
155 return RDMA_NETWORK_IB; 156 return RDMA_NETWORK_IB;
156 157
157 if (ipv6_addr_v4mapped((struct in6_addr *)gid)) 158 if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
158 return RDMA_NETWORK_IPV4; 159 return RDMA_NETWORK_IPV4;
159 else 160 else
160 return RDMA_NETWORK_IPV6; 161 return RDMA_NETWORK_IPV6;
@@ -344,7 +345,8 @@ struct ib_device_attr {
344 int max_qp; 345 int max_qp;
345 int max_qp_wr; 346 int max_qp_wr;
346 u64 device_cap_flags; 347 u64 device_cap_flags;
347 int max_sge; 348 int max_send_sge;
349 int max_recv_sge;
348 int max_sge_rd; 350 int max_sge_rd;
349 int max_cq; 351 int max_cq;
350 int max_cqe; 352 int max_cqe;
@@ -430,33 +432,6 @@ enum ib_port_state {
430 IB_PORT_ACTIVE_DEFER = 5 432 IB_PORT_ACTIVE_DEFER = 5
431}; 433};
432 434
433enum ib_port_cap_flags {
434 IB_PORT_SM = 1 << 1,
435 IB_PORT_NOTICE_SUP = 1 << 2,
436 IB_PORT_TRAP_SUP = 1 << 3,
437 IB_PORT_OPT_IPD_SUP = 1 << 4,
438 IB_PORT_AUTO_MIGR_SUP = 1 << 5,
439 IB_PORT_SL_MAP_SUP = 1 << 6,
440 IB_PORT_MKEY_NVRAM = 1 << 7,
441 IB_PORT_PKEY_NVRAM = 1 << 8,
442 IB_PORT_LED_INFO_SUP = 1 << 9,
443 IB_PORT_SM_DISABLED = 1 << 10,
444 IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
445 IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
446 IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
447 IB_PORT_CM_SUP = 1 << 16,
448 IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
449 IB_PORT_REINIT_SUP = 1 << 18,
450 IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
451 IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
452 IB_PORT_DR_NOTICE_SUP = 1 << 21,
453 IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
454 IB_PORT_BOOT_MGMT_SUP = 1 << 23,
455 IB_PORT_LINK_LATENCY_SUP = 1 << 24,
456 IB_PORT_CLIENT_REG_SUP = 1 << 25,
457 IB_PORT_IP_BASED_GIDS = 1 << 26,
458};
459
460enum ib_port_width { 435enum ib_port_width {
461 IB_WIDTH_1X = 1, 436 IB_WIDTH_1X = 1,
462 IB_WIDTH_4X = 2, 437 IB_WIDTH_4X = 2,
@@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
554#define RDMA_CORE_CAP_AF_IB 0x00001000 529#define RDMA_CORE_CAP_AF_IB 0x00001000
555#define RDMA_CORE_CAP_ETH_AH 0x00002000 530#define RDMA_CORE_CAP_ETH_AH 0x00002000
556#define RDMA_CORE_CAP_OPA_AH 0x00004000 531#define RDMA_CORE_CAP_OPA_AH 0x00004000
532#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000
557 533
558/* Protocol 0xFFF00000 */ 534/* Protocol 0xFFF00000 */
559#define RDMA_CORE_CAP_PROT_IB 0x00100000 535#define RDMA_CORE_CAP_PROT_IB 0x00100000
@@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
563#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000 539#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
564#define RDMA_CORE_CAP_PROT_USNIC 0x02000000 540#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
565 541
542#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
543 | RDMA_CORE_CAP_PROT_ROCE \
544 | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
545
566#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ 546#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
567 | RDMA_CORE_CAP_IB_MAD \ 547 | RDMA_CORE_CAP_IB_MAD \
568 | RDMA_CORE_CAP_IB_SMI \ 548 | RDMA_CORE_CAP_IB_SMI \
@@ -595,6 +575,8 @@ struct ib_port_attr {
595 enum ib_mtu max_mtu; 575 enum ib_mtu max_mtu;
596 enum ib_mtu active_mtu; 576 enum ib_mtu active_mtu;
597 int gid_tbl_len; 577 int gid_tbl_len;
578 unsigned int ip_gids:1;
579 /* This is the value from PortInfo CapabilityMask, defined by IBA */
598 u32 port_cap_flags; 580 u32 port_cap_flags;
599 u32 max_msg_sz; 581 u32 max_msg_sz;
600 u32 bad_pkey_cntr; 582 u32 bad_pkey_cntr;
@@ -610,7 +592,6 @@ struct ib_port_attr {
610 u8 active_width; 592 u8 active_width;
611 u8 active_speed; 593 u8 active_speed;
612 u8 phys_state; 594 u8 phys_state;
613 bool grh_required;
614}; 595};
615 596
616enum ib_device_modify_flags { 597enum ib_device_modify_flags {
@@ -689,6 +670,7 @@ struct ib_event_handler {
689 } while (0) 670 } while (0)
690 671
691struct ib_global_route { 672struct ib_global_route {
673 const struct ib_gid_attr *sgid_attr;
692 union ib_gid dgid; 674 union ib_gid dgid;
693 u32 flow_label; 675 u32 flow_label;
694 u8 sgid_index; 676 u8 sgid_index;
@@ -1370,7 +1352,7 @@ struct ib_rdma_wr {
1370 u32 rkey; 1352 u32 rkey;
1371}; 1353};
1372 1354
1373static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr) 1355static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
1374{ 1356{
1375 return container_of(wr, struct ib_rdma_wr, wr); 1357 return container_of(wr, struct ib_rdma_wr, wr);
1376} 1358}
@@ -1385,7 +1367,7 @@ struct ib_atomic_wr {
1385 u32 rkey; 1367 u32 rkey;
1386}; 1368};
1387 1369
1388static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr) 1370static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
1389{ 1371{
1390 return container_of(wr, struct ib_atomic_wr, wr); 1372 return container_of(wr, struct ib_atomic_wr, wr);
1391} 1373}
@@ -1402,7 +1384,7 @@ struct ib_ud_wr {
1402 u8 port_num; /* valid for DR SMPs on switch only */ 1384 u8 port_num; /* valid for DR SMPs on switch only */
1403}; 1385};
1404 1386
1405static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) 1387static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
1406{ 1388{
1407 return container_of(wr, struct ib_ud_wr, wr); 1389 return container_of(wr, struct ib_ud_wr, wr);
1408} 1390}
@@ -1414,7 +1396,7 @@ struct ib_reg_wr {
1414 int access; 1396 int access;
1415}; 1397};
1416 1398
1417static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) 1399static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
1418{ 1400{
1419 return container_of(wr, struct ib_reg_wr, wr); 1401 return container_of(wr, struct ib_reg_wr, wr);
1420} 1402}
@@ -1427,7 +1409,8 @@ struct ib_sig_handover_wr {
1427 struct ib_sge *prot; 1409 struct ib_sge *prot;
1428}; 1410};
1429 1411
1430static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) 1412static inline const struct ib_sig_handover_wr *
1413sig_handover_wr(const struct ib_send_wr *wr)
1431{ 1414{
1432 return container_of(wr, struct ib_sig_handover_wr, wr); 1415 return container_of(wr, struct ib_sig_handover_wr, wr);
1433} 1416}
@@ -1443,14 +1426,16 @@ struct ib_recv_wr {
1443}; 1426};
1444 1427
1445enum ib_access_flags { 1428enum ib_access_flags {
1446 IB_ACCESS_LOCAL_WRITE = 1, 1429 IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
1447 IB_ACCESS_REMOTE_WRITE = (1<<1), 1430 IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
1448 IB_ACCESS_REMOTE_READ = (1<<2), 1431 IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
1449 IB_ACCESS_REMOTE_ATOMIC = (1<<3), 1432 IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
1450 IB_ACCESS_MW_BIND = (1<<4), 1433 IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
1451 IB_ZERO_BASED = (1<<5), 1434 IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
1452 IB_ACCESS_ON_DEMAND = (1<<6), 1435 IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
1453 IB_ACCESS_HUGETLB = (1<<7), 1436 IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
1437
1438 IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
1454}; 1439};
1455 1440
1456/* 1441/*
@@ -1473,14 +1458,17 @@ struct ib_fmr_attr {
1473struct ib_umem; 1458struct ib_umem;
1474 1459
1475enum rdma_remove_reason { 1460enum rdma_remove_reason {
1476 /* Userspace requested uobject deletion. Call could fail */ 1461 /*
1462 * Userspace requested uobject deletion or initial try
1463 * to remove uobject via cleanup. Call could fail
1464 */
1477 RDMA_REMOVE_DESTROY, 1465 RDMA_REMOVE_DESTROY,
1478 /* Context deletion. This call should delete the actual object itself */ 1466 /* Context deletion. This call should delete the actual object itself */
1479 RDMA_REMOVE_CLOSE, 1467 RDMA_REMOVE_CLOSE,
1480 /* Driver is being hot-unplugged. This call should delete the actual object itself */ 1468 /* Driver is being hot-unplugged. This call should delete the actual object itself */
1481 RDMA_REMOVE_DRIVER_REMOVE, 1469 RDMA_REMOVE_DRIVER_REMOVE,
1482 /* Context is being cleaned-up, but commit was just completed */ 1470 /* uobj is being cleaned-up before being committed */
1483 RDMA_REMOVE_DURING_CLEANUP, 1471 RDMA_REMOVE_ABORT,
1484}; 1472};
1485 1473
1486struct ib_rdmacg_object { 1474struct ib_rdmacg_object {
@@ -1492,14 +1480,14 @@ struct ib_rdmacg_object {
1492struct ib_ucontext { 1480struct ib_ucontext {
1493 struct ib_device *device; 1481 struct ib_device *device;
1494 struct ib_uverbs_file *ufile; 1482 struct ib_uverbs_file *ufile;
1483 /*
1484 * 'closing' can be read by the driver only during a destroy callback,
1485 * it is set when we are closing the file descriptor and indicates
1486 * that mm_sem may be locked.
1487 */
1495 int closing; 1488 int closing;
1496 1489
1497 /* locking the uobjects_list */ 1490 bool cleanup_retryable;
1498 struct mutex uobjects_lock;
1499 struct list_head uobjects;
1500 /* protects cleanup process from other actions */
1501 struct rw_semaphore cleanup_rwsem;
1502 enum rdma_remove_reason cleanup_reason;
1503 1491
1504 struct pid *tgid; 1492 struct pid *tgid;
1505#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1493#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -1524,6 +1512,9 @@ struct ib_ucontext {
1524 1512
1525struct ib_uobject { 1513struct ib_uobject {
1526 u64 user_handle; /* handle given to us by userspace */ 1514 u64 user_handle; /* handle given to us by userspace */
1515 /* ufile & ucontext owning this object */
1516 struct ib_uverbs_file *ufile;
1517 /* FIXME, save memory: ufile->context == context */
1527 struct ib_ucontext *context; /* associated user context */ 1518 struct ib_ucontext *context; /* associated user context */
1528 void *object; /* containing object */ 1519 void *object; /* containing object */
1529 struct list_head list; /* link to context's list */ 1520 struct list_head list; /* link to context's list */
@@ -1533,13 +1524,7 @@ struct ib_uobject {
1533 atomic_t usecnt; /* protects exclusive access */ 1524 atomic_t usecnt; /* protects exclusive access */
1534 struct rcu_head rcu; /* kfree_rcu() overhead */ 1525 struct rcu_head rcu; /* kfree_rcu() overhead */
1535 1526
1536 const struct uverbs_obj_type *type; 1527 const struct uverbs_api_object *uapi_object;
1537};
1538
1539struct ib_uobject_file {
1540 struct ib_uobject uobj;
1541 /* ufile contains the lock between context release and file close */
1542 struct ib_uverbs_file *ufile;
1543}; 1528};
1544 1529
1545struct ib_udata { 1530struct ib_udata {
@@ -1578,6 +1563,7 @@ struct ib_ah {
1578 struct ib_device *device; 1563 struct ib_device *device;
1579 struct ib_pd *pd; 1564 struct ib_pd *pd;
1580 struct ib_uobject *uobject; 1565 struct ib_uobject *uobject;
1566 const struct ib_gid_attr *sgid_attr;
1581 enum rdma_ah_attr_type type; 1567 enum rdma_ah_attr_type type;
1582}; 1568};
1583 1569
@@ -1776,6 +1762,9 @@ struct ib_qp {
1776 struct ib_uobject *uobject; 1762 struct ib_uobject *uobject;
1777 void (*event_handler)(struct ib_event *, void *); 1763 void (*event_handler)(struct ib_event *, void *);
1778 void *qp_context; 1764 void *qp_context;
1765 /* sgid_attrs associated with the AV's */
1766 const struct ib_gid_attr *av_sgid_attr;
1767 const struct ib_gid_attr *alt_path_sgid_attr;
1779 u32 qp_num; 1768 u32 qp_num;
1780 u32 max_write_sge; 1769 u32 max_write_sge;
1781 u32 max_read_sge; 1770 u32 max_read_sge;
@@ -2098,6 +2087,7 @@ struct ib_flow_attr {
2098 2087
2099struct ib_flow { 2088struct ib_flow {
2100 struct ib_qp *qp; 2089 struct ib_qp *qp;
2090 struct ib_device *device;
2101 struct ib_uobject *uobject; 2091 struct ib_uobject *uobject;
2102}; 2092};
2103 2093
@@ -2213,7 +2203,11 @@ struct rdma_netdev {
2213 struct ib_device *hca; 2203 struct ib_device *hca;
2214 u8 port_num; 2204 u8 port_num;
2215 2205
2216 /* cleanup function must be specified */ 2206 /*
2207 * cleanup function must be specified.
2208 * FIXME: This is only used for OPA_VNIC and that usage should be
2209 * removed too.
2210 */
2217 void (*free_rdma_netdev)(struct net_device *netdev); 2211 void (*free_rdma_netdev)(struct net_device *netdev);
2218 2212
2219 /* control functions */ 2213 /* control functions */
@@ -2242,11 +2236,6 @@ struct ib_counters {
2242 atomic_t usecnt; 2236 atomic_t usecnt;
2243}; 2237};
2244 2238
2245enum ib_read_counters_flags {
2246 /* prefer read values from driver cache */
2247 IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
2248};
2249
2250struct ib_counters_read_attr { 2239struct ib_counters_read_attr {
2251 u64 *counters_buff; 2240 u64 *counters_buff;
2252 u32 ncounters; 2241 u32 ncounters;
@@ -2341,8 +2330,7 @@ struct ib_device {
2341 * concurrently for different ports. This function is only called when 2330 * concurrently for different ports. This function is only called when
2342 * roce_gid_table is used. 2331 * roce_gid_table is used.
2343 */ 2332 */
2344 int (*add_gid)(const union ib_gid *gid, 2333 int (*add_gid)(const struct ib_gid_attr *attr,
2345 const struct ib_gid_attr *attr,
2346 void **context); 2334 void **context);
2347 /* When calling del_gid, the HW vendor's driver should delete the 2335 /* When calling del_gid, the HW vendor's driver should delete the
2348 * gid of device @device at gid index gid_index of port port_num 2336 * gid of device @device at gid index gid_index of port port_num
@@ -2389,8 +2377,8 @@ struct ib_device {
2389 struct ib_srq_attr *srq_attr); 2377 struct ib_srq_attr *srq_attr);
2390 int (*destroy_srq)(struct ib_srq *srq); 2378 int (*destroy_srq)(struct ib_srq *srq);
2391 int (*post_srq_recv)(struct ib_srq *srq, 2379 int (*post_srq_recv)(struct ib_srq *srq,
2392 struct ib_recv_wr *recv_wr, 2380 const struct ib_recv_wr *recv_wr,
2393 struct ib_recv_wr **bad_recv_wr); 2381 const struct ib_recv_wr **bad_recv_wr);
2394 struct ib_qp * (*create_qp)(struct ib_pd *pd, 2382 struct ib_qp * (*create_qp)(struct ib_pd *pd,
2395 struct ib_qp_init_attr *qp_init_attr, 2383 struct ib_qp_init_attr *qp_init_attr,
2396 struct ib_udata *udata); 2384 struct ib_udata *udata);
@@ -2404,11 +2392,11 @@ struct ib_device {
2404 struct ib_qp_init_attr *qp_init_attr); 2392 struct ib_qp_init_attr *qp_init_attr);
2405 int (*destroy_qp)(struct ib_qp *qp); 2393 int (*destroy_qp)(struct ib_qp *qp);
2406 int (*post_send)(struct ib_qp *qp, 2394 int (*post_send)(struct ib_qp *qp,
2407 struct ib_send_wr *send_wr, 2395 const struct ib_send_wr *send_wr,
2408 struct ib_send_wr **bad_send_wr); 2396 const struct ib_send_wr **bad_send_wr);
2409 int (*post_recv)(struct ib_qp *qp, 2397 int (*post_recv)(struct ib_qp *qp,
2410 struct ib_recv_wr *recv_wr, 2398 const struct ib_recv_wr *recv_wr,
2411 struct ib_recv_wr **bad_recv_wr); 2399 const struct ib_recv_wr **bad_recv_wr);
2412 struct ib_cq * (*create_cq)(struct ib_device *device, 2400 struct ib_cq * (*create_cq)(struct ib_device *device,
2413 const struct ib_cq_init_attr *attr, 2401 const struct ib_cq_init_attr *attr,
2414 struct ib_ucontext *context, 2402 struct ib_ucontext *context,
@@ -2592,7 +2580,7 @@ struct ib_device {
2592 const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev, 2580 const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
2593 int comp_vector); 2581 int comp_vector);
2594 2582
2595 struct uverbs_root_spec *specs_root; 2583 const struct uverbs_object_tree_def *const *driver_specs;
2596 enum rdma_driver_id driver_id; 2584 enum rdma_driver_id driver_id;
2597}; 2585};
2598 2586
@@ -2679,6 +2667,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
2679} 2667}
2680 2668
2681/** 2669/**
2670 * ib_is_destroy_retryable - Check whether the uobject destruction
2671 * is retryable.
2672 * @ret: The initial destruction return code
2673 * @why: remove reason
2674 * @uobj: The uobject that is destroyed
2675 *
2676 * This function is a helper function that IB layer and low-level drivers
2677 * can use to consider whether the destruction of the given uobject is
2678 * retry-able.
2679 * It checks the original return code; if it was not success, the destruction
2680 * is retryable according to the ucontext state (i.e. cleanup_retryable) and
2681 * the remove reason (i.e. why).
2682 * Must be called with the object locked for destroy.
2683 */
2684static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
2685 struct ib_uobject *uobj)
2686{
2687 return ret && (why == RDMA_REMOVE_DESTROY ||
2688 uobj->context->cleanup_retryable);
2689}
2690
2691/**
2692 * ib_destroy_usecnt - Called during destruction to check the usecnt
2693 * @usecnt: The usecnt atomic
2694 * @why: remove reason
2695 * @uobj: The uobject that is destroyed
2696 *
2697 * Non-zero usecnts will block destruction unless destruction was triggered by
2698 * a ucontext cleanup.
2699 */
2700static inline int ib_destroy_usecnt(atomic_t *usecnt,
2701 enum rdma_remove_reason why,
2702 struct ib_uobject *uobj)
2703{
2704 if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
2705 return -EBUSY;
2706 return 0;
2707}
2708
2709/**
2682 * ib_modify_qp_is_ok - Check that the supplied attribute mask 2710 * ib_modify_qp_is_ok - Check that the supplied attribute mask
2683 * contains all required attributes and no attributes not allowed for 2711 * contains all required attributes and no attributes not allowed for
2684 * the given QP state transition. 2712 * the given QP state transition.
@@ -2755,6 +2783,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
2755 port <= rdma_end_port(device)); 2783 port <= rdma_end_port(device));
2756} 2784}
2757 2785
2786static inline bool rdma_is_grh_required(const struct ib_device *device,
2787 u8 port_num)
2788{
2789 return device->port_immutable[port_num].core_cap_flags &
2790 RDMA_CORE_PORT_IB_GRH_REQUIRED;
2791}
2792
2758static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) 2793static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
2759{ 2794{
2760 return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; 2795 return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
@@ -3046,10 +3081,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
3046 return rdma_protocol_iwarp(dev, port_num); 3081 return rdma_protocol_iwarp(dev, port_num);
3047} 3082}
3048 3083
3049int ib_query_gid(struct ib_device *device,
3050 u8 port_num, int index, union ib_gid *gid,
3051 struct ib_gid_attr *attr);
3052
3053int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, 3084int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
3054 int state); 3085 int state);
3055int ib_get_vf_config(struct ib_device *device, int vf, u8 port, 3086int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3148,6 +3179,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
3148 * ignored unless the work completion indicates that the GRH is valid. 3179 * ignored unless the work completion indicates that the GRH is valid.
3149 * @ah_attr: Returned attributes that can be used when creating an address 3180 * @ah_attr: Returned attributes that can be used when creating an address
3150 * handle for replying to the message. 3181 * handle for replying to the message.
3182 * When ib_init_ah_attr_from_wc() returns success,
3183 * (a) for IB link layer it optionally contains a reference to SGID attribute
3184 * when GRH is present for IB link layer.
3185 * (b) for RoCE link layer it contains a reference to SGID attribute.
3186 * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID
3187 * attributes which are initialized using ib_init_ah_attr_from_wc().
3188 *
3151 */ 3189 */
3152int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, 3190int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
3153 const struct ib_wc *wc, const struct ib_grh *grh, 3191 const struct ib_wc *wc, const struct ib_grh *grh,
@@ -3247,10 +3285,12 @@ int ib_destroy_srq(struct ib_srq *srq);
3247 * the work request that failed to be posted on the QP. 3285 * the work request that failed to be posted on the QP.
3248 */ 3286 */
3249static inline int ib_post_srq_recv(struct ib_srq *srq, 3287static inline int ib_post_srq_recv(struct ib_srq *srq,
3250 struct ib_recv_wr *recv_wr, 3288 const struct ib_recv_wr *recv_wr,
3251 struct ib_recv_wr **bad_recv_wr) 3289 const struct ib_recv_wr **bad_recv_wr)
3252{ 3290{
3253 return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); 3291 const struct ib_recv_wr *dummy;
3292
3293 return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
3254} 3294}
3255 3295
3256/** 3296/**
@@ -3348,10 +3388,12 @@ int ib_close_qp(struct ib_qp *qp);
3348 * earlier work requests in the list. 3388 * earlier work requests in the list.
3349 */ 3389 */
3350static inline int ib_post_send(struct ib_qp *qp, 3390static inline int ib_post_send(struct ib_qp *qp,
3351 struct ib_send_wr *send_wr, 3391 const struct ib_send_wr *send_wr,
3352 struct ib_send_wr **bad_send_wr) 3392 const struct ib_send_wr **bad_send_wr)
3353{ 3393{
3354 return qp->device->post_send(qp, send_wr, bad_send_wr); 3394 const struct ib_send_wr *dummy;
3395
3396 return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
3355} 3397}
3356 3398
3357/** 3399/**
@@ -3363,10 +3405,12 @@ static inline int ib_post_send(struct ib_qp *qp,
3363 * the work request that failed to be posted on the QP. 3405 * the work request that failed to be posted on the QP.
3364 */ 3406 */
3365static inline int ib_post_recv(struct ib_qp *qp, 3407static inline int ib_post_recv(struct ib_qp *qp,
3366 struct ib_recv_wr *recv_wr, 3408 const struct ib_recv_wr *recv_wr,
3367 struct ib_recv_wr **bad_recv_wr) 3409 const struct ib_recv_wr **bad_recv_wr)
3368{ 3410{
3369 return qp->device->post_recv(qp, recv_wr, bad_recv_wr); 3411 const struct ib_recv_wr *dummy;
3412
3413 return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
3370} 3414}
3371 3415
3372struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, 3416struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
@@ -3801,10 +3845,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
3801 */ 3845 */
3802int ib_dealloc_xrcd(struct ib_xrcd *xrcd); 3846int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
3803 3847
3804struct ib_flow *ib_create_flow(struct ib_qp *qp,
3805 struct ib_flow_attr *flow_attr, int domain);
3806int ib_destroy_flow(struct ib_flow *flow_id);
3807
3808static inline int ib_check_mr_access(int flags) 3848static inline int ib_check_mr_access(int flags)
3809{ 3849{
3810 /* 3850 /*
@@ -4033,8 +4073,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
4033 grh->sgid_index = sgid_index; 4073 grh->sgid_index = sgid_index;
4034 grh->hop_limit = hop_limit; 4074 grh->hop_limit = hop_limit;
4035 grh->traffic_class = traffic_class; 4075 grh->traffic_class = traffic_class;
4076 grh->sgid_attr = NULL;
4036} 4077}
4037 4078
4079void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
4080void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
4081 u32 flow_label, u8 hop_limit, u8 traffic_class,
4082 const struct ib_gid_attr *sgid_attr);
4083void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
4084 const struct rdma_ah_attr *src);
4085void rdma_replace_ah_attr(struct rdma_ah_attr *old,
4086 const struct rdma_ah_attr *new);
4087void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
4088
4038/** 4089/**
4039 * rdma_ah_find_type - Return address handle type. 4090 * rdma_ah_find_type - Return address handle type.
4040 * 4091 *
@@ -4102,6 +4153,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
4102 4153
4103} 4154}
4104 4155
4156static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
4157 struct ib_qp *qp, struct ib_device *device)
4158{
4159 uobj->object = ibflow;
4160 ibflow->uobject = uobj;
4161
4162 if (qp) {
4163 atomic_inc(&qp->usecnt);
4164 ibflow->qp = qp;
4165 }
4166
4167 ibflow->device = device;
4168}
4169
4105/** 4170/**
4106 * rdma_roce_rescan_device - Rescan all of the network devices in the system 4171 * rdma_roce_rescan_device - Rescan all of the network devices in the system
4107 * and add their gids, as needed, to the relevant RoCE devices. 4172 * and add their gids, as needed, to the relevant RoCE devices.
@@ -4110,4 +4175,8 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
4110 */ 4175 */
4111void rdma_roce_rescan_device(struct ib_device *ibdev); 4176void rdma_roce_rescan_device(struct ib_device *ibdev);
4112 4177
4178struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
4179
4180int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
4181 struct uverbs_attr_bundle *attrs);
4113#endif /* IB_VERBS_H */ 4182#endif /* IB_VERBS_H */
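
[Editor's sketch, not part of this patch] One way a driver-side destroy handler might use the new ib_destroy_usecnt() helper introduced above; the object type and free path are invented purely for illustration:

#include <linux/slab.h>
#include <rdma/ib_verbs.h>

struct example_obj {
	struct ib_uobject *uobject;
	atomic_t usecnt;
};

static int example_obj_destroy(struct example_obj *obj,
			       enum rdma_remove_reason why)
{
	int ret;

	/* Busy objects block destruction unless the cleanup path forces it. */
	ret = ib_destroy_usecnt(&obj->usecnt, why, obj->uobject);
	if (ret)
		return ret;

	kfree(obj);
	return 0;
}
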
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index 2bbb7a67e643..66d4393d339c 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
120 if (attr->type == RDMA_AH_ATTR_TYPE_IB) { 120 if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
121 if (!rdma_ah_get_dlid(attr) || 121 if (!rdma_ah_get_dlid(attr) ||
122 rdma_ah_get_dlid(attr) >= 122 rdma_ah_get_dlid(attr) >=
123 be32_to_cpu(IB_MULTICAST_LID_BASE)) 123 be16_to_cpu(IB_MULTICAST_LID_BASE))
124 return false; 124 return false;
125 } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { 125 } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
126 if (!rdma_ah_get_dlid(attr) || 126 if (!rdma_ah_get_dlid(attr) ||
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c5c1435c129a..5d71a7f51a9f 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -192,7 +192,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
192 * @timeout_ms: Time to wait for resolution to complete. 192 * @timeout_ms: Time to wait for resolution to complete.
193 */ 193 */
194int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 194int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
195 struct sockaddr *dst_addr, int timeout_ms); 195 const struct sockaddr *dst_addr, int timeout_ms);
196 196
197/** 197/**
198 * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier 198 * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1145a4c154b2..927f6d5b6d0f 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
2#define DEF_RDMAVT_INCQP_H 2#define DEF_RDMAVT_INCQP_H
3 3
4/* 4/*
5 * Copyright(c) 2016, 2017 Intel Corporation. 5 * Copyright(c) 2016 - 2018 Intel Corporation.
6 * 6 *
7 * This file is provided under a dual BSD/GPLv2 license. When using or 7 * This file is provided under a dual BSD/GPLv2 license. When using or
8 * redistributing this file, you may do so under either license. 8 * redistributing this file, you may do so under either license.
@@ -91,6 +91,7 @@
91 * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests 91 * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
92 * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK 92 * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
93 * RVT_S_ECN - a BECN was queued to the send engine 93 * RVT_S_ECN - a BECN was queued to the send engine
94 * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
94 */ 95 */
95#define RVT_S_SIGNAL_REQ_WR 0x0001 96#define RVT_S_SIGNAL_REQ_WR 0x0001
96#define RVT_S_BUSY 0x0002 97#define RVT_S_BUSY 0x0002
@@ -103,23 +104,26 @@
103#define RVT_S_WAIT_SSN_CREDIT 0x0100 104#define RVT_S_WAIT_SSN_CREDIT 0x0100
104#define RVT_S_WAIT_DMA 0x0200 105#define RVT_S_WAIT_DMA 0x0200
105#define RVT_S_WAIT_PIO 0x0400 106#define RVT_S_WAIT_PIO 0x0400
106#define RVT_S_WAIT_PIO_DRAIN 0x0800 107#define RVT_S_WAIT_TX 0x0800
107#define RVT_S_WAIT_TX 0x1000 108#define RVT_S_WAIT_DMA_DESC 0x1000
108#define RVT_S_WAIT_DMA_DESC 0x2000 109#define RVT_S_WAIT_KMEM 0x2000
109#define RVT_S_WAIT_KMEM 0x4000 110#define RVT_S_WAIT_PSN 0x4000
110#define RVT_S_WAIT_PSN 0x8000 111#define RVT_S_WAIT_ACK 0x8000
111#define RVT_S_WAIT_ACK 0x10000 112#define RVT_S_SEND_ONE 0x10000
112#define RVT_S_SEND_ONE 0x20000 113#define RVT_S_UNLIMITED_CREDIT 0x20000
113#define RVT_S_UNLIMITED_CREDIT 0x40000 114#define RVT_S_ECN 0x40000
114#define RVT_S_AHG_VALID 0x80000 115#define RVT_S_MAX_BIT_MASK 0x800000
115#define RVT_S_AHG_CLEAR 0x100000 116
116#define RVT_S_ECN 0x200000 117/*
118 * Drivers should use s_flags starting with bit 31 down to the bit next to
119 * RVT_S_MAX_BIT_MASK
120 */
117 121
118/* 122/*
119 * Wait flags that would prevent any packet type from being sent. 123 * Wait flags that would prevent any packet type from being sent.
120 */ 124 */
121#define RVT_S_ANY_WAIT_IO \ 125#define RVT_S_ANY_WAIT_IO \
122 (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \ 126 (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
123 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) 127 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
124 128
125/* 129/*
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index bd6bba3a6e04..9e997c3c2f04 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -61,103 +61,195 @@ enum uverbs_obj_access {
61 UVERBS_ACCESS_DESTROY 61 UVERBS_ACCESS_DESTROY
62}; 62};
63 63
64enum {
65 UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0,
66 /* Support extending attributes by length, validate all unknown size == zero */
67 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
68};
69
70/* Specification of a single attribute inside the ioctl message */ 64/* Specification of a single attribute inside the ioctl message */
65/* good size 16 */
71struct uverbs_attr_spec { 66struct uverbs_attr_spec {
67 u8 type;
68
69 /*
70 * Support extending attributes by length. Allow the user to provide
71 * more bytes than ptr.len, but check that everything after is zero'd
72 * by the user.
73 */
74 u8 zero_trailing:1;
75 /*
76 * Valid only for PTR_IN. Allocate and copy the data inside
77 * the parser
78 */
79 u8 alloc_and_copy:1;
80 u8 mandatory:1;
81
72 union { 82 union {
73 /* Header shared by all following union members - to reduce space. */
74 struct {
75 enum uverbs_attr_type type;
76 /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
77 u8 flags;
78 };
79 struct { 83 struct {
80 enum uverbs_attr_type type;
81 /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
82 u8 flags;
83 /* Current known size to kernel */ 84 /* Current known size to kernel */
84 u16 len; 85 u16 len;
85 /* User isn't allowed to provide something < min_len */ 86 /* User isn't allowed to provide something < min_len */
86 u16 min_len; 87 u16 min_len;
87 } ptr; 88 } ptr;
89
88 struct { 90 struct {
89 enum uverbs_attr_type type;
90 /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
91 u8 flags;
92 /* 91 /*
93 * higher bits mean the namespace and lower bits mean 92 * higher bits mean the namespace and lower bits mean
94 * the type id within the namespace. 93 * the type id within the namespace.
95 */ 94 */
96 u16 obj_type; 95 u16 obj_type;
97 u8 access; 96 u8 access;
98 } obj; 97 } obj;
98
99 struct {
100 u8 num_elems;
101 } enum_def;
102 } u;
103
104 /* This weird split of the enum lets us remove some padding */
105 union {
99 struct { 106 struct {
100 enum uverbs_attr_type type;
101 /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
102 u8 flags;
103 u8 num_elems;
104 /* 107 /*
105 * The enum attribute can select one of the attributes 108 * The enum attribute can select one of the attributes
106 * contained in the ids array. Currently only PTR_IN 109 * contained in the ids array. Currently only PTR_IN
107 * attributes are supported in the ids array. 110 * attributes are supported in the ids array.
108 */ 111 */
109 const struct uverbs_attr_spec *ids; 112 const struct uverbs_attr_spec *ids;
110 } enum_def; 113 } enum_def;
111 }; 114 } u2;
112}; 115};
113 116
114struct uverbs_attr_spec_hash { 117/*
115 size_t num_attrs; 118 * Information about the API is loaded into a radix tree. For IOCTL we start
116 unsigned long *mandatory_attrs_bitmask; 119 * with a tuple of:
117 struct uverbs_attr_spec attrs[0]; 120 * object_id, attr_id, method_id
121 *
122 * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based
123 * on the current kernel support this is compressed into a 16 bit key for the
124 * radix tree. Since this compression is entirely internal to the kernel the
125 * below limits can be revised if the kernel gains additional data.
126 *
127 * With 64 leaves per node this is a 3 level radix tree.
128 *
129 * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
130 * the object slot, and OBJ_ID,METH_ID,0 returns the method slot.
131 */
132enum uapi_radix_data {
133 UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
134
135 UVERBS_API_ATTR_KEY_BITS = 6,
136 UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
137 UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
138
139 UVERBS_API_METHOD_KEY_BITS = 5,
140 UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
141 UVERBS_API_METHOD_KEY_NUM_CORE = 24,
142 UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
143 UVERBS_API_METHOD_KEY_NUM_CORE,
144 UVERBS_API_METHOD_KEY_MASK = GENMASK(
145 UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
146 UVERBS_API_METHOD_KEY_SHIFT),
147
148 UVERBS_API_OBJ_KEY_BITS = 5,
149 UVERBS_API_OBJ_KEY_SHIFT =
150 UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
151 UVERBS_API_OBJ_KEY_NUM_CORE = 24,
152 UVERBS_API_OBJ_KEY_NUM_DRIVER =
153 (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
154 UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
155
156 /* This id guaranteed to not exist in the radix tree */
157 UVERBS_API_KEY_ERR = 0xFFFFFFFF,
118}; 158};
119 159
120struct uverbs_attr_bundle; 160static inline __attribute_const__ u32 uapi_key_obj(u32 id)
121struct ib_uverbs_file; 161{
162 if (id & UVERBS_API_NS_FLAG) {
163 id &= ~UVERBS_API_NS_FLAG;
164 if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER)
165 return UVERBS_API_KEY_ERR;
166 id = id + UVERBS_API_OBJ_KEY_NUM_CORE;
167 } else {
168 if (id >= UVERBS_API_OBJ_KEY_NUM_CORE)
169 return UVERBS_API_KEY_ERR;
170 }
122 171
123enum { 172 return id << UVERBS_API_OBJ_KEY_SHIFT;
124 /* 173}
125 * Action marked with this flag creates a context (or root for all
126 * objects).
127 */
128 UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0,
129};
130 174
131struct uverbs_method_spec { 175static inline __attribute_const__ bool uapi_key_is_object(u32 key)
132 /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ 176{
133 u32 flags; 177 return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0;
134 size_t num_buckets; 178}
135 size_t num_child_attrs;
136 int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
137 struct uverbs_attr_bundle *ctx);
138 struct uverbs_attr_spec_hash *attr_buckets[0];
139};
140 179
141struct uverbs_method_spec_hash { 180static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
142 size_t num_methods; 181{
143 struct uverbs_method_spec *methods[0]; 182 if (id & UVERBS_API_NS_FLAG) {
144}; 183 id &= ~UVERBS_API_NS_FLAG;
184 if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER)
185 return UVERBS_API_KEY_ERR;
186 id = id + UVERBS_API_METHOD_KEY_NUM_CORE;
187 } else {
188 id++;
189 if (id >= UVERBS_API_METHOD_KEY_NUM_CORE)
190 return UVERBS_API_KEY_ERR;
191 }
145 192
146struct uverbs_object_spec { 193 return id << UVERBS_API_METHOD_KEY_SHIFT;
147 const struct uverbs_obj_type *type_attrs; 194}
148 size_t num_buckets;
149 struct uverbs_method_spec_hash *method_buckets[0];
150};
151 195
152struct uverbs_object_spec_hash { 196static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
153 size_t num_objects; 197{
154 struct uverbs_object_spec *objects[0]; 198 return attr_key &
155}; 199 (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
200}
156 201
157struct uverbs_root_spec { 202static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
158 size_t num_buckets; 203{
159 struct uverbs_object_spec_hash *object_buckets[0]; 204 return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
160}; 205 (key & UVERBS_API_ATTR_KEY_MASK) == 0;
206}
207
208static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
209{
210 /* 0 is the method slot itself */
211 return ioctl_method_key + 1;
212}
213
214static inline __attribute_const__ u32 uapi_key_attr(u32 id)
215{
216 /*
217 * The attr is designed to fit in the typical single radix tree node
218 * of 64 entries. Since almost all methods have driver attributes we
219 * organize things so that the driver and core attributes interleave to
220 * reduce the length of the attributes array in typical cases.
221 */
222 if (id & UVERBS_API_NS_FLAG) {
223 id &= ~UVERBS_API_NS_FLAG;
224 id++;
225 if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
226 return UVERBS_API_KEY_ERR;
227 id = (id << 1) | 0;
228 } else {
229 if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
230 return UVERBS_API_KEY_ERR;
231 id = (id << 1) | 1;
232 }
233
234 return id;
235}
236
237static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
238{
239 return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
240 (key & UVERBS_API_ATTR_KEY_MASK) != 0;
241}
242
243/*
244 * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN),
245 * basically it undoes the reservation of 0 in the ID numbering. attr_key
246 * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of
247 * uapi_key_attr().
248 */
249static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
250{
251 return attr_key - 1;
252}
161 253
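A small worked example of the key packing above, added for illustration only; the object, method and attribute IDs are invented, not real uapi constants.

/* Editor's sketch of the arithmetic; object 1, method 0 and attr 0 are made up. */
static void example_uapi_keys(void)
{
	u32 obj_key    = uapi_key_obj(1);                    /* 1 << 11       = 0x800 */
	u32 method_key = obj_key | uapi_key_ioctl_method(0); /* (0 + 1) << 6 -> 0x840 */
	u32 core_attr  = method_key | uapi_key_attr(0);      /* (0 << 1) | 1 -> 0x841 */
	u32 drv_attr   = method_key |                        /* driver IDs carry the NS flag */
			 uapi_key_attr(UVERBS_API_NS_FLAG | 0); /* ((0 + 1) << 1) | 0 -> 0x842 */

	/* Bitmap positions interleave: core attr 0 -> 0, driver attr 0 -> 1 */
	u32 core_bkey = uapi_bkey_attr(uapi_key_attr(0));                      /* 0 */
	u32 drv_bkey  = uapi_bkey_attr(uapi_key_attr(UVERBS_API_NS_FLAG | 0)); /* 1 */
}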
162/* 254/*
163 * ======================================= 255 * =======================================
@@ -176,7 +268,7 @@ struct uverbs_method_def {
176 u32 flags; 268 u32 flags;
177 size_t num_attrs; 269 size_t num_attrs;
178 const struct uverbs_attr_def * const (*attrs)[]; 270 const struct uverbs_attr_def * const (*attrs)[];
179 int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile, 271 int (*handler)(struct ib_uverbs_file *ufile,
180 struct uverbs_attr_bundle *ctx); 272 struct uverbs_attr_bundle *ctx);
181}; 273};
182 274
@@ -192,196 +284,171 @@ struct uverbs_object_tree_def {
192 const struct uverbs_object_def * const (*objects)[]; 284 const struct uverbs_object_def * const (*objects)[];
193}; 285};
194 286
195#define UA_FLAGS(_flags) .flags = _flags 287/*
196#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \ 288 * =======================================
197 ((const struct uverbs_attr_def) \ 289 * Attribute Specifications
198 {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } }) 290 * =======================================
199#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \ 291 */
200 ((const struct uverbs_attr_def) \
201 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} })
202#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \
203 ((const struct uverbs_attr_def) \
204 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} })
205#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) \
206 __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2)
207 292
208#define UVERBS_ATTR_TYPE(_type) \
209 .min_len = sizeof(_type), .len = sizeof(_type)
210#define UVERBS_ATTR_STRUCT(_type, _last) \
211 .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
212#define UVERBS_ATTR_SIZE(_min_len, _len) \ 293#define UVERBS_ATTR_SIZE(_min_len, _len) \
213 .min_len = _min_len, .len = _len 294 .u.ptr.min_len = _min_len, .u.ptr.len = _len
295
296#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0)
214 297
215/* 298/*
216 * In new compiler, UVERBS_ATTR could be simplified by declaring it as 299 * Specifies a uapi structure that cannot be extended. The user must always
217 * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__} 300 * supply the whole structure and nothing more. The structure must be declared
218 * But since we support older compilers too, we need the more complex code. 301 * in a header under include/uapi/rdma.
219 */ 302 */
220#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \ 303#define UVERBS_ATTR_TYPE(_type) \
221 __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0) 304 .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
222#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \ 305/*
223 UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__) 306 * Specifies a uapi structure where the user must provide at least up to
224/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */ 307 * member 'last'. Anything after last and up until the end of the structure
225#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \ 308 * can be non-zero; anything longer than the end of the structure must be
226 UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__) 309 * zero. The structure must be declared in a header under include/uapi/rdma.
227#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \ 310 */
228 UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__) 311#define UVERBS_ATTR_STRUCT(_type, _last) \
229#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \ 312 .zero_trailing = 1, \
230 UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__) 313 UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \
231#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) \ 314 sizeof(_type))
232 UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \ 315/*
233 .ids = (_enum_arr), \ 316 * Specifies at least min_len bytes must be passed in, but the amount can be
234 .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__) 317 * larger, up to the protocol maximum size. No check for zeroing is done.
318 */
319#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
320
321/* Must be used in the '...' of any UVERBS_ATTR */
322#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
323#define UA_MANDATORY .mandatory = 1
324#define UA_OPTIONAL .mandatory = 0
325
326#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
327 (&(const struct uverbs_attr_def){ \
328 .id = _attr_id, \
329 .attr = { .type = UVERBS_ATTR_TYPE_IDR, \
330 .u.obj.obj_type = _idr_type, \
331 .u.obj.access = _access, \
332 __VA_ARGS__ } })
333
334#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \
335 (&(const struct uverbs_attr_def){ \
336 .id = (_attr_id) + \
337 BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \
338 (_access) != UVERBS_ACCESS_READ), \
339 .attr = { .type = UVERBS_ATTR_TYPE_FD, \
340 .u.obj.obj_type = _fd_type, \
341 .u.obj.access = _access, \
342 __VA_ARGS__ } })
343
344#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \
345 (&(const struct uverbs_attr_def){ \
346 .id = _attr_id, \
347 .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \
348 _type, \
349 __VA_ARGS__ } })
350
351#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \
352 (&(const struct uverbs_attr_def){ \
353 .id = _attr_id, \
354 .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \
355 _type, \
356 __VA_ARGS__ } })
357
358/* _enum_arr should be a 'static const struct uverbs_attr_spec[]' */
359#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \
360 (&(const struct uverbs_attr_def){ \
361 .id = _attr_id, \
362 .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \
363 .u2.enum_def.ids = _enum_arr, \
364 .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \
365 __VA_ARGS__ }, \
366 })
235 367
236/* 368/*
237 * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring 369 * An input value that is a bitwise combination of values of _enum_type.
238 * it as 370 * This permits the flag value to be passed as either a u32 or u64; it must
239 * {.id = _id, \ 371 * be retrieved via uverbs_get_flags32() or uverbs_get_flags64().
240 * .attr {.type = __obj_class, \
241 * .obj = {.obj_type = _idr_type, \
242 * .access = _access \
243 * }, ##__VA_ARGS__ } }
244 * But since we support older compilers too, we need the more complex code.
245 */ 372 */
246#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\ 373#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) \
247 ((const struct uverbs_attr_def) \ 374 UVERBS_ATTR_PTR_IN( \
248 {.id = _id, \ 375 _attr_id, \
249 .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ 376 UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \
250 .access = _access, .flags = 0 } }, } }) 377 !sizeof(_enum_type *)), \
251#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\ 378 sizeof(u64)), \
252 ((const struct uverbs_attr_def) \ 379 __VA_ARGS__)
253 {.id = _id, \ 380
254 .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \ 381/*
255 .access = _access, _flags} }, } }) 382 * This spec is used in order to pass information to the hardware driver in a
256#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \ 383 * legacy way. Every verb that could get driver specific data should get this
257 _n, ...) \ 384 * spec.
258 ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags) 385 */
259#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...) \ 386#define UVERBS_ATTR_UHW() \
260 ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, \ 387 UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
261 ##__VA_ARGS__, 1, 0) 388 UVERBS_ATTR_MIN_SIZE(0), \
262#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \ 389 UA_OPTIONAL), \
263 __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\ 390 UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
264 ##__VA_ARGS__) 391 UVERBS_ATTR_MIN_SIZE(0), \
265#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) \ 392 UA_OPTIONAL)
266 __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type, \ 393
267 (_access) + BUILD_BUG_ON_ZERO( \ 394/*
268 (_access) != UVERBS_ACCESS_NEW && \ 395 * =======================================
269 (_access) != UVERBS_ACCESS_READ), \ 396 * Declaration helpers
270 ##__VA_ARGS__) 397 * =======================================
271#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \ 398 */
272 const struct uverbs_attr_def _name = __VA_ARGS__ 399
273 400#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
274#define DECLARE_UVERBS_ENUM(_name, ...) \ 401 static const struct uverbs_object_def *const _name##_ptr[] = { \
275 const struct uverbs_enum_spec _name = { \ 402 __VA_ARGS__, \
276 .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\ 403 }; \
277 .ids = {__VA_ARGS__}, \ 404 static const struct uverbs_object_tree_def _name = { \
405 .num_objects = ARRAY_SIZE(_name##_ptr), \
406 .objects = &_name##_ptr, \
278 } 407 }
279#define _UVERBS_METHOD_ATTRS_SZ(...) \
280 (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
281 sizeof(const struct uverbs_attr_def *))
282#define _UVERBS_METHOD(_id, _handler, _flags, ...) \
283 ((const struct uverbs_method_def) { \
284 .id = _id, \
285 .flags = _flags, \
286 .handler = _handler, \
287 .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__), \
288 .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} })
289#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...) \
290 const struct uverbs_method_def _name = \
291 _UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__)
292#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...) \
293 const struct uverbs_method_def _name = \
294 _UVERBS_METHOD(_id, _handler, \
295 UVERBS_ACTION_FLAG_CREATE_ROOT, \
296 ##__VA_ARGS__)
297#define _UVERBS_OBJECT_METHODS_SZ(...) \
298 (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \
299 sizeof(const struct uverbs_method_def *))
300#define _UVERBS_OBJECT(_id, _type_attrs, ...) \
301 ((const struct uverbs_object_def) { \
302 .id = _id, \
303 .type_attrs = _type_attrs, \
304 .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__), \
305 .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} })
306#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \
307 const struct uverbs_object_def _name = \
308 _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__)
309#define _UVERBS_TREE_OBJECTS_SZ(...) \
310 (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \
311 sizeof(const struct uverbs_object_def *))
312#define _UVERBS_OBJECT_TREE(...) \
313 ((const struct uverbs_object_tree_def) { \
314 .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \
315 .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} })
316#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
317 const struct uverbs_object_tree_def _name = \
318 _UVERBS_OBJECT_TREE(__VA_ARGS__)
319 408
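The difference between the three sizing macros above is easiest to see against a concrete layout; the structure below is invented purely for illustration and is not part of the patch.

/* Editor's sketch; struct example_resp and its members are hypothetical. */
struct example_resp {
	__u32 handle;            /* original ABI ...                      */
	__u32 flags;             /* ... everything up to here is required */
	__aligned_u64 comp_mask; /* later, optional extension             */
};

/*
 * UVERBS_ATTR_TYPE(struct example_resp)
 *      min_len == len == 16: exactly this structure must be supplied.
 * UVERBS_ATTR_STRUCT(struct example_resp, flags)
 *      min_len == 8, len == 16, zero_trailing == 1: the user may stop after
 *      'flags', and any bytes supplied beyond 16 must be zero.
 * UVERBS_ATTR_MIN_SIZE(8)
 *      at least 8 bytes, anything longer is accepted, no zero checking.
 */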
320/* ================================================= 409/* =================================================
321 * Parsing infrastructure 410 * Parsing infrastructure
322 * ================================================= 411 * =================================================
323 */ 412 */
324 413
414
325struct uverbs_ptr_attr { 415struct uverbs_ptr_attr {
326 u64 data; 416 /*
417 * If the spec's 'alloc_and_copy' bit is set then the 'ptr' member is
418 * used.
419 */
420 union {
421 void *ptr;
422 u64 data;
423 };
327 u16 len; 424 u16 len;
328 /* Combination of bits from enum UVERBS_ATTR_F_XXXX */ 425 u16 uattr_idx;
329 u16 flags;
330 u8 enum_id; 426 u8 enum_id;
331}; 427};
332 428
333struct uverbs_obj_attr { 429struct uverbs_obj_attr {
334 /* pointer to the kernel descriptor -> type, access, etc */
335 const struct uverbs_obj_type *type;
336 struct ib_uobject *uobject; 430 struct ib_uobject *uobject;
337 /* fd or id in idr of this object */ 431 const struct uverbs_api_attr *attr_elm;
338 int id;
339}; 432};
340 433
341struct uverbs_attr { 434struct uverbs_attr {
342 /*
343 * pointer to the user-space given attribute, in order to write the
344 * new uobject's id or update flags.
345 */
346 struct ib_uverbs_attr __user *uattr;
347 union { 435 union {
348 struct uverbs_ptr_attr ptr_attr; 436 struct uverbs_ptr_attr ptr_attr;
349 struct uverbs_obj_attr obj_attr; 437 struct uverbs_obj_attr obj_attr;
350 }; 438 };
351}; 439};
352 440
353struct uverbs_attr_bundle_hash {
354 /* if bit i is set, it means attrs[i] contains valid information */
355 unsigned long *valid_bitmap;
356 size_t num_attrs;
357 /*
358 * arrays of attributes, each element corresponds to the specification
359 * of the attribute in the same index.
360 */
361 struct uverbs_attr *attrs;
362};
363
364struct uverbs_attr_bundle { 441struct uverbs_attr_bundle {
365 size_t num_buckets; 442 struct ib_uverbs_file *ufile;
366 struct uverbs_attr_bundle_hash hash[]; 443 DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
444 struct uverbs_attr attrs[];
367}; 445};
368 446
369static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash,
370 unsigned int idx)
371{
372 return test_bit(idx, attrs_hash->valid_bitmap);
373}
374
375static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, 447static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle,
376 unsigned int idx) 448 unsigned int idx)
377{ 449{
378 u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT; 450 return test_bit(uapi_bkey_attr(uapi_key_attr(idx)),
379 451 attrs_bundle->attr_present);
380 if (attrs_bundle->num_buckets <= idx_bucket)
381 return false;
382
383 return uverbs_attr_is_valid_in_hash(&attrs_bundle->hash[idx_bucket],
384 idx & ~UVERBS_ID_NS_MASK);
385} 452}
386 453
387#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) 454#define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT)
@@ -389,12 +456,10 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
389static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, 456static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
390 u16 idx) 457 u16 idx)
391{ 458{
392 u16 idx_bucket = idx >> UVERBS_ID_NS_SHIFT;
393
394 if (!uverbs_attr_is_valid(attrs_bundle, idx)) 459 if (!uverbs_attr_is_valid(attrs_bundle, idx))
395 return ERR_PTR(-ENOENT); 460 return ERR_PTR(-ENOENT);
396 461
397 return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK]; 462 return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))];
398} 463}
399 464
400static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, 465static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
@@ -431,25 +496,15 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att
431 return attr->obj_attr.uobject; 496 return attr->obj_attr.uobject;
432} 497}
433 498
434static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, 499static inline int
435 size_t idx, const void *from, size_t size) 500uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
436{ 501{
437 const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); 502 const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
438 u16 flags;
439 size_t min_size;
440 503
441 if (IS_ERR(attr)) 504 if (IS_ERR(attr))
442 return PTR_ERR(attr); 505 return PTR_ERR(attr);
443 506
444 min_size = min_t(size_t, attr->ptr_attr.len, size); 507 return attr->ptr_attr.len;
445 if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
446 return -EFAULT;
447
448 flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
449 if (put_user(flags, &attr->uattr->flags))
450 return -EFAULT;
451
452 return 0;
453} 508}
454 509
455static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) 510static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
@@ -457,6 +512,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
457 return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); 512 return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
458} 513}
459 514
515static inline void *uverbs_attr_get_alloced_ptr(
516 const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
517{
518 const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
519
520 if (IS_ERR(attr))
521 return (void *)attr;
522
523 return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data :
524 attr->ptr_attr.ptr;
525}
526
460static inline int _uverbs_copy_from(void *to, 527static inline int _uverbs_copy_from(void *to,
461 const struct uverbs_attr_bundle *attrs_bundle, 528 const struct uverbs_attr_bundle *attrs_bundle,
462 size_t idx, 529 size_t idx,
@@ -515,54 +582,54 @@ static inline int _uverbs_copy_from_or_zero(void *to,
515#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ 582#define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \
516 _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) 583 _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
517 584
518/* =================================================
519 * Definitions -> Specs infrastructure
520 * =================================================
521 */
522
523/*
524 * uverbs_alloc_spec_tree - Merges different common and driver specific feature
525 * into one parsing tree that every uverbs command will be parsed upon.
526 *
527 * @num_trees: Number of trees in the array @trees.
528 * @trees: Array of pointers to tree root definitions to merge. Each such tree
529 * possibly contains objects, methods and attributes definitions.
530 *
531 * Returns:
532 * uverbs_root_spec *: The root of the merged parsing tree.
533 * On error, we return an error code. Error is checked via IS_ERR.
534 *
535 * The following merges could take place:
536 * a. Two trees representing the same method with different handler
537 * -> We take the handler of the tree that its handler != NULL
538 * and its index in the trees array is greater. The incentive for that
539 * is that developers are expected to first merge common trees and then
540 * merge trees that gives specialized the behaviour.
541 * b. Two trees representing the same object with different
542 * type_attrs (struct uverbs_obj_type):
543 * -> We take the type_attrs of the tree that its type_attr != NULL
544 * and its index in the trees array is greater. This could be used
545 * in order to override the free function, allocation size, etc.
546 * c. Two trees representing the same method attribute (same id but possibly
547 * different attributes):
548 * -> ERROR (-ENOENT), we believe that's not the programmer's intent.
549 *
550 * An object without any methods is considered invalid and will abort the
551 * function with -ENOENT error.
552 */
553#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 585#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
554struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, 586int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
555 const struct uverbs_object_tree_def **trees); 587 size_t idx, u64 allowed_bits);
556void uverbs_free_spec_tree(struct uverbs_root_spec *root); 588int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
557#else 589 size_t idx, u64 allowed_bits);
558static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees, 590int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
559 const struct uverbs_object_tree_def **trees) 591 const void *from, size_t size);
592__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
593 gfp_t flags);
594
595static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
596 size_t size)
560{ 597{
561 return NULL; 598 return _uverbs_alloc(bundle, size, GFP_KERNEL);
562} 599}
563 600
564static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root) 601static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
602 size_t size)
603{
604 return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
605}
606#else
607static inline int
608uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
609 size_t idx, u64 allowed_bits)
610{
611 return -EINVAL;
612}
613static inline int
614uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
615 size_t idx, u64 allowed_bits)
616{
617 return -EINVAL;
618}
619static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
620 size_t idx, const void *from, size_t size)
621{
622 return -EINVAL;
623}
624static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
625 size_t size)
626{
627 return ERR_PTR(-EINVAL);
628}
629static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
630 size_t size)
565{ 631{
632 return ERR_PTR(-EINVAL);
566} 633}
567#endif 634#endif
568 635
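Tying the bundle accessors and helpers above together, a post-rework handler might look roughly like the sketch below; the EXAMPLE_* attribute IDs, flag bits and struct example_resp are invented, while the helper calls use the signatures declared in this patch.

/* Editor's sketch only; not taken from the patch. */
static int example_query_handler(struct ib_uverbs_file *file,
				 struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, EXAMPLE_ATTR_HANDLE);
	struct example_resp resp = {};
	u32 flags = 0;
	int ret;

	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* Reject any bit outside the allowed mask */
	ret = uverbs_get_flags32(&flags, attrs, EXAMPLE_ATTR_FLAGS,
				 EXAMPLE_FLAG_A | EXAMPLE_FLAG_B);
	if (ret)
		return ret;

	resp.handle = uobj->id;
	resp.flags = flags;

	/* Copies at most as many bytes as the user-supplied buffer holds */
	return uverbs_copy_to(attrs, EXAMPLE_ATTR_RESP, &resp, sizeof(resp));
}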
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index c5bb4ebdb0b0..b3b21733cc55 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,48 +43,89 @@
43#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y) 43#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
44#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id) 44#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
45#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id) 45#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
46#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
46 47
47#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \ 48/* These are static so they do not need to be qualified */
48 DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__) 49#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
50#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
49 51
50#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \ 52#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \
51 DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__) 53 static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
54 _method_id)[] = { __VA_ARGS__ }; \
55 static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
56 .id = _method_id, \
57 .handler = UVERBS_HANDLER(_method_id), \
58 .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
59 .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
60 }
52 61
53#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \ 62/* Create a standard destroy method using the default handler. The handle_attr
51 DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__) 63 * argument must be the attribute specifying the handle to destroy; the
55 64 * default handler does not support any other attributes.
56#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \ 65 */
57 DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__) 66#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \
67 static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
68 _method_id)[] = { _handle_attr }; \
69 static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
70 .id = _method_id, \
71 .handler = uverbs_destroy_def_handler, \
72 .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
73 .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
74 }
58 75
59#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z) 76#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \
77 static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
78 _object_id)[] = { __VA_ARGS__ }; \
79 const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
80 .id = _object_id, \
81 .type_attrs = &_type_attrs, \
82 .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
83 .methods = &UVERBS_OBJECT_METHODS(_object_id) \
84 }
60 85
61#define UVERBS_NO_OVERRIDE NULL 86/*
87 * Declare global methods. These still have a unique object_id because we
88 * identify all uapi methods with a (object,method) tuple. However, they have
89 * no type pointer.
90 */
91#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \
92 static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
93 _object_id)[] = { __VA_ARGS__ }; \
94 const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
95 .id = _object_id, \
96 .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
97 .methods = &UVERBS_OBJECT_METHODS(_object_id) \
98 }
62 99
63/* This declares a parsing tree with one object and one method. This is usually 100/* Used by drivers to declare a complete parsing tree for new methods
64 * used for merging driver attributes to the common attributes. The driver has
65 * a chance to override the handler and type attrs of the original object.
66 * The __VA_ARGS__ just contains a list of attributes.
67 */ 101 */
68#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \ 102#define ADD_UVERBS_METHODS(_name, _object_id, ...) \
69static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ 103 static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
70 _method_, _name), \ 104 _object_id)[] = { __VA_ARGS__ }; \
71 _method, _handler, ##__VA_ARGS__); \ 105 static const struct uverbs_object_def _name##_struct = { \
72 \ 106 .id = _object_id, \
73static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ 107 .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
74 _object_, _name), \ 108 .methods = &UVERBS_OBJECT_METHODS(_object_id) \
75 _object, _type_attrs, \ 109 }; \
76 &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ 110 static const struct uverbs_object_def *const _name##_ptrs[] = { \
77 _method_, _name)); \ 111 &_name##_struct, \
78 \ 112 }; \
79static DECLARE_UVERBS_OBJECT_TREE(_name, \ 113 static const struct uverbs_object_tree_def _name = { \
80 &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \ 114 .num_objects = 1, \
81 _object_, _name)) 115 .objects = &_name##_ptrs, \
116 }
82 117
83/* A very common use case is that the driver doesn't override the handler and 118/* Used by drivers to declare a complete parsing tree for a single method that
84 * type_attrs. Therefore, we provide a simplified macro for this common case. 119 * differs only in having additional driver specific attributes.
85 */ 120 */
86#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) \ 121#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \
87 ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \ 122 static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
88 UVERBS_NO_OVERRIDE, ##__VA_ARGS__) 123 _method_id)[] = { __VA_ARGS__ }; \
124 static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
125 .id = _method_id, \
126 .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
127 .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
128 }; \
129 ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))
89 130
90#endif 131#endif
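For an end-to-end picture of the rewritten declaration macros, here is an editor-added sketch; every EXAMPLE_* identifier, struct example_cmd and example_destroy_hw() are hypothetical, and UVERBS_MODULE_NAME is assumed to be defined before this header is included.

/* Editor's sketch only; not part of the patch. */
static int UVERBS_HANDLER(EXAMPLE_METHOD_CREATE)(struct ib_uverbs_file *file,
						 struct uverbs_attr_bundle *attrs)
{
	/* ... allocate the HW object, parse attributes, fill the uobject ... */
	return 0;
}

DECLARE_UVERBS_NAMED_METHOD(
	EXAMPLE_METHOD_CREATE,
	UVERBS_ATTR_IDR(EXAMPLE_ATTR_CREATE_HANDLE, EXAMPLE_OBJECT,
			UVERBS_ACCESS_NEW, UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(EXAMPLE_ATTR_CREATE_CMD,
			   UVERBS_ATTR_TYPE(struct example_cmd), UA_MANDATORY));

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	EXAMPLE_METHOD_DESTROY,
	UVERBS_ATTR_IDR(EXAMPLE_ATTR_DESTROY_HANDLE, EXAMPLE_OBJECT,
			UVERBS_ACCESS_DESTROY, UA_MANDATORY));

DECLARE_UVERBS_NAMED_OBJECT(
	EXAMPLE_OBJECT,
	UVERBS_TYPE_ALLOC_IDR(example_destroy_hw),
	&UVERBS_METHOD(EXAMPLE_METHOD_CREATE),
	&UVERBS_METHOD(EXAMPLE_METHOD_DESTROY));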
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 9d56cdb84655..3b00231cc084 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,8 +37,6 @@
37#include <rdma/uverbs_ioctl.h> 37#include <rdma/uverbs_ioctl.h>
38#include <rdma/ib_user_ioctl_verbs.h> 38#include <rdma/ib_user_ioctl_verbs.h>
39 39
40#define UVERBS_OBJECT(id) uverbs_object_##id
41
42#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 40#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
43const struct uverbs_object_tree_def *uverbs_default_get_objects(void); 41const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
44#else 42#else
@@ -48,34 +46,61 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
48} 46}
49#endif 47#endif
50 48
51static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type, 49/* Returns _id, or causes a compile error if _id is not a u32.
52 bool write, 50 *
53 struct ib_ucontext *ucontext, 51 * The uobj APIs should only be used with the write based uAPI to access
54 int id) 52 * object IDs. The write API must use a u32 for the object handle, which is
53 * checked by this macro.
54 */
55#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
56
57#define uobj_get_type(_ufile, _object) \
58 uapi_get_object((_ufile)->device->uapi, _object)
59
60#define uobj_get_read(_type, _id, _ufile) \
61 rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
62 _uobj_check_id(_id), UVERBS_LOOKUP_READ)
63
64#define ufd_get_read(_type, _fdnum, _ufile) \
65 rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
66 (_fdnum)*typecheck(s32, _fdnum), \
67 UVERBS_LOOKUP_READ)
68
69static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
55{ 70{
56 return rdma_lookup_get_uobject(type, ucontext, id, write); 71 if (IS_ERR(uobj))
72 return NULL;
73 return uobj->object;
57} 74}
75#define uobj_get_obj_read(_object, _type, _id, _ufile) \
76 ((struct ib_##_object *)_uobj_get_obj_read( \
77 uobj_get_read(_type, _id, _ufile)))
58 78
59#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs 79#define uobj_get_write(_type, _id, _ufile) \
80 rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile, \
81 _uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
60 82
61#define uobj_get_read(_type, _id, _ucontext) \ 83int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
62 __uobj_get(uobj_get_type(_type), false, _ucontext, _id) 84 struct ib_uverbs_file *ufile, int success_res);
85#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \
86 __uobj_perform_destroy(uobj_get_type(_ufile, _type), \
87 _uobj_check_id(_id), _ufile, _success_res)
63 88
64#define uobj_get_obj_read(_object, _type, _id, _ucontext) \ 89struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
65({ \ 90 u32 id, struct ib_uverbs_file *ufile);
66 struct ib_uobject *__uobj = \
67 __uobj_get(uobj_get_type(_type), \
68 false, _ucontext, _id); \
69 \
70 (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
71})
72 91
73#define uobj_get_write(_type, _id, _ucontext) \ 92#define uobj_get_destroy(_type, _id, _ufile) \
74 __uobj_get(uobj_get_type(_type), true, _ucontext, _id) 93 __uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id), \
94 _ufile)
95
96static inline void uobj_put_destroy(struct ib_uobject *uobj)
97{
98 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
99}
75 100
76static inline void uobj_put_read(struct ib_uobject *uobj) 101static inline void uobj_put_read(struct ib_uobject *uobj)
77{ 102{
78 rdma_lookup_put_uobject(uobj, false); 103 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
79} 104}
80 105
81#define uobj_put_obj_read(_obj) \ 106#define uobj_put_obj_read(_obj) \
@@ -83,17 +108,17 @@ static inline void uobj_put_read(struct ib_uobject *uobj)
83 108
84static inline void uobj_put_write(struct ib_uobject *uobj) 109static inline void uobj_put_write(struct ib_uobject *uobj)
85{ 110{
86 rdma_lookup_put_uobject(uobj, true); 111 rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
87} 112}
88 113
89static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj) 114static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
115 int success_res)
90{ 116{
91 return rdma_remove_commit_uobject(uobj); 117 int ret = rdma_alloc_commit_uobject(uobj);
92}
93 118
94static inline void uobj_alloc_commit(struct ib_uobject *uobj) 119 if (ret)
95{ 120 return ret;
96 rdma_alloc_commit_uobject(uobj); 121 return success_res;
97} 122}
98 123
99static inline void uobj_alloc_abort(struct ib_uobject *uobj) 124static inline void uobj_alloc_abort(struct ib_uobject *uobj)
@@ -101,14 +126,19 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
101 rdma_alloc_abort_uobject(uobj); 126 rdma_alloc_abort_uobject(uobj);
102} 127}
103 128
104static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type, 129static inline struct ib_uobject *
105 struct ib_ucontext *ucontext) 130__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
131 struct ib_device **ib_dev)
106{ 132{
107 return rdma_alloc_begin_uobject(type, ucontext); 133 struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
134
135 if (!IS_ERR(uobj))
136 *ib_dev = uobj->context->device;
137 return uobj;
108} 138}
109 139
110#define uobj_alloc(_type, ucontext) \ 140#define uobj_alloc(_type, _ufile, _ib_dev) \
111 __uobj_alloc(uobj_get_type(_type), ucontext) 141 __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
112 142
113#endif 143#endif
114 144
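A brief, hedged sketch of the resulting write-path calling convention; the CQ/PD usage, the handle parameter and the error codes are placeholders, not taken from the patch.

/* Editor's sketch only. */
static int example_write_handler(struct ib_uverbs_file *file, u32 cq_handle)
{
	struct ib_device *ib_dev;
	struct ib_uobject *pd_uobj;
	struct ib_cq *cq;
	int ret;

	/* Shared (read) lookup; the handle is type-checked to be a u32 */
	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cq_handle, file);
	if (!cq)
		return -EINVAL;

	/* Creating a new object now also hands back the ib_device */
	pd_uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
	if (IS_ERR(pd_uobj)) {
		ret = PTR_ERR(pd_uobj);
		goto put_cq;
	}

	/* ... create the PD with ib_dev and store it in pd_uobj->object ... */

	uobj_put_obj_read(cq);
	return uobj_alloc_commit(pd_uobj, 0);

put_cq:
	uobj_put_obj_read(cq);
	return ret;
}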
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index cc04ec65588d..acb1bfa3cc99 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -37,63 +37,72 @@
37#include <rdma/ib_verbs.h> 37#include <rdma/ib_verbs.h>
38 38
39struct uverbs_obj_type; 39struct uverbs_obj_type;
40struct uverbs_api_object;
40 41
41struct uverbs_obj_type_class { 42enum rdma_lookup_mode {
43 UVERBS_LOOKUP_READ,
44 UVERBS_LOOKUP_WRITE,
42 /* 45 /*
43 * Get an ib_uobject that corresponds to the given id from ucontext, 46 * Destroy is like LOOKUP_WRITE, except that the uobject is not
44 * These functions could create or destroy objects if required. 47 * locked. uobj_destroy is used to convert a LOOKUP_DESTROY lock into
45 * The action will be finalized only when commit, abort or put fops are 48 * a LOOKUP_WRITE lock.
46 * called.
47 * The flow of the different actions is:
48 * [alloc]: Starts with alloc_begin. The handlers logic is than
49 * executed. If the handler is successful, alloc_commit
50 * is called and the object is inserted to the repository.
51 * Once alloc_commit completes the object is visible to
52 * other threads and userspace.
53 e Otherwise, alloc_abort is called and the object is
54 * destroyed.
55 * [lookup]: Starts with lookup_get which fetches and locks the
56 * object. After the handler finished using the object, it
57 * needs to call lookup_put to unlock it. The exclusive
58 * flag indicates if the object is locked for exclusive
59 * access.
60 * [remove]: Starts with lookup_get with exclusive flag set. This
61 * locks the object for exclusive access. If the handler
62 * code completed successfully, remove_commit is called
63 * and the ib_uobject is removed from the context's
64 * uobjects repository and put. The object itself is
65 * destroyed as well. Once remove succeeds new krefs to
66 * the object cannot be acquired by other threads or
67 * userspace and the hardware driver is removed from the
68 * object. Other krefs on the object may still exist.
69 * If the handler code failed, lookup_put should be
70 * called. This callback is used when the context
71 * is destroyed as well (process termination,
72 * reset flow).
73 */ 49 */
74 struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type, 50 UVERBS_LOOKUP_DESTROY,
75 struct ib_ucontext *ucontext); 51};
76 void (*alloc_commit)(struct ib_uobject *uobj); 52
53/*
54 * The following sequences are valid:
55 * Success flow:
56 * alloc_begin
57 * alloc_commit
58 * [..]
59 * Access flow:
60 * lookup_get(exclusive=false) & uverbs_try_lock_object
61 * lookup_put(exclusive=false) via rdma_lookup_put_uobject
62 * Destruction flow:
63 * lookup_get(exclusive=true) & uverbs_try_lock_object
64 * remove_commit
65 * remove_handle (optional)
66 * lookup_put(exclusive=true) via rdma_lookup_put_uobject
67 *
68 * Allocate Error flow #1
69 * alloc_begin
70 * alloc_abort
71 * Allocate Error flow #2
72 * alloc_begin
73 * remove_commit
74 * alloc_abort
75 * Allocate Error flow #3
76 * alloc_begin
77 * alloc_commit (fails)
78 * remove_commit
79 * alloc_abort
80 *
81 * In all cases the caller must hold the ufile kref until alloc_commit or
82 * alloc_abort returns.
83 */
84struct uverbs_obj_type_class {
85 struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
86 struct ib_uverbs_file *ufile);
87 /* This consumes the kref on uobj */
88 int (*alloc_commit)(struct ib_uobject *uobj);
89 /* This does not consume the kref on uobj */
77 void (*alloc_abort)(struct ib_uobject *uobj); 90 void (*alloc_abort)(struct ib_uobject *uobj);
78 91
79 struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type, 92 struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj,
80 struct ib_ucontext *ucontext, int id, 93 struct ib_uverbs_file *ufile, s64 id,
81 bool exclusive); 94 enum rdma_lookup_mode mode);
82 void (*lookup_put)(struct ib_uobject *uobj, bool exclusive); 95 void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
83 /* 96 /* This does not consume the kref on uobj */
84 * Must be called with the exclusive lock held. If successful uobj is 97 int __must_check (*destroy_hw)(struct ib_uobject *uobj,
85 * invalid on return. On failure uobject is left completely 98 enum rdma_remove_reason why);
86 * unchanged 99 void (*remove_handle)(struct ib_uobject *uobj);
87 */
88 int __must_check (*remove_commit)(struct ib_uobject *uobj,
89 enum rdma_remove_reason why);
90 u8 needs_kfree_rcu; 100 u8 needs_kfree_rcu;
91}; 101};
92 102
93struct uverbs_obj_type { 103struct uverbs_obj_type {
94 const struct uverbs_obj_type_class * const type_class; 104 const struct uverbs_obj_type_class * const type_class;
95 size_t obj_size; 105 size_t obj_size;
96 unsigned int destroy_order;
97}; 106};
98 107
99/* 108/*
@@ -120,16 +129,15 @@ struct uverbs_obj_idr_type {
120 enum rdma_remove_reason why); 129 enum rdma_remove_reason why);
121}; 130};
122 131
123struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type, 132struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
124 struct ib_ucontext *ucontext, 133 struct ib_uverbs_file *ufile, s64 id,
125 int id, bool exclusive); 134 enum rdma_lookup_mode mode);
126void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive); 135void rdma_lookup_put_uobject(struct ib_uobject *uobj,
127struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type, 136 enum rdma_lookup_mode mode);
128 struct ib_ucontext *ucontext); 137struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
138 struct ib_uverbs_file *ufile);
129void rdma_alloc_abort_uobject(struct ib_uobject *uobj); 139void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
130int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj); 140int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
131int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
132int rdma_explicit_destroy(struct ib_uobject *uobject);
133 141
134struct uverbs_obj_fd_type { 142struct uverbs_obj_fd_type {
135 /* 143 /*
@@ -140,7 +148,7 @@ struct uverbs_obj_fd_type {
140 * the driver is removed or the process terminated. 148 * the driver is removed or the process terminated.
141 */ 149 */
142 struct uverbs_obj_type type; 150 struct uverbs_obj_type type;
143 int (*context_closed)(struct ib_uobject_file *uobj_file, 151 int (*context_closed)(struct ib_uobject *uobj,
144 enum rdma_remove_reason why); 152 enum rdma_remove_reason why);
145 const struct file_operations *fops; 153 const struct file_operations *fops;
146 const char *name; 154 const char *name;
@@ -152,30 +160,29 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
152 160
153#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \ 161#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
154 sizeof(char)) 162 sizeof(char))
155#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\ 163#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\
156 ((&((const struct uverbs_obj_fd_type) \ 164 ((&((const struct uverbs_obj_fd_type) \
157 {.type = { \ 165 {.type = { \
158 .destroy_order = _order, \
159 .type_class = &uverbs_fd_class, \ 166 .type_class = &uverbs_fd_class, \
160 .obj_size = (_obj_size) + \ 167 .obj_size = (_obj_size) + \
161 UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \ 168 UVERBS_BUILD_BUG_ON((_obj_size) < \
169 sizeof(struct ib_uobject)), \
162 }, \ 170 }, \
163 .context_closed = _context_closed, \ 171 .context_closed = _context_closed, \
164 .fops = _fops, \ 172 .fops = _fops, \
165 .name = _name, \ 173 .name = _name, \
166 .flags = _flags}))->type) 174 .flags = _flags}))->type)
167#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \ 175#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \
168 ((&((const struct uverbs_obj_idr_type) \ 176 ((&((const struct uverbs_obj_idr_type) \
169 {.type = { \ 177 {.type = { \
170 .destroy_order = _order, \
171 .type_class = &uverbs_idr_class, \ 178 .type_class = &uverbs_idr_class, \
172 .obj_size = (_size) + \ 179 .obj_size = (_size) + \
173 UVERBS_BUILD_BUG_ON((_size) < \ 180 UVERBS_BUILD_BUG_ON((_size) < \
174 sizeof(struct ib_uobject)) \ 181 sizeof(struct ib_uobject)) \
175 }, \ 182 }, \
176 .destroy_object = _destroy_object,}))->type) 183 .destroy_object = _destroy_object,}))->type)
177#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \ 184#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \
178 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \ 185 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \
179 _destroy_object) 186 _destroy_object)
180 187
181#endif 188#endif
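To tie the type class back to the declaration macros, a hedged, editor-added sketch of an IDR-backed type and its hardware destroy callback; example_destroy_hw(), example_hw_teardown() and struct example_uobj are made up.

/* Editor's sketch only; not part of the patch. */
struct example_uobj {
	struct ib_uobject uobj;	/* must be first; see the obj_size check above */
	void *hw_ctx;
};

static int example_destroy_hw(struct ib_uobject *uobj,
			      enum rdma_remove_reason why)
{
	struct example_uobj *obj =
		container_of(uobj, struct example_uobj, uobj);

	/* Release the hardware resource behind the uobject */
	return example_hw_teardown(obj->hw_ctx);
}

/* Passed as the type_attrs argument of DECLARE_UVERBS_NAMED_OBJECT(), e.g.:
 *   DECLARE_UVERBS_NAMED_OBJECT(EXAMPLE_OBJECT,
 *           UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct example_uobj),
 *                                    example_destroy_hw),
 *           ...);
 */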
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index a159ba8dcf8f..f85ec1a3f727 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@
44 * In particular do not use pointer types -- pass pointers in __aligned_u64 44 * In particular do not use pointer types -- pass pointers in __aligned_u64
45 * instead. 45 * instead.
46 */ 46 */
47
48enum {
49 C4IW_64B_CQE = (1 << 0)
50};
51
52struct c4iw_create_cq {
53 __u32 flags;
54 __u32 reserved;
55};
56
47struct c4iw_create_cq_resp { 57struct c4iw_create_cq_resp {
48 __aligned_u64 key; 58 __aligned_u64 key;
49 __aligned_u64 gts_key; 59 __aligned_u64 gts_key;
@@ -51,11 +61,12 @@ struct c4iw_create_cq_resp {
51 __u32 cqid; 61 __u32 cqid;
52 __u32 size; 62 __u32 size;
53 __u32 qid_mask; 63 __u32 qid_mask;
54 __u32 reserved; /* explicit padding (optional for i386) */ 64 __u32 flags;
55}; 65};
56 66
57enum { 67enum {
58 C4IW_QPF_ONCHIP = (1 << 0) 68 C4IW_QPF_ONCHIP = (1 << 0),
69 C4IW_QPF_WRITE_W_IMM = (1 << 1)
59}; 70};
60 71
61struct c4iw_create_qp_resp { 72struct c4iw_create_qp_resp {
@@ -74,6 +85,23 @@ struct c4iw_create_qp_resp {
74 __u32 flags; 85 __u32 flags;
75}; 86};
76 87
88struct c4iw_create_srq_resp {
89 __aligned_u64 srq_key;
90 __aligned_u64 srq_db_gts_key;
91 __aligned_u64 srq_memsize;
92 __u32 srqid;
93 __u32 srq_size;
94 __u32 rqt_abs_idx;
95 __u32 qid_mask;
96 __u32 flags;
97 __u32 reserved; /* explicit padding */
98};
99
100enum {
101 /* HW supports SRQ_LIMIT_REACHED event */
102 T4_SRQ_LIMIT_SUPPORT = 1 << 0,
103};
104
77struct c4iw_alloc_ucontext_resp { 105struct c4iw_alloc_ucontext_resp {
78 __aligned_u64 status_page_key; 106 __aligned_u64 status_page_key;
79 __u32 status_page_size; 107 __u32 status_page_size;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 78613b609fa8..c1f87735514f 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -53,6 +53,7 @@ struct hns_roce_ib_create_qp {
53 __u8 log_sq_stride; 53 __u8 log_sq_stride;
54 __u8 sq_no_prefetch; 54 __u8 sq_no_prefetch;
55 __u8 reserved[5]; 55 __u8 reserved[5];
56 __aligned_u64 sdb_addr;
56}; 57};
57 58
58struct hns_roce_ib_create_qp_resp { 59struct hns_roce_ib_create_qp_resp {
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 888ac5975a6c..2c881aaf05c2 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids {
79}; 79};
80 80
81enum uverbs_attrs_create_flow_action_esp { 81enum uverbs_attrs_create_flow_action_esp {
82 UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, 82 UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
83 UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS, 83 UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
84 UVERBS_ATTR_FLOW_ACTION_ESP_ESN, 84 UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
85 UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT, 85 UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
@@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp {
87 UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP, 87 UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
88}; 88};
89 89
90enum uverbs_attrs_modify_flow_action_esp {
91 UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE =
92 UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
93};
94
90enum uverbs_attrs_destroy_flow_action_esp { 95enum uverbs_attrs_destroy_flow_action_esp {
91 UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE, 96 UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
92}; 97};
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 625545d862d7..6cdf192070a2 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -40,6 +40,59 @@
40#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name 40#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name
41#endif 41#endif
42 42
43enum ib_uverbs_access_flags {
44 IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
45 IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
46 IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2,
47 IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3,
48 IB_UVERBS_ACCESS_MW_BIND = 1 << 4,
49 IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
50 IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
51 IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
52};
53
54enum ib_uverbs_query_port_cap_flags {
55 IB_UVERBS_PCF_SM = 1 << 1,
56 IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
57 IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
58 IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
59 IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
60 IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
61 IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
62 IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
63 IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
64 IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
65 IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
66 IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
67 IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
68 IB_UVERBS_PCF_CM_SUP = 1 << 16,
69 IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
70 IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
71 IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
72 IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
73 IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
74 IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
75 IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
76 IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
77 IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
78 /*
79 * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
80 * is inaccessible
81 */
82 IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
83 IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
84 IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
85 IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
86 IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
87
88 /* NOTE this is an internal flag, not an IBA flag */
89 IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
90};
91
92enum ib_uverbs_query_port_flags {
93 IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0,
94};
95
43enum ib_uverbs_flow_action_esp_keymat { 96enum ib_uverbs_flow_action_esp_keymat {
44 IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM, 97 IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
45}; 98};
@@ -99,4 +152,9 @@ struct ib_uverbs_flow_action_esp {
99 __aligned_u64 hard_limit_pkts; 152 __aligned_u64 hard_limit_pkts;
100}; 153};
101 154
155enum ib_uverbs_read_counters_flags {
156 /* prefer read values from driver cache */
157 IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
158};
159
102#endif 160#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 4f9991de8e3a..25a16760de2a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -279,7 +279,7 @@ struct ib_uverbs_query_port {
279}; 279};
280 280
281struct ib_uverbs_query_port_resp { 281struct ib_uverbs_query_port_resp {
282 __u32 port_cap_flags; 282 __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */
283 __u32 max_msg_sz; 283 __u32 max_msg_sz;
284 __u32 bad_pkey_cntr; 284 __u32 bad_pkey_cntr;
285 __u32 qkey_viol_cntr; 285 __u32 qkey_viol_cntr;
@@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp {
299 __u8 active_speed; 299 __u8 active_speed;
300 __u8 phys_state; 300 __u8 phys_state;
301 __u8 link_layer; 301 __u8 link_layer;
302 __u8 reserved[2]; 302 __u8 flags; /* see ib_uverbs_query_port_flags */
303 __u8 reserved;
303}; 304};
304 305
305struct ib_uverbs_alloc_pd { 306struct ib_uverbs_alloc_pd {
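[Editor's note] With port_cap_flags now documented by ib_uverbs_query_port_cap_flags and the freed-up byte exposed as flags, userspace can interpret the query-port response without a private copy of the IBA bit definitions. A hedged sketch; the helper names are illustrative:

	#include <rdma/ib_user_verbs.h>
	#include <rdma/ib_user_ioctl_verbs.h>

	/* Illustrative only: decode two of the newly documented bits. */
	static inline bool port_uses_ip_based_gids(const struct ib_uverbs_query_port_resp *resp)
	{
		return resp->port_cap_flags & IB_UVERBS_PCF_IP_BASED_GIDS;
	}

	static inline bool port_requires_grh(const struct ib_uverbs_query_port_resp *resp)
	{
		return resp->flags & IB_UVERBS_QPF_GRH_REQUIRED;
	}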
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8daec1fa49cf..addbb9c4529e 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -76,6 +76,9 @@ enum mlx5_lib_caps {
76 MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, 76 MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0,
77}; 77};
78 78
79enum mlx5_ib_alloc_uctx_v2_flags {
80 MLX5_IB_ALLOC_UCTX_DEVX = 1 << 0,
81};
79struct mlx5_ib_alloc_ucontext_req_v2 { 82struct mlx5_ib_alloc_ucontext_req_v2 {
80 __u32 total_num_bfregs; 83 __u32 total_num_bfregs;
81 __u32 num_low_latency_bfregs; 84 __u32 num_low_latency_bfregs;
@@ -90,6 +93,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
90 93
91enum mlx5_ib_alloc_ucontext_resp_mask { 94enum mlx5_ib_alloc_ucontext_resp_mask {
92 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, 95 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
96 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1,
93}; 97};
94 98
95enum mlx5_user_cmds_supp_uhw { 99enum mlx5_user_cmds_supp_uhw {
@@ -138,7 +142,7 @@ struct mlx5_ib_alloc_ucontext_resp {
138 __u32 log_uar_size; 142 __u32 log_uar_size;
139 __u32 num_uars_per_page; 143 __u32 num_uars_per_page;
140 __u32 num_dyn_bfregs; 144 __u32 num_dyn_bfregs;
141 __u32 reserved3; 145 __u32 dump_fill_mkey;
142}; 146};
143 147
144struct mlx5_ib_alloc_pd_resp { 148struct mlx5_ib_alloc_pd_resp {
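[Editor's note] The former reserved3 field now returns the dump/fill mkey, and its validity is signalled through the response's comp_mask (an existing field of mlx5_ib_alloc_ucontext_resp that is not visible in this hunk; its presence is assumed here). A hedged sketch of the consumer side:

	#include <rdma/mlx5-abi.h>

	/* Illustrative only: trust dump_fill_mkey only when the kernel set the
	 * corresponding comp_mask bit. */
	static inline int mlx5_get_dump_fill_mkey(const struct mlx5_ib_alloc_ucontext_resp *resp,
						  __u32 *mkey)
	{
		if (!(resp->comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY))
			return -1;	/* not provided by this kernel/device */
		*mkey = resp->dump_fill_mkey;
		return 0;
	}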
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index f7d685ef2d1f..9c51801b9e64 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -33,6 +33,7 @@
33#ifndef MLX5_USER_IOCTL_CMDS_H 33#ifndef MLX5_USER_IOCTL_CMDS_H
34#define MLX5_USER_IOCTL_CMDS_H 34#define MLX5_USER_IOCTL_CMDS_H
35 35
36#include <linux/types.h>
36#include <rdma/ib_user_ioctl_cmds.h> 37#include <rdma/ib_user_ioctl_cmds.h>
37 38
38enum mlx5_ib_create_flow_action_attrs { 39enum mlx5_ib_create_flow_action_attrs {
@@ -45,4 +46,124 @@ enum mlx5_ib_alloc_dm_attrs {
45 MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX, 46 MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
46}; 47};
47 48
49enum mlx5_ib_devx_methods {
50 MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
51 MLX5_IB_METHOD_DEVX_QUERY_UAR,
52 MLX5_IB_METHOD_DEVX_QUERY_EQN,
53};
54
55enum mlx5_ib_devx_other_attrs {
56 MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
57 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
58};
59
60enum mlx5_ib_devx_obj_create_attrs {
61 MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
62 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
63 MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
64};
65
66enum mlx5_ib_devx_query_uar_attrs {
67 MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
68 MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
69};
70
71enum mlx5_ib_devx_obj_destroy_attrs {
72 MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
73};
74
75enum mlx5_ib_devx_obj_modify_attrs {
76 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
77 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
78 MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
79};
80
81enum mlx5_ib_devx_obj_query_attrs {
82 MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
83 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
84 MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
85};
86
87enum mlx5_ib_devx_query_eqn_attrs {
88 MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
89 MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
90};
91
92enum mlx5_ib_devx_obj_methods {
93 MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
94 MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
95 MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
96 MLX5_IB_METHOD_DEVX_OBJ_QUERY,
97};
98
99enum mlx5_ib_devx_umem_reg_attrs {
100 MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
101 MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
102 MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
103 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
104 MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
105};
106
107enum mlx5_ib_devx_umem_dereg_attrs {
108 MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
109};
110
111enum mlx5_ib_devx_umem_methods {
112 MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
113 MLX5_IB_METHOD_DEVX_UMEM_DEREG,
114};
115
116enum mlx5_ib_objects {
117 MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
118 MLX5_IB_OBJECT_DEVX_OBJ,
119 MLX5_IB_OBJECT_DEVX_UMEM,
120 MLX5_IB_OBJECT_FLOW_MATCHER,
121};
122
123enum mlx5_ib_flow_matcher_create_attrs {
124 MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
125 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
126 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
127 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
128};
129
130enum mlx5_ib_flow_matcher_destroy_attrs {
131 MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
132};
133
134enum mlx5_ib_flow_matcher_methods {
135 MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
136 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
137};
138
139#define MLX5_IB_DW_MATCH_PARAM 0x80
140
141struct mlx5_ib_match_params {
142 __u32 match_params[MLX5_IB_DW_MATCH_PARAM];
143};
144
145enum mlx5_ib_flow_type {
146 MLX5_IB_FLOW_TYPE_NORMAL,
147 MLX5_IB_FLOW_TYPE_SNIFFER,
148 MLX5_IB_FLOW_TYPE_ALL_DEFAULT,
149 MLX5_IB_FLOW_TYPE_MC_DEFAULT,
150};
151
152enum mlx5_ib_create_flow_attrs {
153 MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
154 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
155 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
156 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
157 MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
158};
159
160enum mlx5_ib_destoy_flow_attrs {
161 MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
162};
163
164enum mlx5_ib_flow_methods {
165 MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
166 MLX5_IB_METHOD_DESTROY_FLOW,
167};
168
48#endif 169#endif
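[Editor's note] All of the DEVX, flow-matcher and flow IDs above start at (1U << UVERBS_ID_NS_SHIFT), i.e. in the driver namespace of the 16-bit ID space, so they cannot collide with the common verbs IDs. A hedged sketch of that property for IDs in the first driver namespace; the helper is illustrative:

	#include <rdma/ib_user_ioctl_cmds.h>

	/* Illustrative only: driver-specific method/attribute IDs have the
	 * namespace bit set, common IDs do not. */
	static inline bool rdma_attr_is_driver_specific(__u16 attr_id)
	{
		return attr_id & (1U << UVERBS_ID_NS_SHIFT);
	}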
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 24c658b3c790..7a10b3a325fa 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -111,4 +111,21 @@ struct qedr_create_qp_uresp {
111 __u32 reserved; 111 __u32 reserved;
112}; 112};
113 113
114struct qedr_create_srq_ureq {
115 /* user space virtual address of producer pair */
116 __aligned_u64 prod_pair_addr;
117
118 /* user space virtual address of SRQ buffer */
119 __aligned_u64 srq_addr;
120
121 /* length of SRQ buffer */
122 __aligned_u64 srq_len;
123};
124
125struct qedr_create_srq_uresp {
126 __u16 srq_id;
127 __u16 reserved0;
128 __u32 reserved1;
129};
130
114#endif /* __QEDR_USER_H__ */ 131#endif /* __QEDR_USER_H__ */
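[Editor's note] For the new qedr SRQ ABI, the provider passes the SRQ buffer and the producer-pair area by user virtual address in qedr_create_srq_ureq and receives the SRQ id back in qedr_create_srq_uresp. A hedged userspace-side sketch; helper and parameter names are illustrative:

	#include <stdint.h>
	#include <rdma/qedr-abi.h>

	/* Illustrative only: package the user buffers for the create-SRQ command. */
	static void qedr_fill_create_srq_req(struct qedr_create_srq_ureq *req,
					     void *srq_buf, uint64_t srq_len,
					     void *prod_pair)
	{
		req->srq_addr       = (uintptr_t)srq_buf;
		req->srq_len        = srq_len;
		req->prod_pair_addr = (uintptr_t)prod_pair;
	}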
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 1da5a1e1f3a8..24800c6c1f32 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -62,7 +62,12 @@ struct ib_uverbs_attr {
62 } enum_data; 62 } enum_data;
63 __u16 reserved; 63 __u16 reserved;
64 } attr_data; 64 } attr_data;
65 __aligned_u64 data; /* ptr to command, inline data or idr/fd */ 65 union {
66 /* Used by PTR_IN/OUT, ENUM_IN and IDR */
67 __aligned_u64 data;
68 /* Used by FD_IN and FD_OUT */
69 __s64 data_s64;
70 };
66}; 71};
67 72
68struct ib_uverbs_ioctl_hdr { 73struct ib_uverbs_ioctl_hdr {
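[Editor's note] The anonymous union makes the FD case explicit: pointer, inline and IDR payloads still go through the unsigned data member, while FD_IN/FD_OUT use the signed data_s64 so a negative sentinel such as -1 is representable. A hedged sketch of filling the two cases (len is an existing field of ib_uverbs_attr not shown in this hunk; its use here is an assumption):

	#include <stdint.h>
	#include <rdma/rdma_user_ioctl_cmds.h>

	/* Illustrative only: payload for PTR_IN/OUT, ENUM_IN and IDR attributes. */
	static void set_ptr_attr(struct ib_uverbs_attr *attr, const void *buf, uint16_t len)
	{
		attr->len  = len;
		attr->data = (uintptr_t)buf;
	}

	/* Illustrative only: payload for FD_IN/FD_OUT attributes. */
	static void set_fd_attr(struct ib_uverbs_attr *attr, int fd)
	{
		attr->data_s64 = fd;	/* signed, negative fds survive */
	}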
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
index 2278fe05a1b0..fc680562d8b6 100644
--- a/lib/test_overflow.c
+++ b/lib/test_overflow.c
@@ -252,7 +252,8 @@ static int __init test_ ## t ## _overflow(void) { \
252 int err = 0; \ 252 int err = 0; \
253 unsigned i; \ 253 unsigned i; \
254 \ 254 \
255 pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests)); \ 255 pr_info("%-3s: %zu arithmetic tests\n", #t, \
256 ARRAY_SIZE(t ## _tests)); \
256 for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ 257 for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \
257 err |= do_test_ ## t(&t ## _tests[i]); \ 258 err |= do_test_ ## t(&t ## _tests[i]); \
258 return err; \ 259 return err; \
@@ -287,6 +288,200 @@ static int __init test_overflow_calculation(void)
287 return err; 288 return err;
288} 289}
289 290
291static int __init test_overflow_shift(void)
292{
293 int err = 0;
294
295/* Args are: value, shift, type, expected result, overflow expected */
296#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \
297 int __failed = 0; \
298 typeof(a) __a = (a); \
299 typeof(s) __s = (s); \
300 t __e = (expect); \
301 t __d; \
302 bool __of = check_shl_overflow(__a, __s, &__d); \
303 if (__of != of) { \
304 pr_warn("expected (%s)(%s << %s) to%s overflow\n", \
305 #t, #a, #s, of ? "" : " not"); \
306 __failed = 1; \
307 } else if (!__of && __d != __e) { \
308 pr_warn("expected (%s)(%s << %s) == %s\n", \
309 #t, #a, #s, #expect); \
310 if ((t)-1 < 0) \
311 pr_warn("got %lld\n", (s64)__d); \
312 else \
313 pr_warn("got %llu\n", (u64)__d); \
314 __failed = 1; \
315 } \
316 if (!__failed) \
317 pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \
318 of ? "overflow" : #expect); \
319 __failed; \
320})
321
322 /* Sane shifts. */
323 err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
324 err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
325 err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
326 err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
327 err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
328 err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
329 err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
330 err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
331 err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
332 err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
333 err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
334 err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
335 err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
336 err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
337 err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
338 err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
339 err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
340 err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
341 err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
342 err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
343 err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
344 err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
345 err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
346 err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
347 err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
348 err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64,
349 0xFFFFFFFFULL << 32, false);
350
351 /* Sane shift: start and end with 0, without a too-wide shift. */
352 err |= TEST_ONE_SHIFT(0, 7, u8, 0, false);
353 err |= TEST_ONE_SHIFT(0, 15, u16, 0, false);
354 err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
355 err |= TEST_ONE_SHIFT(0, 31, u32, 0, false);
356 err |= TEST_ONE_SHIFT(0, 63, u64, 0, false);
357
358 /* Sane shift: start and end with 0, without reaching signed bit. */
359 err |= TEST_ONE_SHIFT(0, 6, s8, 0, false);
360 err |= TEST_ONE_SHIFT(0, 14, s16, 0, false);
361 err |= TEST_ONE_SHIFT(0, 30, int, 0, false);
362 err |= TEST_ONE_SHIFT(0, 30, s32, 0, false);
363 err |= TEST_ONE_SHIFT(0, 62, s64, 0, false);
364
365 /* Overflow: shifted the bit off the end. */
366 err |= TEST_ONE_SHIFT(1, 8, u8, 0, true);
367 err |= TEST_ONE_SHIFT(1, 16, u16, 0, true);
368 err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
369 err |= TEST_ONE_SHIFT(1, 32, u32, 0, true);
370 err |= TEST_ONE_SHIFT(1, 64, u64, 0, true);
371
372 /* Overflow: shifted into the signed bit. */
373 err |= TEST_ONE_SHIFT(1, 7, s8, 0, true);
374 err |= TEST_ONE_SHIFT(1, 15, s16, 0, true);
375 err |= TEST_ONE_SHIFT(1, 31, int, 0, true);
376 err |= TEST_ONE_SHIFT(1, 31, s32, 0, true);
377 err |= TEST_ONE_SHIFT(1, 63, s64, 0, true);
378
379 /* Overflow: high bit falls off unsigned types. */
380 /* 10010110 */
381 err |= TEST_ONE_SHIFT(150, 1, u8, 0, true);
382 /* 1000100010010110 */
383 err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true);
384 /* 10000100000010001000100010010110 */
385 err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
386 err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
387 /* 1000001000010000010000000100000010000100000010001000100010010110 */
388 err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
389
390 /* Overflow: bit shifted into signed bit on signed types. */
391 /* 01001011 */
392 err |= TEST_ONE_SHIFT(75, 1, s8, 0, true);
393 /* 0100010001001011 */
394 err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true);
395 /* 01000010000001000100010001001011 */
396 err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
397 err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
398 /* 0100000100001000001000000010000001000010000001000100010001001011 */
399 err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
400
401 /* Overflow: bit shifted past signed bit on signed types. */
402 /* 01001011 */
403 err |= TEST_ONE_SHIFT(75, 2, s8, 0, true);
404 /* 0100010001001011 */
405 err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true);
406 /* 01000010000001000100010001001011 */
407 err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
408 err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
409 /* 0100000100001000001000000010000001000010000001000100010001001011 */
410 err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
411
412 /* Overflow: values larger than destination type. */
413 err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
414 err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
415 err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
416 err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
417 err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
418 err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
419 err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
420 err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
421 err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
422
423 /* Nonsense: negative initial value. */
424 err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true);
425 err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true);
426 err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true);
427 err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true);
428 err |= TEST_ONE_SHIFT(-10, 0, int, 0, true);
429 err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
430 err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true);
431 err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true);
432 err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
433 err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
434
435 /* Nonsense: negative shift values. */
436 err |= TEST_ONE_SHIFT(0, -5, s8, 0, true);
437 err |= TEST_ONE_SHIFT(0, -5, u8, 0, true);
438 err |= TEST_ONE_SHIFT(0, -10, s16, 0, true);
439 err |= TEST_ONE_SHIFT(0, -10, u16, 0, true);
440 err |= TEST_ONE_SHIFT(0, -15, int, 0, true);
441 err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
442 err |= TEST_ONE_SHIFT(0, -20, s32, 0, true);
443 err |= TEST_ONE_SHIFT(0, -20, u32, 0, true);
444 err |= TEST_ONE_SHIFT(0, -30, s64, 0, true);
445 err |= TEST_ONE_SHIFT(0, -30, u64, 0, true);
446
447 /* Overflow: shifted at or beyond entire type's bit width. */
448 err |= TEST_ONE_SHIFT(0, 8, u8, 0, true);
449 err |= TEST_ONE_SHIFT(0, 9, u8, 0, true);
450 err |= TEST_ONE_SHIFT(0, 8, s8, 0, true);
451 err |= TEST_ONE_SHIFT(0, 9, s8, 0, true);
452 err |= TEST_ONE_SHIFT(0, 16, u16, 0, true);
453 err |= TEST_ONE_SHIFT(0, 17, u16, 0, true);
454 err |= TEST_ONE_SHIFT(0, 16, s16, 0, true);
455 err |= TEST_ONE_SHIFT(0, 17, s16, 0, true);
456 err |= TEST_ONE_SHIFT(0, 32, u32, 0, true);
457 err |= TEST_ONE_SHIFT(0, 33, u32, 0, true);
458 err |= TEST_ONE_SHIFT(0, 32, int, 0, true);
459 err |= TEST_ONE_SHIFT(0, 33, int, 0, true);
460 err |= TEST_ONE_SHIFT(0, 32, s32, 0, true);
461 err |= TEST_ONE_SHIFT(0, 33, s32, 0, true);
462 err |= TEST_ONE_SHIFT(0, 64, u64, 0, true);
463 err |= TEST_ONE_SHIFT(0, 65, u64, 0, true);
464 err |= TEST_ONE_SHIFT(0, 64, s64, 0, true);
465 err |= TEST_ONE_SHIFT(0, 65, s64, 0, true);
466
467 /*
468 * Corner case: for unsigned types, we fail when we've shifted
469 * through the entire width of bits. For signed types, we might
470 * want to match this behavior, but that would mean noticing if
471 * we shift through all but the signed bit, and this is not
472 * currently detected (but we'll notice an overflow into the
473 * signed bit). So, for now, we will test this condition but
474 * mark it as not expected to overflow.
475 */
476 err |= TEST_ONE_SHIFT(0, 7, s8, 0, false);
477 err |= TEST_ONE_SHIFT(0, 15, s16, 0, false);
478 err |= TEST_ONE_SHIFT(0, 31, int, 0, false);
479 err |= TEST_ONE_SHIFT(0, 31, s32, 0, false);
480 err |= TEST_ONE_SHIFT(0, 63, s64, 0, false);
481
482 return err;
483}
484
290/* 485/*
291 * Deal with the various forms of allocator arguments. See comments above 486 * Deal with the various forms of allocator arguments. See comments above
292 * the DEFINE_TEST_ALLOC() instances for mapping of the "bits". 487 * the DEFINE_TEST_ALLOC() instances for mapping of the "bits".
@@ -397,6 +592,7 @@ static int __init test_module_init(void)
397 int err = 0; 592 int err = 0;
398 593
399 err |= test_overflow_calculation(); 594 err |= test_overflow_calculation();
595 err |= test_overflow_shift();
400 err |= test_overflow_allocation(); 596 err |= test_overflow_allocation();
401 597
402 if (err) { 598 if (err) {
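[Editor's note] The new test_overflow_shift() exercises check_shl_overflow() from <linux/overflow.h>, which shifts left into a destination of a possibly different type and reports whether any bits were lost. A hedged sketch of typical caller usage; the function name is illustrative:

	#include <linux/overflow.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	/* Illustrative only: scale a queue depth by a power of two, refusing
	 * values that do not fit in the 32-bit result. */
	static int scale_queue_depth(u32 depth, unsigned int shift, u32 *out)
	{
		if (check_shl_overflow(depth, shift, out))
			return -EOVERFLOW;
		return 0;
	}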
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 3d414acb7015..b06286f253cb 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -396,7 +396,7 @@ static int
396post_recv(struct p9_client *client, struct p9_rdma_context *c) 396post_recv(struct p9_client *client, struct p9_rdma_context *c)
397{ 397{
398 struct p9_trans_rdma *rdma = client->trans; 398 struct p9_trans_rdma *rdma = client->trans;
399 struct ib_recv_wr wr, *bad_wr; 399 struct ib_recv_wr wr;
400 struct ib_sge sge; 400 struct ib_sge sge;
401 401
402 c->busa = ib_dma_map_single(rdma->cm_id->device, 402 c->busa = ib_dma_map_single(rdma->cm_id->device,
@@ -415,7 +415,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
415 wr.wr_cqe = &c->cqe; 415 wr.wr_cqe = &c->cqe;
416 wr.sg_list = &sge; 416 wr.sg_list = &sge;
417 wr.num_sge = 1; 417 wr.num_sge = 1;
418 return ib_post_recv(rdma->qp, &wr, &bad_wr); 418 return ib_post_recv(rdma->qp, &wr, NULL);
419 419
420 error: 420 error:
421 p9_debug(P9_DEBUG_ERROR, "EIO\n"); 421 p9_debug(P9_DEBUG_ERROR, "EIO\n");
@@ -425,7 +425,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
425static int rdma_request(struct p9_client *client, struct p9_req_t *req) 425static int rdma_request(struct p9_client *client, struct p9_req_t *req)
426{ 426{
427 struct p9_trans_rdma *rdma = client->trans; 427 struct p9_trans_rdma *rdma = client->trans;
428 struct ib_send_wr wr, *bad_wr; 428 struct ib_send_wr wr;
429 struct ib_sge sge; 429 struct ib_sge sge;
430 int err = 0; 430 int err = 0;
431 unsigned long flags; 431 unsigned long flags;
@@ -520,7 +520,7 @@ dont_need_post_recv:
520 * status in case of a very fast reply. 520 * status in case of a very fast reply.
521 */ 521 */
522 req->status = REQ_STATUS_SENT; 522 req->status = REQ_STATUS_SENT;
523 err = ib_post_send(rdma->qp, &wr, &bad_wr); 523 err = ib_post_send(rdma->qp, &wr, NULL);
524 if (err) 524 if (err)
525 goto send_error; 525 goto send_error;
526 526
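[Editor's note] The 9p conversion above shows the pattern applied throughout this series: ib_post_send() and ib_post_recv() now accept NULL for the bad-WR argument, so callers that post a single WR and only care about the return code no longer need a dummy pointer. A hedged sketch; the helper name is illustrative:

	#include <rdma/ib_verbs.h>

	/* Illustrative only: post one receive WR with the reworked signature. */
	static int post_one_recv(struct ib_qp *qp, struct ib_cqe *cqe, struct ib_sge *sge)
	{
		struct ib_recv_wr wr = {
			.wr_cqe  = cqe,
			.sg_list = sge,
			.num_sge = 1,
		};

		return ib_post_recv(qp, &wr, NULL);	/* no bad_wr needed */
	}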
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 7232274de334..af6ad467ed61 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
140 &net_secret); 140 &net_secret);
141 return seq_scale(hash); 141 return seq_scale(hash);
142} 142}
143EXPORT_SYMBOL_GPL(secure_tcp_seq);
143 144
144u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) 145u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
145{ 146{
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 89c6333ecd39..c1d97640c0be 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -144,7 +144,7 @@ static void rds_ib_add_one(struct ib_device *device)
144 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); 144 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
145 145
146 rds_ibdev->max_wrs = device->attrs.max_qp_wr; 146 rds_ibdev->max_wrs = device->attrs.max_qp_wr;
147 rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); 147 rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
148 148
149 has_fr = (device->attrs.device_cap_flags & 149 has_fr = (device->attrs.device_cap_flags &
150 IB_DEVICE_MEM_MGT_EXTENSIONS); 150 IB_DEVICE_MEM_MGT_EXTENSIONS);
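[Editor's note] The rds change reflects the split of the old max_sge device attribute into max_send_sge and max_recv_sge, which can differ on some HCAs. A hedged sketch of sizing the two limits independently; the helper name is illustrative:

	#include <rdma/ib_verbs.h>
	#include <linux/kernel.h>

	/* Illustrative only: clamp send and receive SGE counts separately. */
	static void size_sge_limits(const struct ib_device_attr *attrs, u32 cap,
				    u32 *send_sge, u32 *recv_sge)
	{
		*send_sge = min_t(u32, attrs->max_send_sge, cap);
		*recv_sge = min_t(u32, attrs->max_recv_sge, cap);
	}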
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 8596eed6d9a8..6431a023ac89 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -103,7 +103,6 @@ static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
103static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) 103static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
104{ 104{
105 struct rds_ib_frmr *frmr = &ibmr->u.frmr; 105 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
106 struct ib_send_wr *failed_wr;
107 struct ib_reg_wr reg_wr; 106 struct ib_reg_wr reg_wr;
108 int ret, off = 0; 107 int ret, off = 0;
109 108
@@ -136,9 +135,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
136 IB_ACCESS_REMOTE_WRITE; 135 IB_ACCESS_REMOTE_WRITE;
137 reg_wr.wr.send_flags = IB_SEND_SIGNALED; 136 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
138 137
139 failed_wr = &reg_wr.wr; 138 ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
140 ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
141 WARN_ON(failed_wr != &reg_wr.wr);
142 if (unlikely(ret)) { 139 if (unlikely(ret)) {
143 /* Failure here can be because of -ENOMEM as well */ 140 /* Failure here can be because of -ENOMEM as well */
144 frmr->fr_state = FRMR_IS_STALE; 141 frmr->fr_state = FRMR_IS_STALE;
@@ -231,7 +228,7 @@ out_unmap:
231 228
232static int rds_ib_post_inv(struct rds_ib_mr *ibmr) 229static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
233{ 230{
234 struct ib_send_wr *s_wr, *failed_wr; 231 struct ib_send_wr *s_wr;
235 struct rds_ib_frmr *frmr = &ibmr->u.frmr; 232 struct rds_ib_frmr *frmr = &ibmr->u.frmr;
236 struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id; 233 struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
237 int ret = -EINVAL; 234 int ret = -EINVAL;
@@ -256,9 +253,7 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
256 s_wr->ex.invalidate_rkey = frmr->mr->rkey; 253 s_wr->ex.invalidate_rkey = frmr->mr->rkey;
257 s_wr->send_flags = IB_SEND_SIGNALED; 254 s_wr->send_flags = IB_SEND_SIGNALED;
258 255
259 failed_wr = s_wr; 256 ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
260 ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
261 WARN_ON(failed_wr != s_wr);
262 if (unlikely(ret)) { 257 if (unlikely(ret)) {
263 frmr->fr_state = FRMR_IS_STALE; 258 frmr->fr_state = FRMR_IS_STALE;
264 frmr->fr_inv = false; 259 frmr->fr_inv = false;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index d300186b8dc0..2f16146e4ec9 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -381,7 +381,6 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
381{ 381{
382 struct rds_ib_connection *ic = conn->c_transport_data; 382 struct rds_ib_connection *ic = conn->c_transport_data;
383 struct rds_ib_recv_work *recv; 383 struct rds_ib_recv_work *recv;
384 struct ib_recv_wr *failed_wr;
385 unsigned int posted = 0; 384 unsigned int posted = 0;
386 int ret = 0; 385 int ret = 0;
387 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); 386 bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
@@ -415,7 +414,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
415 &recv->r_frag->f_sg)); 414 &recv->r_frag->f_sg));
416 415
417 /* XXX when can this fail? */ 416 /* XXX when can this fail? */
418 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 417 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
419 if (ret) { 418 if (ret) {
420 rds_ib_conn_error(conn, "recv post on " 419 rds_ib_conn_error(conn, "recv post on "
421 "%pI6c returned %d, disconnecting and " 420 "%pI6c returned %d, disconnecting and "
@@ -648,7 +647,6 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
648static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 647static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
649{ 648{
650 struct rds_header *hdr = ic->i_ack; 649 struct rds_header *hdr = ic->i_ack;
651 struct ib_send_wr *failed_wr;
652 u64 seq; 650 u64 seq;
653 int ret; 651 int ret;
654 652
@@ -661,7 +659,7 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
661 rds_message_make_checksum(hdr); 659 rds_message_make_checksum(hdr);
662 ic->i_ack_queued = jiffies; 660 ic->i_ack_queued = jiffies;
663 661
664 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); 662 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
665 if (unlikely(ret)) { 663 if (unlikely(ret)) {
666 /* Failed to send. Release the WR, and 664 /* Failed to send. Release the WR, and
667 * force another ACK. 665 * force another ACK.
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index c8dd3125d398..2dcb555e6350 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -492,7 +492,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
492 struct rds_ib_send_work *send = NULL; 492 struct rds_ib_send_work *send = NULL;
493 struct rds_ib_send_work *first; 493 struct rds_ib_send_work *first;
494 struct rds_ib_send_work *prev; 494 struct rds_ib_send_work *prev;
495 struct ib_send_wr *failed_wr; 495 const struct ib_send_wr *failed_wr;
496 struct scatterlist *scat; 496 struct scatterlist *scat;
497 u32 pos; 497 u32 pos;
498 u32 i; 498 u32 i;
@@ -758,7 +758,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
758{ 758{
759 struct rds_ib_connection *ic = conn->c_transport_data; 759 struct rds_ib_connection *ic = conn->c_transport_data;
760 struct rds_ib_send_work *send = NULL; 760 struct rds_ib_send_work *send = NULL;
761 struct ib_send_wr *failed_wr; 761 const struct ib_send_wr *failed_wr;
762 u32 pos; 762 u32 pos;
763 u32 work_alloc; 763 u32 work_alloc;
764 int ret; 764 int ret;
@@ -846,7 +846,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
846 struct rds_ib_send_work *send = NULL; 846 struct rds_ib_send_work *send = NULL;
847 struct rds_ib_send_work *first; 847 struct rds_ib_send_work *first;
848 struct rds_ib_send_work *prev; 848 struct rds_ib_send_work *prev;
849 struct ib_send_wr *failed_wr; 849 const struct ib_send_wr *failed_wr;
850 struct scatterlist *scat; 850 struct scatterlist *scat;
851 unsigned long len; 851 unsigned long len;
852 u64 remote_addr = op->op_remote_addr; 852 u64 remote_addr = op->op_remote_addr;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a46418f45ecd..e871368500e3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -16,6 +16,7 @@
16#include <net/tcp.h> 16#include <net/tcp.h>
17#include <net/sock.h> 17#include <net/sock.h>
18#include <rdma/ib_verbs.h> 18#include <rdma/ib_verbs.h>
19#include <rdma/ib_cache.h>
19 20
20#include "smc.h" 21#include "smc.h"
21#include "smc_clc.h" 22#include "smc_clc.h"
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2cc64bc8ae20..9bb5274a244e 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -16,6 +16,7 @@
16#include <linux/workqueue.h> 16#include <linux/workqueue.h>
17#include <linux/scatterlist.h> 17#include <linux/scatterlist.h>
18#include <rdma/ib_verbs.h> 18#include <rdma/ib_verbs.h>
19#include <rdma/ib_cache.h>
19 20
20#include "smc_pnet.h" 21#include "smc_pnet.h"
21#include "smc_ib.h" 22#include "smc_ib.h"
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 2f5e324e54b9..d8366ed51757 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -270,7 +270,6 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
270 int num_sges, struct ib_sge sges[]) 270 int num_sges, struct ib_sge sges[])
271{ 271{
272 struct smc_link_group *lgr = conn->lgr; 272 struct smc_link_group *lgr = conn->lgr;
273 struct ib_send_wr *failed_wr = NULL;
274 struct ib_rdma_wr rdma_wr; 273 struct ib_rdma_wr rdma_wr;
275 struct smc_link *link; 274 struct smc_link *link;
276 int rc; 275 int rc;
@@ -288,7 +287,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
288 /* offset within RMBE */ 287 /* offset within RMBE */
289 peer_rmbe_offset; 288 peer_rmbe_offset;
290 rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; 289 rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
291 rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); 290 rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
292 if (rc) { 291 if (rc) {
293 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 292 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
294 smc_lgr_terminate(lgr); 293 smc_lgr_terminate(lgr);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index f856b8402b3f..3c458d279855 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -232,15 +232,13 @@ int smc_wr_tx_put_slot(struct smc_link *link,
232 */ 232 */
233int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) 233int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
234{ 234{
235 struct ib_send_wr *failed_wr = NULL;
236 struct smc_wr_tx_pend *pend; 235 struct smc_wr_tx_pend *pend;
237 int rc; 236 int rc;
238 237
239 ib_req_notify_cq(link->smcibdev->roce_cq_send, 238 ib_req_notify_cq(link->smcibdev->roce_cq_send,
240 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); 239 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
241 pend = container_of(priv, struct smc_wr_tx_pend, priv); 240 pend = container_of(priv, struct smc_wr_tx_pend, priv);
242 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], 241 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
243 &failed_wr);
244 if (rc) { 242 if (rc) {
245 smc_wr_tx_put_slot(link, priv); 243 smc_wr_tx_put_slot(link, priv);
246 smc_lgr_terminate(smc_get_lgr(link)); 244 smc_lgr_terminate(smc_get_lgr(link));
@@ -251,7 +249,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
251/* Register a memory region and wait for result. */ 249/* Register a memory region and wait for result. */
252int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) 250int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
253{ 251{
254 struct ib_send_wr *failed_wr = NULL;
255 int rc; 252 int rc;
256 253
257 ib_req_notify_cq(link->smcibdev->roce_cq_send, 254 ib_req_notify_cq(link->smcibdev->roce_cq_send,
@@ -260,9 +257,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
260 link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; 257 link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
261 link->wr_reg.mr = mr; 258 link->wr_reg.mr = mr;
262 link->wr_reg.key = mr->rkey; 259 link->wr_reg.key = mr->rkey;
263 failed_wr = &link->wr_reg.wr; 260 rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
264 rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
265 WARN_ON(failed_wr != &link->wr_reg.wr);
266 if (rc) 261 if (rc)
267 return rc; 262 return rc;
268 263
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 210bec3c3ebe..1d85bb14fd6f 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -63,7 +63,6 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
63/* post a new receive work request to fill a completed old work request entry */ 63/* post a new receive work request to fill a completed old work request entry */
64static inline int smc_wr_rx_post(struct smc_link *link) 64static inline int smc_wr_rx_post(struct smc_link *link)
65{ 65{
66 struct ib_recv_wr *bad_recv_wr = NULL;
67 int rc; 66 int rc;
68 u64 wr_id, temp_wr_id; 67 u64 wr_id, temp_wr_id;
69 u32 index; 68 u32 index;
@@ -72,7 +71,7 @@ static inline int smc_wr_rx_post(struct smc_link *link)
72 temp_wr_id = wr_id; 71 temp_wr_id = wr_id;
73 index = do_div(temp_wr_id, link->wr_rx_cnt); 72 index = do_div(temp_wr_id, link->wr_rx_cnt);
74 link->wr_rx_ibs[index].wr_id = wr_id; 73 link->wr_rx_ibs[index].wr_id = wr_id;
75 rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], &bad_recv_wr); 74 rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL);
76 return rc; 75 return rc;
77} 76}
78 77
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 17fb1e025654..0f7c465d9a5a 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -279,9 +279,7 @@ out_maperr:
279static int 279static int
280fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 280fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
281{ 281{
282 struct ib_send_wr *bad_wr; 282 return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
283
284 return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
285} 283}
286 284
287/* Invalidate all memory regions that were registered for "req". 285/* Invalidate all memory regions that were registered for "req".
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c040de196e13..1bb00dd6ccdb 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -464,7 +464,7 @@ out_mapmr_err:
464static int 464static int
465frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) 465frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
466{ 466{
467 struct ib_send_wr *post_wr, *bad_wr; 467 struct ib_send_wr *post_wr;
468 struct rpcrdma_mr *mr; 468 struct rpcrdma_mr *mr;
469 469
470 post_wr = &req->rl_sendctx->sc_wr; 470 post_wr = &req->rl_sendctx->sc_wr;
@@ -486,7 +486,7 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
486 /* If ib_post_send fails, the next ->send_request for 486 /* If ib_post_send fails, the next ->send_request for
487 * @req will queue these MWs for recovery. 487 * @req will queue these MWs for recovery.
488 */ 488 */
489 return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr); 489 return ib_post_send(ia->ri_id->qp, post_wr, NULL);
490} 490}
491 491
492/* Handle a remotely invalidated mr on the @mrs list 492/* Handle a remotely invalidated mr on the @mrs list
@@ -517,7 +517,8 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
517static void 517static void
518frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs) 518frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
519{ 519{
520 struct ib_send_wr *first, **prev, *last, *bad_wr; 520 struct ib_send_wr *first, **prev, *last;
521 const struct ib_send_wr *bad_wr;
521 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 522 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
522 struct rpcrdma_frwr *frwr; 523 struct rpcrdma_frwr *frwr;
523 struct rpcrdma_mr *mr; 524 struct rpcrdma_mr *mr;
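[Editor's note] Where a caller still wants to know which WR failed, as in frwr_op_unmap_sync() here, the returned pointer is now const struct ib_send_wr *, matching the const-ified posting interfaces in this series. A hedged sketch of the chained-post pattern with the new types:

	#include <rdma/ib_verbs.h>
	#include <linux/printk.h>

	/* Illustrative only: post a chain and report where it failed. */
	static int post_chain(struct ib_qp *qp, struct ib_send_wr *first)
	{
		const struct ib_send_wr *bad_wr;
		int rc;

		rc = ib_post_send(qp, first, &bad_wr);
		if (rc)
			pr_err("send chain failed at wr %p: %d\n", bad_wr, rc);
		return rc;
	}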
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 841fca143804..2ef75e885411 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -229,11 +229,10 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
229static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, 229static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
230 struct svc_rdma_recv_ctxt *ctxt) 230 struct svc_rdma_recv_ctxt *ctxt)
231{ 231{
232 struct ib_recv_wr *bad_recv_wr;
233 int ret; 232 int ret;
234 233
235 svc_xprt_get(&rdma->sc_xprt); 234 svc_xprt_get(&rdma->sc_xprt);
236 ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr); 235 ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
237 trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret); 236 trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
238 if (ret) 237 if (ret)
239 goto err_post; 238 goto err_post;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index ce3ea8419704..04cb3363172a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -307,7 +307,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
307{ 307{
308 struct svcxprt_rdma *rdma = cc->cc_rdma; 308 struct svcxprt_rdma *rdma = cc->cc_rdma;
309 struct svc_xprt *xprt = &rdma->sc_xprt; 309 struct svc_xprt *xprt = &rdma->sc_xprt;
310 struct ib_send_wr *first_wr, *bad_wr; 310 struct ib_send_wr *first_wr;
311 const struct ib_send_wr *bad_wr;
311 struct list_head *tmp; 312 struct list_head *tmp;
312 struct ib_cqe *cqe; 313 struct ib_cqe *cqe;
313 int ret; 314 int ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 4a3efaea277c..ffef0c508f1a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -291,7 +291,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
291 */ 291 */
292int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr) 292int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
293{ 293{
294 struct ib_send_wr *bad_wr;
295 int ret; 294 int ret;
296 295
297 might_sleep(); 296 might_sleep();
@@ -311,7 +310,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
311 } 310 }
312 311
313 svc_xprt_get(&rdma->sc_xprt); 312 svc_xprt_get(&rdma->sc_xprt);
314 ret = ib_post_send(rdma->sc_qp, wr, &bad_wr); 313 ret = ib_post_send(rdma->sc_qp, wr, NULL);
315 trace_svcrdma_post_send(wr, ret); 314 trace_svcrdma_post_send(wr, ret);
316 if (ret) { 315 if (ret) {
317 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 316 set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e9535a66bab0..547b2cdf1427 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
476 476
477 /* Qualify the transport resource defaults with the 477 /* Qualify the transport resource defaults with the
478 * capabilities of this particular device */ 478 * capabilities of this particular device */
479 newxprt->sc_max_send_sges = dev->attrs.max_sge; 479 newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
480 /* transport hdr, head iovec, one page list entry, tail iovec */ 480 /* transport hdr, head iovec, one page list entry, tail iovec */
481 if (newxprt->sc_max_send_sges < 4) { 481 if (newxprt->sc_max_send_sges < 4) {
482 pr_err("svcrdma: too few Send SGEs available (%d)\n", 482 pr_err("svcrdma: too few Send SGEs available (%d)\n",
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 16161a36dc73..5efeba08918b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
508 unsigned int max_sge; 508 unsigned int max_sge;
509 int rc; 509 int rc;
510 510
511 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, 511 max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
512 RPCRDMA_MAX_SEND_SGES); 512 RPCRDMA_MAX_SEND_SGES);
513 if (max_sge < RPCRDMA_MIN_SEND_SGES) { 513 if (max_sge < RPCRDMA_MIN_SEND_SGES) {
514 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); 514 pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -1559,7 +1559,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
1559 if (!count) 1559 if (!count)
1560 return; 1560 return;
1561 1561
1562 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr); 1562 rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
1563 (const struct ib_recv_wr **)&bad_wr);
1563 if (rc) { 1564 if (rc) {
1564 for (wr = bad_wr; wr; wr = wr->next) { 1565 for (wr = bad_wr; wr; wr = wr->next) {
1565 struct rpcrdma_rep *rep; 1566 struct rpcrdma_rep *rep;