author     Linus Torvalds <torvalds@linux-foundation.org>  2018-10-26 10:38:19 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-10-26 10:38:19 -0400
commit     da19a102ce87bf3e0a7fe277a659d1fc35330d6d
tree       a6c1d40ef544e812b31f4b5f497c20d449d45ec3
parent     e5f6d9afa3415104e402cd69288bb03f7165eeba
parent     a60109dc9a954ef9eddba6577e2d2e9e7952e487
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
     rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full
     packet mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

   - Modernize the use of sysfs and the device interface to use
     attribute groups and cdev properly for uverbs, and clean up some of
     the core code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
     rework how RDMA holds pointers to mm_struct for get_user_pages
     cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
-rw-r--r--  Documentation/ABI/testing/sysfs-class-net | 18
-rw-r--r--  drivers/infiniband/Kconfig | 1
-rw-r--r--  drivers/infiniband/core/addr.c | 406
-rw-r--r--  drivers/infiniband/core/cache.c | 79
-rw-r--r--  drivers/infiniband/core/cm.c | 9
-rw-r--r--  drivers/infiniband/core/cma.c | 251
-rw-r--r--  drivers/infiniband/core/cma_configfs.c | 2
-rw-r--r--  drivers/infiniband/core/core_priv.h | 12
-rw-r--r--  drivers/infiniband/core/cq.c | 10
-rw-r--r--  drivers/infiniband/core/device.c | 264
-rw-r--r--  drivers/infiniband/core/fmr_pool.c | 5
-rw-r--r--  drivers/infiniband/core/iwcm.c | 2
-rw-r--r--  drivers/infiniband/core/mad.c | 80
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 2
-rw-r--r--  drivers/infiniband/core/netlink.c | 4
-rw-r--r--  drivers/infiniband/core/nldev.c | 37
-rw-r--r--  drivers/infiniband/core/rdma_core.c | 56
-rw-r--r--  drivers/infiniband/core/rdma_core.h | 1
-rw-r--r--  drivers/infiniband/core/restrack.c | 30
-rw-r--r--  drivers/infiniband/core/sa.h | 8
-rw-r--r--  drivers/infiniband/core/sa_query.c | 70
-rw-r--r--  drivers/infiniband/core/security.c | 7
-rw-r--r--  drivers/infiniband/core/sysfs.c | 101
-rw-r--r--  drivers/infiniband/core/umem.c | 125
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 621
-rw-r--r--  drivers/infiniband/core/user_mad.c | 13
-rw-r--r--  drivers/infiniband/core/uverbs.h | 15
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 43
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 140
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 340
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_flow_action.c | 7
-rw-r--r--  drivers/infiniband/core/uverbs_uapi.c | 12
-rw-r--r--  drivers/infiniband/core/verbs.c | 19
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 11
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 125
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 134
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 88
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 29
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 77
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 10
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 5
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 55
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 3
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 50
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 10
-rw-r--r--  drivers/infiniband/hw/hfi1/Makefile | 42
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 486
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 71
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 48
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 113
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.c | 94
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.h | 192
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.c | 363
-rw-r--r--  drivers/infiniband/hw/hfi1/msix.h | 64
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 74
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 100
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.h | 31
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 24
-rw-r--r--  drivers/infiniband/hw/hfi1/ruc.c | 382
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 56
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 21
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 69
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.h | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_iowait.h | 54
-rw-r--r--  drivers/infiniband/hw/hfi1/uc.c | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/ud.c | 22
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 137
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 251
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.h | 35
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs_txreq.h | 11
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 12
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 21
-rw-r--r--  drivers/infiniband/hw/hns/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 6
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 45
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 629
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 96
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 123
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 212
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 41
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_cm.c | 2
-rw-r--r--  drivers/infiniband/hw/i40iw/i40iw_verbs.c | 73
-rw-r--r--  drivers/infiniband/hw/mlx4/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 20
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 182
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 129
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 358
-rw-r--r--  drivers/infiniband/hw/mlx5/flow.c | 393
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 510
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 9
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 98
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 123
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 491
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mad.c | 5
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 44
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 4
-rw-r--r--  drivers/infiniband/hw/nes/nes.c | 3
-rw-r--r--  drivers/infiniband/hw/nes/nes.h | 9
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes_nic.c | 2
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 63
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 74
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 3
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 3
-rw-r--r--  drivers/infiniband/hw/qedr/main.c | 73
-rw-r--r--  drivers/infiniband/hw/qedr/qedr.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_roce_cm.c | 4
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 5
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 17
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 18
-rw-r--r--  drivers/infiniband/hw/qib/qib_ruc.c | 342
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 101
-rw-r--r--  drivers/infiniband/hw/qib/qib_uc.c | 12
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c | 17
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 47
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 15
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_debugfs.c | 3
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 39
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 74
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_sysfs.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 16
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_transport.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 91
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.h | 3
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 46
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/Kconfig | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c | 677
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_tx.h | 42
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c | 15
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 13
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 39
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 35
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 49
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_param.h | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c | 55
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.h | 6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 17
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 10
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c | 10
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_sysfs.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 29
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 36
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 18
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 9
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 2
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c | 3
-rw-r--r--  drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c | 3
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 19
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 28
-rw-r--r--  include/linux/mlx5/driver.h | 23
-rw-r--r--  include/linux/qed/qed_rdma_if.h | 11
-rw-r--r--  include/rdma/ib_addr.h | 11
-rw-r--r--  include/rdma/ib_cm.h | 2
-rw-r--r--  include/rdma/ib_sa.h | 38
-rw-r--r--  include/rdma/ib_umem.h | 9
-rw-r--r--  include/rdma/ib_umem_odp.h | 75
-rw-r--r--  include/rdma/ib_verbs.h | 149
-rw-r--r--  include/rdma/rdma_cm.h | 11
-rw-r--r--  include/rdma/rdma_netlink.h | 4
-rw-r--r--  include/rdma/rdma_vt.h | 51
-rw-r--r--  include/rdma/rdmavt_qp.h | 7
-rw-r--r--  include/rdma/restrack.h | 12
-rw-r--r--  include/rdma/uverbs_ioctl.h | 111
-rw-r--r--  include/rdma/uverbs_std_types.h | 51
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h | 20
-rw-r--r--  include/uapi/rdma/mlx5-abi.h | 16
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_cmds.h | 21
-rw-r--r--  include/uapi/rdma/mlx5_user_ioctl_verbs.h | 12
-rw-r--r--  include/uapi/rdma/rdma_netlink.h | 3
-rw-r--r--  include/uapi/rdma/rdma_user_ioctl_cmds.h | 7
204 files changed, 7618 insertions, 5190 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index e2e0fe553ad8..664a8f6a634f 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -91,6 +91,24 @@ Description:
 		stacked (e.g: VLAN interfaces) but still have the same MAC
 		address as their parent device.
 
+What:		/sys/class/net/<iface>/dev_port
+Date:		February 2014
+KernelVersion:	3.15
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the port number of this network device, formatted
+		as a decimal value. Some NICs have multiple independent ports
+		on the same PCI bus, device and function. This attribute allows
+		userspace to distinguish the respective interfaces.
+
+		Note: some device drivers started to use 'dev_id' for this
+		purpose since long before 3.15 and have not adopted the new
+		attribute ever since. To query the port number, some tools look
+		exclusively at 'dev_port', while others only consult 'dev_id'.
+		If a network device has multiple client adapter ports as
+		described in the previous paragraph and does not set this
+		attribute to its port number, it's a kernel bug.
+
 What:		/sys/class/net/<iface>/dormant
 Date:		March 2006
 KernelVersion:	2.6.17
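
The dev_port text added above tells userspace to read the attribute as a decimal value and, where an older driver never adopted it, to fall back to 'dev_id'. A minimal userspace sketch of that lookup (the file name read_dev_port.c and the helper below are hypothetical, not part of this patch):

	/* read_dev_port.c - print the port number of a netdev, preferring
	 * /sys/class/net/<iface>/dev_port and falling back to dev_id, as the
	 * ABI text above describes. Illustrative only. */
	#include <stdio.h>

	static int read_sysfs_long(const char *iface, const char *attr, long *val)
	{
		char path[128];
		FILE *f;
		int ok;

		snprintf(path, sizeof(path), "/sys/class/net/%s/%s", iface, attr);
		f = fopen(path, "r");
		if (!f)
			return -1;
		/* dev_port is decimal, dev_id is printed as hex (e.g. "0x0");
		 * "%li" accepts both forms. */
		ok = fscanf(f, "%li", val) == 1;
		fclose(f);
		return ok ? 0 : -1;
	}

	int main(int argc, char **argv)
	{
		const char *iface = argc > 1 ? argv[1] : "eth0";
		long port;

		if (read_sysfs_long(iface, "dev_port", &port) &&
		    read_sysfs_long(iface, "dev_id", &port))
			return 1;
		printf("%s: port %ld\n", iface, port);
		return 0;
	}
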
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index abb6660c099c..0a3ec7c726ec 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
 	select ANON_INODES
+	depends on MMU
 	---help---
 	  Userspace InfiniBand access support. This enables the
 	  kernel side of userspace verbs and the userspace
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 46b855a42884..0dce94e3c495 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
45#include <net/addrconf.h> 45#include <net/addrconf.h>
46#include <net/ip6_route.h> 46#include <net/ip6_route.h>
47#include <rdma/ib_addr.h> 47#include <rdma/ib_addr.h>
48#include <rdma/ib_sa.h>
48#include <rdma/ib.h> 49#include <rdma/ib.h>
49#include <rdma/rdma_netlink.h> 50#include <rdma/rdma_netlink.h>
50#include <net/netlink.h> 51#include <net/netlink.h>
@@ -61,6 +62,7 @@ struct addr_req {
61 struct rdma_dev_addr *addr, void *context); 62 struct rdma_dev_addr *addr, void *context);
62 unsigned long timeout; 63 unsigned long timeout;
63 struct delayed_work work; 64 struct delayed_work work;
65 bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
64 int status; 66 int status;
65 u32 seq; 67 u32 seq;
66}; 68};
@@ -219,60 +221,75 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
219} 221}
220EXPORT_SYMBOL(rdma_addr_size_kss); 222EXPORT_SYMBOL(rdma_addr_size_kss);
221 223
222void rdma_copy_addr(struct rdma_dev_addr *dev_addr, 224/**
223 const struct net_device *dev, 225 * rdma_copy_src_l2_addr - Copy netdevice source addresses
224 const unsigned char *dst_dev_addr) 226 * @dev_addr: Destination address pointer where to copy the addresses
227 * @dev: Netdevice whose source addresses to copy
228 *
229 * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
230 * This includes unicast address, broadcast address, device type and
231 * interface index.
232 */
233void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
234 const struct net_device *dev)
225{ 235{
226 dev_addr->dev_type = dev->type; 236 dev_addr->dev_type = dev->type;
227 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 237 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
228 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 238 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
229 if (dst_dev_addr)
230 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
231 dev_addr->bound_dev_if = dev->ifindex; 239 dev_addr->bound_dev_if = dev->ifindex;
232} 240}
233EXPORT_SYMBOL(rdma_copy_addr); 241EXPORT_SYMBOL(rdma_copy_src_l2_addr);
234 242
235int rdma_translate_ip(const struct sockaddr *addr, 243static struct net_device *
236 struct rdma_dev_addr *dev_addr) 244rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
237{ 245{
238 struct net_device *dev; 246 struct net_device *dev = NULL;
247 int ret = -EADDRNOTAVAIL;
239 248
240 if (dev_addr->bound_dev_if) { 249 switch (src_in->sa_family) {
241 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
242 if (!dev)
243 return -ENODEV;
244 rdma_copy_addr(dev_addr, dev, NULL);
245 dev_put(dev);
246 return 0;
247 }
248
249 switch (addr->sa_family) {
250 case AF_INET: 250 case AF_INET:
251 dev = ip_dev_find(dev_addr->net, 251 dev = __ip_dev_find(net,
252 ((const struct sockaddr_in *)addr)->sin_addr.s_addr); 252 ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
253 253 false);
254 if (!dev) 254 if (dev)
255 return -EADDRNOTAVAIL; 255 ret = 0;
256
257 rdma_copy_addr(dev_addr, dev, NULL);
258 dev_put(dev);
259 break; 256 break;
260#if IS_ENABLED(CONFIG_IPV6) 257#if IS_ENABLED(CONFIG_IPV6)
261 case AF_INET6: 258 case AF_INET6:
262 rcu_read_lock(); 259 for_each_netdev_rcu(net, dev) {
263 for_each_netdev_rcu(dev_addr->net, dev) { 260 if (ipv6_chk_addr(net,
264 if (ipv6_chk_addr(dev_addr->net, 261 &((const struct sockaddr_in6 *)src_in)->sin6_addr,
265 &((const struct sockaddr_in6 *)addr)->sin6_addr,
266 dev, 1)) { 262 dev, 1)) {
267 rdma_copy_addr(dev_addr, dev, NULL); 263 ret = 0;
268 break; 264 break;
269 } 265 }
270 } 266 }
271 rcu_read_unlock();
272 break; 267 break;
273#endif 268#endif
274 } 269 }
275 return 0; 270 return ret ? ERR_PTR(ret) : dev;
271}
272
273int rdma_translate_ip(const struct sockaddr *addr,
274 struct rdma_dev_addr *dev_addr)
275{
276 struct net_device *dev;
277
278 if (dev_addr->bound_dev_if) {
279 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
280 if (!dev)
281 return -ENODEV;
282 rdma_copy_src_l2_addr(dev_addr, dev);
283 dev_put(dev);
284 return 0;
285 }
286
287 rcu_read_lock();
288 dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
289 if (!IS_ERR(dev))
290 rdma_copy_src_l2_addr(dev_addr, dev);
291 rcu_read_unlock();
292 return PTR_ERR_OR_ZERO(dev);
276} 293}
277EXPORT_SYMBOL(rdma_translate_ip); 294EXPORT_SYMBOL(rdma_translate_ip);
278 295
@@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
295 spin_unlock_bh(&lock); 312 spin_unlock_bh(&lock);
296} 313}
297 314
298static int ib_nl_fetch_ha(const struct dst_entry *dst, 315static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
299 struct rdma_dev_addr *dev_addr,
300 const void *daddr, u32 seq, u16 family) 316 const void *daddr, u32 seq, u16 family)
301{ 317{
302 if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) 318 if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
303 return -EADDRNOTAVAIL; 319 return -EADDRNOTAVAIL;
304 320
305 /* We fill in what we can, the response will fill the rest */
306 rdma_copy_addr(dev_addr, dst->dev, NULL);
307 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); 321 return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
308} 322}
309 323
@@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
322 neigh_event_send(n, NULL); 336 neigh_event_send(n, NULL);
323 ret = -ENODATA; 337 ret = -ENODATA;
324 } else { 338 } else {
325 rdma_copy_addr(dev_addr, dst->dev, n->ha); 339 memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
326 } 340 }
327 341
328 neigh_release(n); 342 neigh_release(n);
@@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
356 (const void *)&dst_in6->sin6_addr; 370 (const void *)&dst_in6->sin6_addr;
357 sa_family_t family = dst_in->sa_family; 371 sa_family_t family = dst_in->sa_family;
358 372
359 /* Gateway + ARPHRD_INFINIBAND -> IB router */ 373 /* If we have a gateway in IB mode then it must be an IB network */
360 if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND) 374 if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
361 return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family); 375 return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
362 else 376 else
363 return dst_fetch_ha(dst, dev_addr, daddr); 377 return dst_fetch_ha(dst, dev_addr, daddr);
364} 378}
365 379
366static int addr4_resolve(struct sockaddr_in *src_in, 380static int addr4_resolve(struct sockaddr *src_sock,
367 const struct sockaddr_in *dst_in, 381 const struct sockaddr *dst_sock,
368 struct rdma_dev_addr *addr, 382 struct rdma_dev_addr *addr,
369 struct rtable **prt) 383 struct rtable **prt)
370{ 384{
385 struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
386 const struct sockaddr_in *dst_in =
387 (const struct sockaddr_in *)dst_sock;
388
371 __be32 src_ip = src_in->sin_addr.s_addr; 389 __be32 src_ip = src_in->sin_addr.s_addr;
372 __be32 dst_ip = dst_in->sin_addr.s_addr; 390 __be32 dst_ip = dst_in->sin_addr.s_addr;
373 struct rtable *rt; 391 struct rtable *rt;
@@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
383 if (ret) 401 if (ret)
384 return ret; 402 return ret;
385 403
386 src_in->sin_family = AF_INET;
387 src_in->sin_addr.s_addr = fl4.saddr; 404 src_in->sin_addr.s_addr = fl4.saddr;
388 405
389 /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
390 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
391 * type accordingly.
392 */
393 if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
394 addr->network = RDMA_NETWORK_IPV4;
395
396 addr->hoplimit = ip4_dst_hoplimit(&rt->dst); 406 addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
397 407
398 *prt = rt; 408 *prt = rt;
@@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
400} 410}
401 411
402#if IS_ENABLED(CONFIG_IPV6) 412#if IS_ENABLED(CONFIG_IPV6)
403static int addr6_resolve(struct sockaddr_in6 *src_in, 413static int addr6_resolve(struct sockaddr *src_sock,
404 const struct sockaddr_in6 *dst_in, 414 const struct sockaddr *dst_sock,
405 struct rdma_dev_addr *addr, 415 struct rdma_dev_addr *addr,
406 struct dst_entry **pdst) 416 struct dst_entry **pdst)
407{ 417{
418 struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
419 const struct sockaddr_in6 *dst_in =
420 (const struct sockaddr_in6 *)dst_sock;
408 struct flowi6 fl6; 421 struct flowi6 fl6;
409 struct dst_entry *dst; 422 struct dst_entry *dst;
410 struct rt6_info *rt;
411 int ret; 423 int ret;
412 424
413 memset(&fl6, 0, sizeof fl6); 425 memset(&fl6, 0, sizeof fl6);
@@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
419 if (ret < 0) 431 if (ret < 0)
420 return ret; 432 return ret;
421 433
422 rt = (struct rt6_info *)dst; 434 if (ipv6_addr_any(&src_in->sin6_addr))
423 if (ipv6_addr_any(&src_in->sin6_addr)) {
424 src_in->sin6_family = AF_INET6;
425 src_in->sin6_addr = fl6.saddr; 435 src_in->sin6_addr = fl6.saddr;
426 }
427
428 /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
429 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
430 * type accordingly.
431 */
432 if (rt->rt6i_flags & RTF_GATEWAY &&
433 ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
434 addr->network = RDMA_NETWORK_IPV6;
435 436
436 addr->hoplimit = ip6_dst_hoplimit(dst); 437 addr->hoplimit = ip6_dst_hoplimit(dst);
437 438
@@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
439 return 0; 440 return 0;
440} 441}
441#else 442#else
442static int addr6_resolve(struct sockaddr_in6 *src_in, 443static int addr6_resolve(struct sockaddr *src_sock,
443 const struct sockaddr_in6 *dst_in, 444 const struct sockaddr *dst_sock,
444 struct rdma_dev_addr *addr, 445 struct rdma_dev_addr *addr,
445 struct dst_entry **pdst) 446 struct dst_entry **pdst)
446{ 447{
@@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
451static int addr_resolve_neigh(const struct dst_entry *dst, 452static int addr_resolve_neigh(const struct dst_entry *dst,
452 const struct sockaddr *dst_in, 453 const struct sockaddr *dst_in,
453 struct rdma_dev_addr *addr, 454 struct rdma_dev_addr *addr,
455 unsigned int ndev_flags,
454 u32 seq) 456 u32 seq)
455{ 457{
456 if (dst->dev->flags & IFF_LOOPBACK) { 458 int ret = 0;
457 int ret;
458 459
459 ret = rdma_translate_ip(dst_in, addr); 460 if (ndev_flags & IFF_LOOPBACK) {
460 if (!ret) 461 memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
461 memcpy(addr->dst_dev_addr, addr->src_dev_addr, 462 } else {
462 MAX_ADDR_LEN); 463 if (!(ndev_flags & IFF_NOARP)) {
464 /* If the device doesn't do ARP internally */
465 ret = fetch_ha(dst, addr, dst_in, seq);
466 }
467 }
468 return ret;
469}
463 470
464 return ret; 471static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
472 const struct sockaddr *dst_in,
473 const struct dst_entry *dst,
474 const struct net_device *ndev)
475{
476 int ret = 0;
477
478 if (dst->dev->flags & IFF_LOOPBACK)
479 ret = rdma_translate_ip(dst_in, dev_addr);
480 else
481 rdma_copy_src_l2_addr(dev_addr, dst->dev);
482
483 /*
484 * If there's a gateway and type of device not ARPHRD_INFINIBAND,
485 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
486 * network type accordingly.
487 */
488 if (has_gateway(dst, dst_in->sa_family) &&
489 ndev->type != ARPHRD_INFINIBAND)
490 dev_addr->network = dst_in->sa_family == AF_INET ?
491 RDMA_NETWORK_IPV4 :
492 RDMA_NETWORK_IPV6;
493 else
494 dev_addr->network = RDMA_NETWORK_IB;
495
496 return ret;
497}
498
499static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
500 unsigned int *ndev_flags,
501 const struct sockaddr *dst_in,
502 const struct dst_entry *dst)
503{
504 struct net_device *ndev = READ_ONCE(dst->dev);
505
506 *ndev_flags = ndev->flags;
507 /* A physical device must be the RDMA device to use */
508 if (ndev->flags & IFF_LOOPBACK) {
509 /*
510 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
511 * loopback IP address. So if route is resolved to loopback
512 * interface, translate that to a real ndev based on non
513 * loopback IP address.
514 */
515 ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
516 if (IS_ERR(ndev))
517 return -ENODEV;
465 } 518 }
466 519
467 /* If the device doesn't do ARP internally */ 520 return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
468 if (!(dst->dev->flags & IFF_NOARP)) 521}
469 return fetch_ha(dst, addr, dst_in, seq); 522
523static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
524{
525 struct net_device *ndev;
470 526
471 rdma_copy_addr(addr, dst->dev, NULL); 527 ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
528 if (IS_ERR(ndev))
529 return PTR_ERR(ndev);
472 530
531 /*
532 * Since we are holding the rcu, reading net and ifindex
533 * are safe without any additional reference; because
534 * change_net_namespace() in net/core/dev.c does rcu sync
535 * after it changes the state to IFF_DOWN and before
536 * updating netdev fields {net, ifindex}.
537 */
538 addr->net = dev_net(ndev);
539 addr->bound_dev_if = ndev->ifindex;
473 return 0; 540 return 0;
474} 541}
475 542
543static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
544{
545 addr->net = &init_net;
546 addr->bound_dev_if = 0;
547}
548
476static int addr_resolve(struct sockaddr *src_in, 549static int addr_resolve(struct sockaddr *src_in,
477 const struct sockaddr *dst_in, 550 const struct sockaddr *dst_in,
478 struct rdma_dev_addr *addr, 551 struct rdma_dev_addr *addr,
479 bool resolve_neigh, 552 bool resolve_neigh,
553 bool resolve_by_gid_attr,
480 u32 seq) 554 u32 seq)
481{ 555{
482 struct net_device *ndev; 556 struct dst_entry *dst = NULL;
483 struct dst_entry *dst; 557 unsigned int ndev_flags = 0;
558 struct rtable *rt = NULL;
484 int ret; 559 int ret;
485 560
486 if (!addr->net) { 561 if (!addr->net) {
@@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
488 return -EINVAL; 563 return -EINVAL;
489 } 564 }
490 565
491 if (src_in->sa_family == AF_INET) { 566 rcu_read_lock();
492 struct rtable *rt = NULL; 567 if (resolve_by_gid_attr) {
493 const struct sockaddr_in *dst_in4 = 568 if (!addr->sgid_attr) {
494 (const struct sockaddr_in *)dst_in; 569 rcu_read_unlock();
495 570 pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
496 ret = addr4_resolve((struct sockaddr_in *)src_in, 571 return -EINVAL;
497 dst_in4, addr, &rt);
498 if (ret)
499 return ret;
500
501 if (resolve_neigh)
502 ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
503
504 if (addr->bound_dev_if) {
505 ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
506 } else {
507 ndev = rt->dst.dev;
508 dev_hold(ndev);
509 } 572 }
510 573 /*
511 ip_rt_put(rt); 574 * If the request is for a specific gid attribute of the
512 } else { 575 * rdma_dev_addr, derive net from the netdevice of the
513 const struct sockaddr_in6 *dst_in6 = 576 * GID attribute.
514 (const struct sockaddr_in6 *)dst_in; 577 */
515 578 ret = set_addr_netns_by_gid_rcu(addr);
516 ret = addr6_resolve((struct sockaddr_in6 *)src_in, 579 if (ret) {
517 dst_in6, addr, 580 rcu_read_unlock();
518 &dst);
519 if (ret)
520 return ret; 581 return ret;
521
522 if (resolve_neigh)
523 ret = addr_resolve_neigh(dst, dst_in, addr, seq);
524
525 if (addr->bound_dev_if) {
526 ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
527 } else {
528 ndev = dst->dev;
529 dev_hold(ndev);
530 } 582 }
531
532 dst_release(dst);
533 } 583 }
534 584 if (src_in->sa_family == AF_INET) {
535 if (ndev) { 585 ret = addr4_resolve(src_in, dst_in, addr, &rt);
536 if (ndev->flags & IFF_LOOPBACK) 586 dst = &rt->dst;
537 ret = rdma_translate_ip(dst_in, addr); 587 } else {
538 else 588 ret = addr6_resolve(src_in, dst_in, addr, &dst);
539 addr->bound_dev_if = ndev->ifindex;
540 dev_put(ndev);
541 } 589 }
590 if (ret) {
591 rcu_read_unlock();
592 goto done;
593 }
594 ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
595 rcu_read_unlock();
596
597 /*
598 * Resolve neighbor destination address if requested and
599 * only if src addr translation didn't fail.
600 */
601 if (!ret && resolve_neigh)
602 ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
542 603
604 if (src_in->sa_family == AF_INET)
605 ip_rt_put(rt);
606 else
607 dst_release(dst);
608done:
609 /*
610 * Clear the addr net to go back to its original state, only if it was
611 * derived from GID attribute in this context.
612 */
613 if (resolve_by_gid_attr)
614 rdma_addr_set_net_defaults(addr);
543 return ret; 615 return ret;
544} 616}
545 617
@@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
554 src_in = (struct sockaddr *)&req->src_addr; 626 src_in = (struct sockaddr *)&req->src_addr;
555 dst_in = (struct sockaddr *)&req->dst_addr; 627 dst_in = (struct sockaddr *)&req->dst_addr;
556 req->status = addr_resolve(src_in, dst_in, req->addr, 628 req->status = addr_resolve(src_in, dst_in, req->addr,
557 true, req->seq); 629 true, req->resolve_by_gid_attr,
630 req->seq);
558 if (req->status && time_after_eq(jiffies, req->timeout)) { 631 if (req->status && time_after_eq(jiffies, req->timeout)) {
559 req->status = -ETIMEDOUT; 632 req->status = -ETIMEDOUT;
560 } else if (req->status == -ENODATA) { 633 } else if (req->status == -ENODATA) {
@@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
586} 659}
587 660
588int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, 661int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
589 struct rdma_dev_addr *addr, int timeout_ms, 662 struct rdma_dev_addr *addr, unsigned long timeout_ms,
590 void (*callback)(int status, struct sockaddr *src_addr, 663 void (*callback)(int status, struct sockaddr *src_addr,
591 struct rdma_dev_addr *addr, void *context), 664 struct rdma_dev_addr *addr, void *context),
592 void *context) 665 bool resolve_by_gid_attr, void *context)
593{ 666{
594 struct sockaddr *src_in, *dst_in; 667 struct sockaddr *src_in, *dst_in;
595 struct addr_req *req; 668 struct addr_req *req;
@@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
617 req->addr = addr; 690 req->addr = addr;
618 req->callback = callback; 691 req->callback = callback;
619 req->context = context; 692 req->context = context;
693 req->resolve_by_gid_attr = resolve_by_gid_attr;
620 INIT_DELAYED_WORK(&req->work, process_one_req); 694 INIT_DELAYED_WORK(&req->work, process_one_req);
621 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); 695 req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
622 696
623 req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); 697 req->status = addr_resolve(src_in, dst_in, addr, true,
698 req->resolve_by_gid_attr, req->seq);
624 switch (req->status) { 699 switch (req->status) {
625 case 0: 700 case 0:
626 req->timeout = jiffies; 701 req->timeout = jiffies;
@@ -641,25 +716,53 @@ err:
641} 716}
642EXPORT_SYMBOL(rdma_resolve_ip); 717EXPORT_SYMBOL(rdma_resolve_ip);
643 718
644int rdma_resolve_ip_route(struct sockaddr *src_addr, 719int roce_resolve_route_from_path(struct sa_path_rec *rec,
645 const struct sockaddr *dst_addr, 720 const struct ib_gid_attr *attr)
646 struct rdma_dev_addr *addr)
647{ 721{
648 struct sockaddr_storage ssrc_addr = {}; 722 union {
649 struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; 723 struct sockaddr _sockaddr;
724 struct sockaddr_in _sockaddr_in;
725 struct sockaddr_in6 _sockaddr_in6;
726 } sgid, dgid;
727 struct rdma_dev_addr dev_addr = {};
728 int ret;
650 729
651 if (src_addr) { 730 if (rec->roce.route_resolved)
652 if (src_addr->sa_family != dst_addr->sa_family) 731 return 0;
653 return -EINVAL;
654 732
655 memcpy(src_in, src_addr, rdma_addr_size(src_addr)); 733 rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
656 } else { 734 rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
657 src_in->sa_family = dst_addr->sa_family; 735
658 } 736 if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
737 return -EINVAL;
738
739 if (!attr || !attr->ndev)
740 return -EINVAL;
741
742 dev_addr.net = &init_net;
743 dev_addr.sgid_attr = attr;
744
745 ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
746 &dev_addr, false, true, 0);
747 if (ret)
748 return ret;
749
750 if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
751 dev_addr.network == RDMA_NETWORK_IPV6) &&
752 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
753 return -EINVAL;
659 754
660 return addr_resolve(src_in, dst_addr, addr, false, 0); 755 rec->roce.route_resolved = true;
756 return 0;
661} 757}
662 758
759/**
760 * rdma_addr_cancel - Cancel resolve ip request
761 * @addr: Pointer to address structure given previously
762 * during rdma_resolve_ip().
763 * rdma_addr_cancel() is synchronous function which cancels any pending
764 * request if there is any.
765 */
663void rdma_addr_cancel(struct rdma_dev_addr *addr) 766void rdma_addr_cancel(struct rdma_dev_addr *addr)
664{ 767{
665 struct addr_req *req, *temp_req; 768 struct addr_req *req, *temp_req;
@@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
687 * guarentees no work is running and none will be started. 790 * guarentees no work is running and none will be started.
688 */ 791 */
689 cancel_delayed_work_sync(&found->work); 792 cancel_delayed_work_sync(&found->work);
690
691 if (found->callback)
692 found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
693 found->addr, found->context);
694
695 kfree(found); 793 kfree(found);
696} 794}
697EXPORT_SYMBOL(rdma_addr_cancel); 795EXPORT_SYMBOL(rdma_addr_cancel);
@@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
710 808
711int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 809int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
712 const union ib_gid *dgid, 810 const union ib_gid *dgid,
713 u8 *dmac, const struct net_device *ndev, 811 u8 *dmac, const struct ib_gid_attr *sgid_attr,
714 int *hoplimit) 812 int *hoplimit)
715{ 813{
716 struct rdma_dev_addr dev_addr; 814 struct rdma_dev_addr dev_addr;
@@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
726 rdma_gid2ip(&dgid_addr._sockaddr, dgid); 824 rdma_gid2ip(&dgid_addr._sockaddr, dgid);
727 825
728 memset(&dev_addr, 0, sizeof(dev_addr)); 826 memset(&dev_addr, 0, sizeof(dev_addr));
729 dev_addr.bound_dev_if = ndev->ifindex;
730 dev_addr.net = &init_net; 827 dev_addr.net = &init_net;
828 dev_addr.sgid_attr = sgid_attr;
731 829
732 init_completion(&ctx.comp); 830 init_completion(&ctx.comp);
733 ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, 831 ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
734 &dev_addr, 1000, resolve_cb, &ctx); 832 &dev_addr, 1000, resolve_cb, true, &ctx);
735 if (ret) 833 if (ret)
736 return ret; 834 return ret;
737 835
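
The addr.c rework above changes the rdma_resolve_ip() signature (an unsigned long timeout in milliseconds plus a resolve_by_gid_attr flag) and routes GID-based resolution through dev_addr.sgid_attr rather than a bound netdevice. A condensed kernel-side sketch of that calling pattern, modeled on the rdma_addr_find_l2_eth_by_grh() hunk above (the function and callback names here are illustrative, not from the patch):

	struct resolve_ctx {
		struct completion comp;
		int status;
	};

	static void resolve_done(int status, struct sockaddr *src_addr,
				 struct rdma_dev_addr *addr, void *context)
	{
		struct resolve_ctx *ctx = context;

		ctx->status = status;
		complete(&ctx->comp);
	}

	/* Resolve the destination L2 address reachable through the netdevice
	 * backing @sgid_attr, then copy out the resulting dmac. */
	static int resolve_dmac(const struct ib_gid_attr *sgid_attr,
				struct sockaddr *sgid_addr,
				struct sockaddr *dgid_addr, u8 *dmac)
	{
		struct rdma_dev_addr dev_addr = {};
		struct resolve_ctx ctx;
		int ret;

		dev_addr.net = &init_net;
		dev_addr.sgid_attr = sgid_attr;	/* used when resolve_by_gid_attr is true */

		init_completion(&ctx.comp);
		ret = rdma_resolve_ip(sgid_addr, dgid_addr, &dev_addr,
				      1000 /* ms */, resolve_done,
				      true /* resolve_by_gid_attr */, &ctx);
		if (ret)
			return ret;

		wait_for_completion(&ctx.comp);
		if (ctx.status)
			return ctx.status;

		memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
		return 0;
	}
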
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 3208ad6ad540..5b2fce4a7091 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
212 u8 port_num = entry->attr.port_num; 212 u8 port_num = entry->attr.port_num;
213 struct ib_gid_table *table = rdma_gid_table(device, port_num); 213 struct ib_gid_table *table = rdma_gid_table(device, port_num);
214 214
215 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 215 dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
216 device->name, port_num, entry->attr.index, 216 port_num, entry->attr.index, entry->attr.gid.raw);
217 entry->attr.gid.raw);
218 217
219 if (rdma_cap_roce_gid_table(device, port_num) && 218 if (rdma_cap_roce_gid_table(device, port_num) &&
220 entry->state != GID_TABLE_ENTRY_INVALID) 219 entry->state != GID_TABLE_ENTRY_INVALID)
@@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
289{ 288{
290 entry->state = GID_TABLE_ENTRY_VALID; 289 entry->state = GID_TABLE_ENTRY_VALID;
291 290
292 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 291 dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
293 entry->attr.device->name, entry->attr.port_num, 292 __func__, entry->attr.port_num, entry->attr.index,
294 entry->attr.index, entry->attr.gid.raw); 293 entry->attr.gid.raw);
295 294
296 lockdep_assert_held(&table->lock); 295 lockdep_assert_held(&table->lock);
297 write_lock_irq(&table->rwlock); 296 write_lock_irq(&table->rwlock);
@@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
320 int ret; 319 int ret;
321 320
322 if (!attr->ndev) { 321 if (!attr->ndev) {
323 pr_err("%s NULL netdev device=%s port=%d index=%d\n", 322 dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
324 __func__, attr->device->name, attr->port_num, 323 __func__, attr->port_num, attr->index);
325 attr->index);
326 return -EINVAL; 324 return -EINVAL;
327 } 325 }
328 if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { 326 if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
329 ret = attr->device->add_gid(attr, &entry->context); 327 ret = attr->device->add_gid(attr, &entry->context);
330 if (ret) { 328 if (ret) {
331 pr_err("%s GID add failed device=%s port=%d index=%d\n", 329 dev_err(&attr->device->dev,
332 __func__, attr->device->name, attr->port_num, 330 "%s GID add failed port=%d index=%d\n",
333 attr->index); 331 __func__, attr->port_num, attr->index);
334 return ret; 332 return ret;
335 } 333 }
336 } 334 }
@@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
353 351
354 lockdep_assert_held(&table->lock); 352 lockdep_assert_held(&table->lock);
355 353
356 pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, 354 dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
357 ib_dev->name, port, ix, 355 ix, table->data_vec[ix]->attr.gid.raw);
358 table->data_vec[ix]->attr.gid.raw);
359 356
360 write_lock_irq(&table->rwlock); 357 write_lock_irq(&table->rwlock);
361 entry = table->data_vec[ix]; 358 entry = table->data_vec[ix];
@@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
782 if (is_gid_entry_free(table->data_vec[i])) 779 if (is_gid_entry_free(table->data_vec[i]))
783 continue; 780 continue;
784 if (kref_read(&table->data_vec[i]->kref) > 1) { 781 if (kref_read(&table->data_vec[i]->kref) > 1) {
785 pr_err("GID entry ref leak for %s (index %d) ref=%d\n", 782 dev_err(&device->dev,
786 device->name, i, 783 "GID entry ref leak for index %d ref=%d\n", i,
787 kref_read(&table->data_vec[i]->kref)); 784 kref_read(&table->data_vec[i]->kref));
788 leak = true; 785 leak = true;
789 } 786 }
790 } 787 }
@@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
1252} 1249}
1253EXPORT_SYMBOL(rdma_hold_gid_attr); 1250EXPORT_SYMBOL(rdma_hold_gid_attr);
1254 1251
1252/**
1253 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
1254 * which must be in UP state.
1255 *
1256 * @attr:Pointer to the GID attribute
1257 *
1258 * Returns pointer to netdevice if the netdevice was attached to GID and
1259 * netdevice is in UP state. Caller must hold RCU lock as this API
1260 * reads the netdev flags which can change while netdevice migrates to
1261 * different net namespace. Returns ERR_PTR with error code otherwise.
1262 *
1263 */
1264struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
1265{
1266 struct ib_gid_table_entry *entry =
1267 container_of(attr, struct ib_gid_table_entry, attr);
1268 struct ib_device *device = entry->attr.device;
1269 struct net_device *ndev = ERR_PTR(-ENODEV);
1270 u8 port_num = entry->attr.port_num;
1271 struct ib_gid_table *table;
1272 unsigned long flags;
1273 bool valid;
1274
1275 table = rdma_gid_table(device, port_num);
1276
1277 read_lock_irqsave(&table->rwlock, flags);
1278 valid = is_gid_entry_valid(table->data_vec[attr->index]);
1279 if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
1280 ndev = attr->ndev;
1281 read_unlock_irqrestore(&table->rwlock, flags);
1282 return ndev;
1283}
1284
1255static int config_non_roce_gid_cache(struct ib_device *device, 1285static int config_non_roce_gid_cache(struct ib_device *device,
1256 u8 port, int gid_tbl_len) 1286 u8 port, int gid_tbl_len)
1257{ 1287{
@@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
1270 continue; 1300 continue;
1271 ret = device->query_gid(device, port, i, &gid_attr.gid); 1301 ret = device->query_gid(device, port, i, &gid_attr.gid);
1272 if (ret) { 1302 if (ret) {
1273 pr_warn("query_gid failed (%d) for %s (index %d)\n", 1303 dev_warn(&device->dev,
1274 ret, device->name, i); 1304 "query_gid failed (%d) for index %d\n", ret,
1305 i);
1275 goto err; 1306 goto err;
1276 } 1307 }
1277 gid_attr.index = i; 1308 gid_attr.index = i;
@@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
1300 1331
1301 ret = ib_query_port(device, port, tprops); 1332 ret = ib_query_port(device, port, tprops);
1302 if (ret) { 1333 if (ret) {
1303 pr_warn("ib_query_port failed (%d) for %s\n", 1334 dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
1304 ret, device->name);
1305 goto err; 1335 goto err;
1306 } 1336 }
1307 1337
@@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
1323 for (i = 0; i < pkey_cache->table_len; ++i) { 1353 for (i = 0; i < pkey_cache->table_len; ++i) {
1324 ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1354 ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
1325 if (ret) { 1355 if (ret) {
1326 pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", 1356 dev_warn(&device->dev,
1327 ret, device->name, i); 1357 "ib_query_pkey failed (%d) for index %d\n",
1358 ret, i);
1328 goto err; 1359 goto err;
1329 } 1360 }
1330 } 1361 }
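
The kernel-doc for rdma_read_gid_attr_ndev_rcu() above requires the caller to hold the RCU read lock for as long as it dereferences the returned netdevice; set_addr_netns_by_gid_rcu() in the addr.c hunk earlier follows exactly this contract. A condensed sketch of the pattern (the helper name below is illustrative, not from the patch):

	static int bind_addr_to_gid_ndev(struct rdma_dev_addr *addr,
					 const struct ib_gid_attr *attr)
	{
		struct net_device *ndev;

		rcu_read_lock();
		ndev = rdma_read_gid_attr_ndev_rcu(attr);
		if (!IS_ERR(ndev)) {
			/* net and ifindex stay stable while the RCU read lock is
			 * held; see the comment in set_addr_netns_by_gid_rcu(). */
			addr->net = dev_net(ndev);
			addr->bound_dev_if = ndev->ifindex;
		}
		rcu_read_unlock();

		return IS_ERR(ndev) ? PTR_ERR(ndev) : 0;
	}
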
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 6e39c27dca8e..edb2cb758be7 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
3292 if (ret) 3292 if (ret)
3293 goto unlock; 3293 goto unlock;
3294 3294
3295 cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av, 3295 ret = cm_init_av_by_path(param->alternate_path, NULL,
3296 cm_id_priv); 3296 &cm_id_priv->alt_av, cm_id_priv);
3297 if (ret)
3298 goto unlock;
3299
3297 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; 3300 cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
3298 cm_id_priv->tid = lap_msg->hdr.tid; 3301 cm_id_priv->tid = lap_msg->hdr.tid;
3299 ret = atomic_inc_and_test(&cm_id_priv->work_count); 3302 ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
4367 cm_dev->going_down = 0; 4370 cm_dev->going_down = 0;
4368 cm_dev->device = device_create(&cm_class, &ib_device->dev, 4371 cm_dev->device = device_create(&cm_class, &ib_device->dev,
4369 MKDEV(0, 0), NULL, 4372 MKDEV(0, 0), NULL,
4370 "%s", ib_device->name); 4373 "%s", dev_name(&ib_device->dev));
4371 if (IS_ERR(cm_dev->device)) { 4374 if (IS_ERR(cm_dev->device)) {
4372 kfree(cm_dev); 4375 kfree(cm_dev);
4373 return; 4376 return;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a36c94930c31..15d5bb7bf6bb 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
639 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr; 639 id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
640} 640}
641 641
642static int cma_acquire_dev(struct rdma_id_private *id_priv, 642/**
643 const struct rdma_id_private *listen_id_priv) 643 * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
644 * based on source ip address.
645 * @id_priv: cm_id which should be bound to cma device
646 *
647 * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
648 * based on source IP address. It returns 0 on success or error code otherwise.
649 * It is applicable to active and passive side cm_id.
650 */
651static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
644{ 652{
645 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 653 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
646 const struct ib_gid_attr *sgid_attr; 654 const struct ib_gid_attr *sgid_attr;
647 struct cma_device *cma_dev;
648 union ib_gid gid, iboe_gid, *gidp; 655 union ib_gid gid, iboe_gid, *gidp;
656 struct cma_device *cma_dev;
649 enum ib_gid_type gid_type; 657 enum ib_gid_type gid_type;
650 int ret = -ENODEV; 658 int ret = -ENODEV;
651 u8 port; 659 u8 port;
@@ -654,41 +662,125 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
654 id_priv->id.ps == RDMA_PS_IPOIB) 662 id_priv->id.ps == RDMA_PS_IPOIB)
655 return -EINVAL; 663 return -EINVAL;
656 664
657 mutex_lock(&lock);
658 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 665 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
659 &iboe_gid); 666 &iboe_gid);
660 667
661 memcpy(&gid, dev_addr->src_dev_addr + 668 memcpy(&gid, dev_addr->src_dev_addr +
662 rdma_addr_gid_offset(dev_addr), sizeof gid); 669 rdma_addr_gid_offset(dev_addr), sizeof(gid));
663 670
664 if (listen_id_priv) { 671 mutex_lock(&lock);
665 cma_dev = listen_id_priv->cma_dev; 672 list_for_each_entry(cma_dev, &dev_list, list) {
666 port = listen_id_priv->id.port_num; 673 for (port = rdma_start_port(cma_dev->device);
667 gidp = rdma_protocol_roce(cma_dev->device, port) ? 674 port <= rdma_end_port(cma_dev->device); port++) {
668 &iboe_gid : &gid; 675 gidp = rdma_protocol_roce(cma_dev->device, port) ?
669 gid_type = listen_id_priv->gid_type; 676 &iboe_gid : &gid;
670 sgid_attr = cma_validate_port(cma_dev->device, port, 677 gid_type = cma_dev->default_gid_type[port - 1];
671 gid_type, gidp, id_priv); 678 sgid_attr = cma_validate_port(cma_dev->device, port,
672 if (!IS_ERR(sgid_attr)) { 679 gid_type, gidp, id_priv);
673 id_priv->id.port_num = port; 680 if (!IS_ERR(sgid_attr)) {
674 cma_bind_sgid_attr(id_priv, sgid_attr); 681 id_priv->id.port_num = port;
675 ret = 0; 682 cma_bind_sgid_attr(id_priv, sgid_attr);
676 goto out; 683 cma_attach_to_dev(id_priv, cma_dev);
684 ret = 0;
685 goto out;
686 }
677 } 687 }
678 } 688 }
689out:
690 mutex_unlock(&lock);
691 return ret;
692}
693
694/**
695 * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
696 * @id_priv: cm id to bind to cma device
697 * @listen_id_priv: listener cm id to match against
698 * @req: Pointer to req structure containaining incoming
699 * request information
700 * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
701 * rdma device matches for listen_id and incoming request. It also verifies
702 * that a GID table entry is present for the source address.
703 * Returns 0 on success, or returns error code otherwise.
704 */
705static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
706 const struct rdma_id_private *listen_id_priv,
707 struct cma_req_info *req)
708{
709 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
710 const struct ib_gid_attr *sgid_attr;
711 enum ib_gid_type gid_type;
712 union ib_gid gid;
713
714 if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
715 id_priv->id.ps == RDMA_PS_IPOIB)
716 return -EINVAL;
717
718 if (rdma_protocol_roce(req->device, req->port))
719 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
720 &gid);
721 else
722 memcpy(&gid, dev_addr->src_dev_addr +
723 rdma_addr_gid_offset(dev_addr), sizeof(gid));
724
725 gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
726 sgid_attr = cma_validate_port(req->device, req->port,
727 gid_type, &gid, id_priv);
728 if (IS_ERR(sgid_attr))
729 return PTR_ERR(sgid_attr);
730
731 id_priv->id.port_num = req->port;
732 cma_bind_sgid_attr(id_priv, sgid_attr);
733 /* Need to acquire lock to protect against reader
734 * of cma_dev->id_list such as cma_netdev_callback() and
735 * cma_process_remove().
736 */
737 mutex_lock(&lock);
738 cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
739 mutex_unlock(&lock);
740 return 0;
741}
742
743static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
744 const struct rdma_id_private *listen_id_priv)
745{
746 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
747 const struct ib_gid_attr *sgid_attr;
748 struct cma_device *cma_dev;
749 enum ib_gid_type gid_type;
750 int ret = -ENODEV;
751 union ib_gid gid;
752 u8 port;
753
754 if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
755 id_priv->id.ps == RDMA_PS_IPOIB)
756 return -EINVAL;
757
758 memcpy(&gid, dev_addr->src_dev_addr +
759 rdma_addr_gid_offset(dev_addr), sizeof(gid));
760
761 mutex_lock(&lock);
762
763 cma_dev = listen_id_priv->cma_dev;
764 port = listen_id_priv->id.port_num;
765 gid_type = listen_id_priv->gid_type;
766 sgid_attr = cma_validate_port(cma_dev->device, port,
767 gid_type, &gid, id_priv);
768 if (!IS_ERR(sgid_attr)) {
769 id_priv->id.port_num = port;
770 cma_bind_sgid_attr(id_priv, sgid_attr);
771 ret = 0;
772 goto out;
773 }
679 774
680 list_for_each_entry(cma_dev, &dev_list, list) { 775 list_for_each_entry(cma_dev, &dev_list, list) {
681 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 776 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
682 if (listen_id_priv && 777 if (listen_id_priv->cma_dev == cma_dev &&
683 listen_id_priv->cma_dev == cma_dev &&
684 listen_id_priv->id.port_num == port) 778 listen_id_priv->id.port_num == port)
685 continue; 779 continue;
686 780
687 gidp = rdma_protocol_roce(cma_dev->device, port) ?
688 &iboe_gid : &gid;
689 gid_type = cma_dev->default_gid_type[port - 1]; 781 gid_type = cma_dev->default_gid_type[port - 1];
690 sgid_attr = cma_validate_port(cma_dev->device, port, 782 sgid_attr = cma_validate_port(cma_dev->device, port,
691 gid_type, gidp, id_priv); 783 gid_type, &gid, id_priv);
692 if (!IS_ERR(sgid_attr)) { 784 if (!IS_ERR(sgid_attr)) {
693 id_priv->id.port_num = port; 785 id_priv->id.port_num = port;
694 cma_bind_sgid_attr(id_priv, sgid_attr); 786 cma_bind_sgid_attr(id_priv, sgid_attr);
@@ -785,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
785 if (!id_priv) 877 if (!id_priv)
786 return ERR_PTR(-ENOMEM); 878 return ERR_PTR(-ENOMEM);
787 879
788 if (caller) 880 rdma_restrack_set_task(&id_priv->res, caller);
789 id_priv->res.kern_name = caller;
790 else
791 rdma_restrack_set_task(&id_priv->res, current);
792 id_priv->res.type = RDMA_RESTRACK_CM_ID; 881 id_priv->res.type = RDMA_RESTRACK_CM_ID;
793 id_priv->state = RDMA_CM_IDLE; 882 id_priv->state = RDMA_CM_IDLE;
794 id_priv->id.context = context; 883 id_priv->id.context = context;
@@ -1462,18 +1551,35 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
1462 return rdma_protocol_roce(device, port_num); 1551 return rdma_protocol_roce(device, port_num);
1463} 1552}
1464 1553
1554static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
1555{
1556 const struct sockaddr *daddr =
1557 (const struct sockaddr *)&req->listen_addr_storage;
1558 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
1559
1560 /* Returns true if the req is for IPv6 link local */
1561 return (daddr->sa_family == AF_INET6 &&
1562 (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
1563}
1564
1465static bool cma_match_net_dev(const struct rdma_cm_id *id, 1565static bool cma_match_net_dev(const struct rdma_cm_id *id,
1466 const struct net_device *net_dev, 1566 const struct net_device *net_dev,
1467 u8 port_num) 1567 const struct cma_req_info *req)
1468{ 1568{
1469 const struct rdma_addr *addr = &id->route.addr; 1569 const struct rdma_addr *addr = &id->route.addr;
1470 1570
1471 if (!net_dev) 1571 if (!net_dev)
1472 /* This request is an AF_IB request */ 1572 /* This request is an AF_IB request */
1473 return (!id->port_num || id->port_num == port_num) && 1573 return (!id->port_num || id->port_num == req->port) &&
1474 (addr->src_addr.ss_family == AF_IB); 1574 (addr->src_addr.ss_family == AF_IB);
1475 1575
1476 /* 1576 /*
1577 * If the request is not for IPv6 link local, allow matching
1578 * request to any netdevice of the one or multiport rdma device.
1579 */
1580 if (!cma_is_req_ipv6_ll(req))
1581 return true;
1582 /*
1477 * Net namespaces must match, and if the listner is listening 1583 * Net namespaces must match, and if the listner is listening
1478 * on a specific netdevice than netdevice must match as well. 1584 * on a specific netdevice than netdevice must match as well.
1479 */ 1585 */
@@ -1500,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
1500 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1606 hlist_for_each_entry(id_priv, &bind_list->owners, node) {
1501 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1607 if (cma_match_private_data(id_priv, ib_event->private_data)) {
1502 if (id_priv->id.device == cm_id->device && 1608 if (id_priv->id.device == cm_id->device &&
1503 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1609 cma_match_net_dev(&id_priv->id, net_dev, req))
1504 return id_priv; 1610 return id_priv;
1505 list_for_each_entry(id_priv_dev, 1611 list_for_each_entry(id_priv_dev,
1506 &id_priv->listen_list, 1612 &id_priv->listen_list,
1507 listen_list) { 1613 listen_list) {
1508 if (id_priv_dev->id.device == cm_id->device && 1614 if (id_priv_dev->id.device == cm_id->device &&
1509 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1615 cma_match_net_dev(&id_priv_dev->id,
1616 net_dev, req))
1510 return id_priv_dev; 1617 return id_priv_dev;
1511 } 1618 }
1512 } 1619 }
@@ -1518,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
1518static struct rdma_id_private * 1625static struct rdma_id_private *
1519cma_ib_id_from_event(struct ib_cm_id *cm_id, 1626cma_ib_id_from_event(struct ib_cm_id *cm_id,
1520 const struct ib_cm_event *ib_event, 1627 const struct ib_cm_event *ib_event,
1628 struct cma_req_info *req,
1521 struct net_device **net_dev) 1629 struct net_device **net_dev)
1522{ 1630{
1523 struct cma_req_info req;
1524 struct rdma_bind_list *bind_list; 1631 struct rdma_bind_list *bind_list;
1525 struct rdma_id_private *id_priv; 1632 struct rdma_id_private *id_priv;
1526 int err; 1633 int err;
1527 1634
1528 err = cma_save_req_info(ib_event, &req); 1635 err = cma_save_req_info(ib_event, req);
1529 if (err) 1636 if (err)
1530 return ERR_PTR(err); 1637 return ERR_PTR(err);
1531 1638
1532 *net_dev = cma_get_net_dev(ib_event, &req); 1639 *net_dev = cma_get_net_dev(ib_event, req);
1533 if (IS_ERR(*net_dev)) { 1640 if (IS_ERR(*net_dev)) {
1534 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1641 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
1535 /* Assuming the protocol is AF_IB */ 1642 /* Assuming the protocol is AF_IB */
@@ -1567,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
1567 } 1674 }
1568 1675
1569 if (!validate_net_dev(*net_dev, 1676 if (!validate_net_dev(*net_dev,
1570 (struct sockaddr *)&req.listen_addr_storage, 1677 (struct sockaddr *)&req->listen_addr_storage,
1571 (struct sockaddr *)&req.src_addr_storage)) { 1678 (struct sockaddr *)&req->src_addr_storage)) {
1572 id_priv = ERR_PTR(-EHOSTUNREACH); 1679 id_priv = ERR_PTR(-EHOSTUNREACH);
1573 goto err; 1680 goto err;
1574 } 1681 }
1575 } 1682 }
1576 1683
1577 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1684 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1578 rdma_ps_from_service_id(req.service_id), 1685 rdma_ps_from_service_id(req->service_id),
1579 cma_port_from_service_id(req.service_id)); 1686 cma_port_from_service_id(req->service_id));
1580 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1687 id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
1581err: 1688err:
1582 rcu_read_unlock(); 1689 rcu_read_unlock();
1583 if (IS_ERR(id_priv) && *net_dev) { 1690 if (IS_ERR(id_priv) && *net_dev) {
@@ -1710,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
1710 mutex_lock(&id_priv->handler_mutex); 1817 mutex_lock(&id_priv->handler_mutex);
1711 mutex_unlock(&id_priv->handler_mutex); 1818 mutex_unlock(&id_priv->handler_mutex);
1712 1819
1820 rdma_restrack_del(&id_priv->res);
1713 if (id_priv->cma_dev) { 1821 if (id_priv->cma_dev) {
1714 rdma_restrack_del(&id_priv->res);
1715 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1822 if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
1716 if (id_priv->cm_id.ib) 1823 if (id_priv->cm_id.ib)
1717 ib_destroy_cm_id(id_priv->cm_id.ib); 1824 ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1902,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
1902 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2009 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1903 2010
1904 if (net_dev) { 2011 if (net_dev) {
1905 rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 2012 rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
1906 } else { 2013 } else {
1907 if (!cma_protocol_roce(listen_id) && 2014 if (!cma_protocol_roce(listen_id) &&
1908 cma_any_addr(cma_src_addr(id_priv))) { 2015 cma_any_addr(cma_src_addr(id_priv))) {
@@ -1952,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
1952 goto err; 2059 goto err;
1953 2060
1954 if (net_dev) { 2061 if (net_dev) {
1955 rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 2062 rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
1956 } else { 2063 } else {
1957 if (!cma_any_addr(cma_src_addr(id_priv))) { 2064 if (!cma_any_addr(cma_src_addr(id_priv))) {
1958 ret = cma_translate_addr(cma_src_addr(id_priv), 2065 ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -1999,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
1999{ 2106{
2000 struct rdma_id_private *listen_id, *conn_id = NULL; 2107 struct rdma_id_private *listen_id, *conn_id = NULL;
2001 struct rdma_cm_event event = {}; 2108 struct rdma_cm_event event = {};
2109 struct cma_req_info req = {};
2002 struct net_device *net_dev; 2110 struct net_device *net_dev;
2003 u8 offset; 2111 u8 offset;
2004 int ret; 2112 int ret;
2005 2113
2006 listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev); 2114 listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
2007 if (IS_ERR(listen_id)) 2115 if (IS_ERR(listen_id))
2008 return PTR_ERR(listen_id); 2116 return PTR_ERR(listen_id);
2009 2117
@@ -2036,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
2036 } 2144 }
2037 2145
2038 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2146 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
2039 ret = cma_acquire_dev(conn_id, listen_id); 2147 ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
2040 if (ret) 2148 if (ret)
2041 goto err2; 2149 goto err2;
2042 2150
@@ -2232,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
2232 goto out; 2340 goto out;
2233 } 2341 }
2234 2342
2235 ret = cma_acquire_dev(conn_id, listen_id); 2343 ret = cma_iw_acquire_dev(conn_id, listen_id);
2236 if (ret) { 2344 if (ret) {
2237 mutex_unlock(&conn_id->handler_mutex); 2345 mutex_unlock(&conn_id->handler_mutex);
2238 rdma_destroy_id(new_cm_id); 2346 rdma_destroy_id(new_cm_id);
@@ -2354,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2354 2462
2355 ret = rdma_listen(id, id_priv->backlog); 2463 ret = rdma_listen(id, id_priv->backlog);
2356 if (ret) 2464 if (ret)
2357 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2465 dev_warn(&cma_dev->device->dev,
2358 ret, cma_dev->device->name); 2466 "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
2359} 2467}
2360 2468
2361static void cma_listen_on_all(struct rdma_id_private *id_priv) 2469static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2402,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
2402 queue_work(cma_wq, &work->work); 2510 queue_work(cma_wq, &work->work);
2403} 2511}
2404 2512
2405static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2513static int cma_query_ib_route(struct rdma_id_private *id_priv,
2406 struct cma_work *work) 2514 unsigned long timeout_ms, struct cma_work *work)
2407{ 2515{
2408 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2516 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2409 struct sa_path_rec path_rec; 2517 struct sa_path_rec path_rec;
@@ -2521,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
2521 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2629 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2522} 2630}
2523 2631
2524static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2632static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
2633 unsigned long timeout_ms)
2525{ 2634{
2526 struct rdma_route *route = &id_priv->id.route; 2635 struct rdma_route *route = &id_priv->id.route;
2527 struct cma_work *work; 2636 struct cma_work *work;
@@ -2643,7 +2752,7 @@ err:
2643} 2752}
2644EXPORT_SYMBOL(rdma_set_ib_path); 2753EXPORT_SYMBOL(rdma_set_ib_path);
2645 2754
2646static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2755static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
2647{ 2756{
2648 struct cma_work *work; 2757 struct cma_work *work;
2649 2758
@@ -2744,7 +2853,7 @@ err1:
2744 return ret; 2853 return ret;
2745} 2854}
2746 2855
2747int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2856int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
2748{ 2857{
2749 struct rdma_id_private *id_priv; 2858 struct rdma_id_private *id_priv;
2750 int ret; 2859 int ret;
@@ -2759,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
2759 else if (rdma_protocol_roce(id->device, id->port_num)) 2868 else if (rdma_protocol_roce(id->device, id->port_num))
2760 ret = cma_resolve_iboe_route(id_priv); 2869 ret = cma_resolve_iboe_route(id_priv);
2761 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2870 else if (rdma_protocol_iwarp(id->device, id->port_num))
2762 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2871 ret = cma_resolve_iw_route(id_priv);
2763 else 2872 else
2764 ret = -ENOSYS; 2873 ret = -ENOSYS;
2765 2874
@@ -2862,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
2862 2971
2863 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2972 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
2864 if (!status && !id_priv->cma_dev) { 2973 if (!status && !id_priv->cma_dev) {
2865 status = cma_acquire_dev(id_priv, NULL); 2974 status = cma_acquire_dev_by_src_ip(id_priv);
2866 if (status) 2975 if (status)
2867 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", 2976 pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
2868 status); 2977 status);
@@ -2882,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
2882 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2991 if (id_priv->id.event_handler(&id_priv->id, &event)) {
2883 cma_exch(id_priv, RDMA_CM_DESTROYING); 2992 cma_exch(id_priv, RDMA_CM_DESTROYING);
2884 mutex_unlock(&id_priv->handler_mutex); 2993 mutex_unlock(&id_priv->handler_mutex);
2885 cma_deref_id(id_priv);
2886 rdma_destroy_id(&id_priv->id); 2994 rdma_destroy_id(&id_priv->id);
2887 return; 2995 return;
2888 } 2996 }
2889out: 2997out:
2890 mutex_unlock(&id_priv->handler_mutex); 2998 mutex_unlock(&id_priv->handler_mutex);
2891 cma_deref_id(id_priv);
2892} 2999}
2893 3000
2894static int cma_resolve_loopback(struct rdma_id_private *id_priv) 3001static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@@ -2966,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2966} 3073}
2967 3074
2968int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3075int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2969 const struct sockaddr *dst_addr, int timeout_ms) 3076 const struct sockaddr *dst_addr, unsigned long timeout_ms)
2970{ 3077{
2971 struct rdma_id_private *id_priv; 3078 struct rdma_id_private *id_priv;
2972 int ret; 3079 int ret;
@@ -2985,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2985 return -EINVAL; 3092 return -EINVAL;
2986 3093
2987 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3094 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2988 atomic_inc(&id_priv->refcount);
2989 if (cma_any_addr(dst_addr)) { 3095 if (cma_any_addr(dst_addr)) {
2990 ret = cma_resolve_loopback(id_priv); 3096 ret = cma_resolve_loopback(id_priv);
2991 } else { 3097 } else {
2992 if (dst_addr->sa_family == AF_IB) { 3098 if (dst_addr->sa_family == AF_IB) {
2993 ret = cma_resolve_ib_addr(id_priv); 3099 ret = cma_resolve_ib_addr(id_priv);
2994 } else { 3100 } else {
2995 ret = rdma_resolve_ip(cma_src_addr(id_priv), 3101 ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
2996 dst_addr, &id->route.addr.dev_addr, 3102 &id->route.addr.dev_addr,
2997 timeout_ms, addr_handler, id_priv); 3103 timeout_ms, addr_handler,
3104 false, id_priv);
2998 } 3105 }
2999 } 3106 }
3000 if (ret) 3107 if (ret)
@@ -3003,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
3003 return 0; 3110 return 0;
3004err: 3111err:
3005 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3112 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
3006 cma_deref_id(id_priv);
3007 return ret; 3113 return ret;
3008} 3114}
3009EXPORT_SYMBOL(rdma_resolve_addr); 3115EXPORT_SYMBOL(rdma_resolve_addr);
@@ -3414,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3414 if (ret) 3520 if (ret)
3415 goto err1; 3521 goto err1;
3416 3522
3417 ret = cma_acquire_dev(id_priv, NULL); 3523 ret = cma_acquire_dev_by_src_ip(id_priv);
3418 if (ret) 3524 if (ret)
3419 goto err1; 3525 goto err1;
3420 } 3526 }
@@ -3439,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
3439 3545
3440 return 0; 3546 return 0;
3441err2: 3547err2:
3442 if (id_priv->cma_dev) { 3548 rdma_restrack_del(&id_priv->res);
3443 rdma_restrack_del(&id_priv->res); 3549 if (id_priv->cma_dev)
3444 cma_release_dev(id_priv); 3550 cma_release_dev(id_priv);
3445 }
3446err1: 3551err1:
3447 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3552 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
3448 return ret; 3553 return ret;
@@ -3839,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
3839 3944
3840 id_priv = container_of(id, struct rdma_id_private, id); 3945 id_priv = container_of(id, struct rdma_id_private, id);
3841 3946
3842 if (caller) 3947 rdma_restrack_set_task(&id_priv->res, caller);
3843 id_priv->res.kern_name = caller;
3844 else
3845 rdma_restrack_set_task(&id_priv->res, current);
3846 3948
3847 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3949 if (!cma_comp(id_priv, RDMA_CM_CONNECT))
3848 return -EINVAL; 3950 return -EINVAL;
@@ -4087,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
4087 (!ib_sa_sendonly_fullmem_support(&sa_client, 4189 (!ib_sa_sendonly_fullmem_support(&sa_client,
4088 id_priv->id.device, 4190 id_priv->id.device,
4089 id_priv->id.port_num))) { 4191 id_priv->id.port_num))) {
4090 pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 4192 dev_warn(
4091 "RDMA CM: SM doesn't support Send Only Full Member option\n", 4193 &id_priv->id.device->dev,
4092 id_priv->id.device->name, id_priv->id.port_num); 4194 "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
4195 id_priv->id.port_num);
4093 return -EOPNOTSUPP; 4196 return -EOPNOTSUPP;
4094 } 4197 }
4095 4198
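
For context on the cma.c API changes above, the resolution timeouts are now unsigned long milliseconds rather than int. A minimal ULP-side sketch of the updated calls follows; the function and variable names are hypothetical, and the wait for RDMA_CM_EVENT_ADDR_RESOLVED between the two steps is elided.

#include <rdma/rdma_cm.h>

/* Hypothetical consumer: resolve the peer address, then the route,
 * using the new unsigned long millisecond timeouts. */
static int example_resolve(struct rdma_cm_id *id, struct sockaddr *dst)
{
	unsigned long timeout_ms = 2000;
	int ret;

	ret = rdma_resolve_addr(id, NULL, dst, timeout_ms);
	if (ret)
		return ret;

	/* ... wait for RDMA_CM_EVENT_ADDR_RESOLVED in the event handler ... */

	return rdma_resolve_route(id, timeout_ms);
}
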
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index eee38b40be99..8c2dfb3e294e 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
65 65
66static bool filter_by_name(struct ib_device *ib_dev, void *cookie) 66static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
67{ 67{
68 return !strcmp(ib_dev->name, cookie); 68 return !strcmp(dev_name(&ib_dev->dev), cookie);
69} 69}
70 70
71static int cma_configfs_params_get(struct config_item *item, 71static int cma_configfs_params_get(struct config_item *item,
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 77c7005c396c..bb9007a0cca7 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -44,7 +44,7 @@
44#include "mad_priv.h" 44#include "mad_priv.h"
45 45
46/* Total number of ports combined across all struct ib_devices's */ 46/* Total number of ports combined across all struct ib_devices's */
47#define RDMA_MAX_PORTS 1024 47#define RDMA_MAX_PORTS 8192
48 48
49struct pkey_index_qp_list { 49struct pkey_index_qp_list {
50 struct list_head pkey_index_list; 50 struct list_head pkey_index_list;
@@ -87,6 +87,7 @@ int ib_device_register_sysfs(struct ib_device *device,
87 int (*port_callback)(struct ib_device *, 87 int (*port_callback)(struct ib_device *,
88 u8, struct kobject *)); 88 u8, struct kobject *));
89void ib_device_unregister_sysfs(struct ib_device *device); 89void ib_device_unregister_sysfs(struct ib_device *device);
90int ib_device_rename(struct ib_device *ibdev, const char *name);
90 91
91typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, 92typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
92 struct net_device *idev, void *cookie); 93 struct net_device *idev, void *cookie);
@@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
338 339
339int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, 340int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
340 const union ib_gid *dgid, 341 const union ib_gid *dgid,
341 u8 *dmac, const struct net_device *ndev, 342 u8 *dmac, const struct ib_gid_attr *sgid_attr,
342 int *hoplimit); 343 int *hoplimit);
344void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
345 const struct net_device *dev);
343 346
347struct sa_path_rec;
348int roce_resolve_route_from_path(struct sa_path_rec *rec,
349 const struct ib_gid_attr *attr);
350
351struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
344#endif /* _CORE_PRIV_H */ 352#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index af5ad6a56ae4..b1e5365ddafa 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
112 IB_POLL_BATCH); 112 IB_POLL_BATCH);
113 if (completed >= IB_POLL_BUDGET_WORKQUEUE || 113 if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
114 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) 114 ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
115 queue_work(ib_comp_wq, &cq->work); 115 queue_work(cq->comp_wq, &cq->work);
116} 116}
117 117
118static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) 118static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
119{ 119{
120 queue_work(ib_comp_wq, &cq->work); 120 queue_work(cq->comp_wq, &cq->work);
121} 121}
122 122
123/** 123/**
@@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
161 goto out_destroy_cq; 161 goto out_destroy_cq;
162 162
163 cq->res.type = RDMA_RESTRACK_CQ; 163 cq->res.type = RDMA_RESTRACK_CQ;
164 cq->res.kern_name = caller; 164 rdma_restrack_set_task(&cq->res, caller);
165 rdma_restrack_add(&cq->res); 165 rdma_restrack_add(&cq->res);
166 166
167 switch (cq->poll_ctx) { 167 switch (cq->poll_ctx) {
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
175 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 175 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
176 break; 176 break;
177 case IB_POLL_WORKQUEUE: 177 case IB_POLL_WORKQUEUE:
178 case IB_POLL_UNBOUND_WORKQUEUE:
178 cq->comp_handler = ib_cq_completion_workqueue; 179 cq->comp_handler = ib_cq_completion_workqueue;
179 INIT_WORK(&cq->work, ib_cq_poll_work); 180 INIT_WORK(&cq->work, ib_cq_poll_work);
180 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 181 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
182 cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
183 ib_comp_wq : ib_comp_unbound_wq;
181 break; 184 break;
182 default: 185 default:
183 ret = -EINVAL; 186 ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
213 irq_poll_disable(&cq->iop); 216 irq_poll_disable(&cq->iop);
214 break; 217 break;
215 case IB_POLL_WORKQUEUE: 218 case IB_POLL_WORKQUEUE:
219 case IB_POLL_UNBOUND_WORKQUEUE:
216 cancel_work_sync(&cq->work); 220 cancel_work_sync(&cq->work);
217 break; 221 break;
218 default: 222 default:
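
The new IB_POLL_UNBOUND_WORKQUEUE context routes CQ polling to the unbound ib_comp_unbound_wq instead of the per-CPU bound ib_comp_wq (the mad.c hunk further below switches the MAD CQ over to it). A minimal allocation sketch, with hypothetical dev/priv arguments and an arbitrary CQE count:

#include <rdma/ib_verbs.h>

/* Hypothetical consumer: completions for this CQ are polled from the
 * unbound, high-priority workqueue created in device.c below. */
static struct ib_cq *example_alloc_cq(struct ib_device *dev, void *priv)
{
	return ib_alloc_cq(dev, priv, 128, 0, IB_POLL_UNBOUND_WORKQUEUE);
}
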
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index db3b6271f09d..87eb4f2cdd7d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -61,6 +61,7 @@ struct ib_client_data {
61}; 61};
62 62
63struct workqueue_struct *ib_comp_wq; 63struct workqueue_struct *ib_comp_wq;
64struct workqueue_struct *ib_comp_unbound_wq;
64struct workqueue_struct *ib_wq; 65struct workqueue_struct *ib_wq;
65EXPORT_SYMBOL_GPL(ib_wq); 66EXPORT_SYMBOL_GPL(ib_wq);
66 67
@@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
122 123
123 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { 124 for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
124 if (!*(void **) ((void *) device + mandatory_table[i].offset)) { 125 if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
125 pr_warn("Device %s is missing mandatory function %s\n", 126 dev_warn(&device->dev,
126 device->name, mandatory_table[i].name); 127 "Device is missing mandatory function %s\n",
128 mandatory_table[i].name);
127 return -EINVAL; 129 return -EINVAL;
128 } 130 }
129 } 131 }
@@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
163 struct ib_device *device; 165 struct ib_device *device;
164 166
165 list_for_each_entry(device, &device_list, core_list) 167 list_for_each_entry(device, &device_list, core_list)
166 if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) 168 if (!strcmp(name, dev_name(&device->dev)))
167 return device; 169 return device;
168 170
169 return NULL; 171 return NULL;
170} 172}
171 173
172static int alloc_name(char *name) 174int ib_device_rename(struct ib_device *ibdev, const char *name)
175{
176 struct ib_device *device;
177 int ret = 0;
178
179 if (!strcmp(name, dev_name(&ibdev->dev)))
180 return ret;
181
182 mutex_lock(&device_mutex);
183 list_for_each_entry(device, &device_list, core_list) {
184 if (!strcmp(name, dev_name(&device->dev))) {
185 ret = -EEXIST;
186 goto out;
187 }
188 }
189
190 ret = device_rename(&ibdev->dev, name);
191 if (ret)
192 goto out;
193 strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
194out:
195 mutex_unlock(&device_mutex);
196 return ret;
197}
198
199static int alloc_name(struct ib_device *ibdev, const char *name)
173{ 200{
174 unsigned long *inuse; 201 unsigned long *inuse;
175 char buf[IB_DEVICE_NAME_MAX];
176 struct ib_device *device; 202 struct ib_device *device;
177 int i; 203 int i;
178 204
@@ -181,24 +207,21 @@ static int alloc_name(char *name)
181 return -ENOMEM; 207 return -ENOMEM;
182 208
183 list_for_each_entry(device, &device_list, core_list) { 209 list_for_each_entry(device, &device_list, core_list) {
184 if (!sscanf(device->name, name, &i)) 210 char buf[IB_DEVICE_NAME_MAX];
211
212 if (sscanf(dev_name(&device->dev), name, &i) != 1)
185 continue; 213 continue;
186 if (i < 0 || i >= PAGE_SIZE * 8) 214 if (i < 0 || i >= PAGE_SIZE * 8)
187 continue; 215 continue;
188 snprintf(buf, sizeof buf, name, i); 216 snprintf(buf, sizeof buf, name, i);
189 if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) 217 if (!strcmp(buf, dev_name(&device->dev)))
190 set_bit(i, inuse); 218 set_bit(i, inuse);
191 } 219 }
192 220
193 i = find_first_zero_bit(inuse, PAGE_SIZE * 8); 221 i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
194 free_page((unsigned long) inuse); 222 free_page((unsigned long) inuse);
195 snprintf(buf, sizeof buf, name, i);
196 223
197 if (__ib_device_get_by_name(buf)) 224 return dev_set_name(&ibdev->dev, name, i);
198 return -ENFILE;
199
200 strlcpy(name, buf, IB_DEVICE_NAME_MAX);
201 return 0;
202} 225}
203 226
204static void ib_device_release(struct device *device) 227static void ib_device_release(struct device *device)
@@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
221static int ib_device_uevent(struct device *device, 244static int ib_device_uevent(struct device *device,
222 struct kobj_uevent_env *env) 245 struct kobj_uevent_env *env)
223{ 246{
224 struct ib_device *dev = container_of(device, struct ib_device, dev); 247 if (add_uevent_var(env, "NAME=%s", dev_name(device)))
225
226 if (add_uevent_var(env, "NAME=%s", dev->name))
227 return -ENOMEM; 248 return -ENOMEM;
228 249
229 /* 250 /*
@@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
269 290
270 INIT_LIST_HEAD(&device->event_handler_list); 291 INIT_LIST_HEAD(&device->event_handler_list);
271 spin_lock_init(&device->event_handler_lock); 292 spin_lock_init(&device->event_handler_lock);
272 spin_lock_init(&device->client_data_lock); 293 rwlock_init(&device->client_data_lock);
273 INIT_LIST_HEAD(&device->client_data_list); 294 INIT_LIST_HEAD(&device->client_data_list);
274 INIT_LIST_HEAD(&device->port_list); 295 INIT_LIST_HEAD(&device->port_list);
275 296
@@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
285 */ 306 */
286void ib_dealloc_device(struct ib_device *device) 307void ib_dealloc_device(struct ib_device *device)
287{ 308{
309 WARN_ON(!list_empty(&device->client_data_list));
288 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && 310 WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
289 device->reg_state != IB_DEV_UNINITIALIZED); 311 device->reg_state != IB_DEV_UNINITIALIZED);
290 rdma_restrack_clean(&device->res); 312 rdma_restrack_clean(&device->res);
@@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
295static int add_client_context(struct ib_device *device, struct ib_client *client) 317static int add_client_context(struct ib_device *device, struct ib_client *client)
296{ 318{
297 struct ib_client_data *context; 319 struct ib_client_data *context;
298 unsigned long flags;
299 320
300 context = kmalloc(sizeof *context, GFP_KERNEL); 321 context = kmalloc(sizeof(*context), GFP_KERNEL);
301 if (!context) 322 if (!context)
302 return -ENOMEM; 323 return -ENOMEM;
303 324
@@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
306 context->going_down = false; 327 context->going_down = false;
307 328
308 down_write(&lists_rwsem); 329 down_write(&lists_rwsem);
309 spin_lock_irqsave(&device->client_data_lock, flags); 330 write_lock_irq(&device->client_data_lock);
310 list_add(&context->list, &device->client_data_list); 331 list_add(&context->list, &device->client_data_list);
311 spin_unlock_irqrestore(&device->client_data_lock, flags); 332 write_unlock_irq(&device->client_data_lock);
312 up_write(&lists_rwsem); 333 up_write(&lists_rwsem);
313 334
314 return 0; 335 return 0;
@@ -444,22 +465,8 @@ static u32 __dev_new_index(void)
444 } 465 }
445} 466}
446 467
447/** 468static void setup_dma_device(struct ib_device *device)
448 * ib_register_device - Register an IB device with IB core
449 * @device:Device to register
450 *
451 * Low-level drivers use ib_register_device() to register their
452 * devices with the IB core. All registered clients will receive a
453 * callback for each device that is added. @device must be allocated
454 * with ib_alloc_device().
455 */
456int ib_register_device(struct ib_device *device,
457 int (*port_callback)(struct ib_device *,
458 u8, struct kobject *))
459{ 469{
460 int ret;
461 struct ib_client *client;
462 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
463 struct device *parent = device->dev.parent; 470 struct device *parent = device->dev.parent;
464 471
465 WARN_ON_ONCE(device->dma_device); 472 WARN_ON_ONCE(device->dma_device);
@@ -491,56 +498,113 @@ int ib_register_device(struct ib_device *device,
491 WARN_ON_ONCE(!parent); 498 WARN_ON_ONCE(!parent);
492 device->dma_device = parent; 499 device->dma_device = parent;
493 } 500 }
501}
494 502
495 mutex_lock(&device_mutex); 503static void cleanup_device(struct ib_device *device)
504{
505 ib_cache_cleanup_one(device);
506 ib_cache_release_one(device);
507 kfree(device->port_pkey_list);
508 kfree(device->port_immutable);
509}
496 510
497 if (strchr(device->name, '%')) { 511static int setup_device(struct ib_device *device)
498 ret = alloc_name(device->name); 512{
499 if (ret) 513 struct ib_udata uhw = {.outlen = 0, .inlen = 0};
500 goto out; 514 int ret;
501 }
502 515
503 if (ib_device_check_mandatory(device)) { 516 ret = ib_device_check_mandatory(device);
504 ret = -EINVAL; 517 if (ret)
505 goto out; 518 return ret;
506 }
507 519
508 ret = read_port_immutable(device); 520 ret = read_port_immutable(device);
509 if (ret) { 521 if (ret) {
510 pr_warn("Couldn't create per port immutable data %s\n", 522 dev_warn(&device->dev,
511 device->name); 523 "Couldn't create per port immutable data\n");
512 goto out; 524 return ret;
513 } 525 }
514 526
515 ret = setup_port_pkey_list(device); 527 memset(&device->attrs, 0, sizeof(device->attrs));
528 ret = device->query_device(device, &device->attrs, &uhw);
516 if (ret) { 529 if (ret) {
517 pr_warn("Couldn't create per port_pkey_list\n"); 530 dev_warn(&device->dev,
518 goto out; 531 "Couldn't query the device attributes\n");
532 goto port_cleanup;
519 } 533 }
520 534
521 ret = ib_cache_setup_one(device); 535 ret = setup_port_pkey_list(device);
522 if (ret) { 536 if (ret) {
523 pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); 537 dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
524 goto port_cleanup; 538 goto port_cleanup;
525 } 539 }
526 540
527 ret = ib_device_register_rdmacg(device); 541 ret = ib_cache_setup_one(device);
528 if (ret) { 542 if (ret) {
529 pr_warn("Couldn't register device with rdma cgroup\n"); 543 dev_warn(&device->dev,
530 goto cache_cleanup; 544 "Couldn't set up InfiniBand P_Key/GID cache\n");
545 goto pkey_cleanup;
546 }
547 return 0;
548
549pkey_cleanup:
550 kfree(device->port_pkey_list);
551port_cleanup:
552 kfree(device->port_immutable);
553 return ret;
554}
555
556/**
557 * ib_register_device - Register an IB device with IB core
558 * @device:Device to register
559 *
560 * Low-level drivers use ib_register_device() to register their
561 * devices with the IB core. All registered clients will receive a
562 * callback for each device that is added. @device must be allocated
563 * with ib_alloc_device().
564 */
565int ib_register_device(struct ib_device *device, const char *name,
566 int (*port_callback)(struct ib_device *, u8,
567 struct kobject *))
568{
569 int ret;
570 struct ib_client *client;
571
572 setup_dma_device(device);
573
574 mutex_lock(&device_mutex);
575
576 if (strchr(name, '%')) {
577 ret = alloc_name(device, name);
578 if (ret)
579 goto out;
580 } else {
581 ret = dev_set_name(&device->dev, name);
582 if (ret)
583 goto out;
584 }
585 if (__ib_device_get_by_name(dev_name(&device->dev))) {
586 ret = -ENFILE;
587 goto out;
531 } 588 }
589 strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
532 590
533 memset(&device->attrs, 0, sizeof(device->attrs)); 591 ret = setup_device(device);
534 ret = device->query_device(device, &device->attrs, &uhw); 592 if (ret)
593 goto out;
594
595 device->index = __dev_new_index();
596
597 ret = ib_device_register_rdmacg(device);
535 if (ret) { 598 if (ret) {
536 pr_warn("Couldn't query the device attributes\n"); 599 dev_warn(&device->dev,
537 goto cg_cleanup; 600 "Couldn't register device with rdma cgroup\n");
601 goto dev_cleanup;
538 } 602 }
539 603
540 ret = ib_device_register_sysfs(device, port_callback); 604 ret = ib_device_register_sysfs(device, port_callback);
541 if (ret) { 605 if (ret) {
542 pr_warn("Couldn't register device %s with driver model\n", 606 dev_warn(&device->dev,
543 device->name); 607 "Couldn't register device with driver model\n");
544 goto cg_cleanup; 608 goto cg_cleanup;
545 } 609 }
546 610
@@ -550,7 +614,6 @@ int ib_register_device(struct ib_device *device,
550 if (!add_client_context(device, client) && client->add) 614 if (!add_client_context(device, client) && client->add)
551 client->add(device); 615 client->add(device);
552 616
553 device->index = __dev_new_index();
554 down_write(&lists_rwsem); 617 down_write(&lists_rwsem);
555 list_add_tail(&device->core_list, &device_list); 618 list_add_tail(&device->core_list, &device_list);
556 up_write(&lists_rwsem); 619 up_write(&lists_rwsem);
@@ -559,11 +622,8 @@ int ib_register_device(struct ib_device *device,
559 622
560cg_cleanup: 623cg_cleanup:
561 ib_device_unregister_rdmacg(device); 624 ib_device_unregister_rdmacg(device);
562cache_cleanup: 625dev_cleanup:
563 ib_cache_cleanup_one(device); 626 cleanup_device(device);
564 ib_cache_release_one(device);
565port_cleanup:
566 kfree(device->port_immutable);
567out: 627out:
568 mutex_unlock(&device_mutex); 628 mutex_unlock(&device_mutex);
569 return ret; 629 return ret;
@@ -585,21 +645,20 @@ void ib_unregister_device(struct ib_device *device)
585 645
586 down_write(&lists_rwsem); 646 down_write(&lists_rwsem);
587 list_del(&device->core_list); 647 list_del(&device->core_list);
588 spin_lock_irqsave(&device->client_data_lock, flags); 648 write_lock_irq(&device->client_data_lock);
589 list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 649 list_for_each_entry(context, &device->client_data_list, list)
590 context->going_down = true; 650 context->going_down = true;
591 spin_unlock_irqrestore(&device->client_data_lock, flags); 651 write_unlock_irq(&device->client_data_lock);
592 downgrade_write(&lists_rwsem); 652 downgrade_write(&lists_rwsem);
593 653
594 list_for_each_entry_safe(context, tmp, &device->client_data_list, 654 list_for_each_entry(context, &device->client_data_list, list) {
595 list) {
596 if (context->client->remove) 655 if (context->client->remove)
597 context->client->remove(device, context->data); 656 context->client->remove(device, context->data);
598 } 657 }
599 up_read(&lists_rwsem); 658 up_read(&lists_rwsem);
600 659
601 ib_device_unregister_rdmacg(device);
602 ib_device_unregister_sysfs(device); 660 ib_device_unregister_sysfs(device);
661 ib_device_unregister_rdmacg(device);
603 662
604 mutex_unlock(&device_mutex); 663 mutex_unlock(&device_mutex);
605 664
@@ -609,10 +668,13 @@ void ib_unregister_device(struct ib_device *device)
609 kfree(device->port_pkey_list); 668 kfree(device->port_pkey_list);
610 669
611 down_write(&lists_rwsem); 670 down_write(&lists_rwsem);
612 spin_lock_irqsave(&device->client_data_lock, flags); 671 write_lock_irqsave(&device->client_data_lock, flags);
613 list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 672 list_for_each_entry_safe(context, tmp, &device->client_data_list,
673 list) {
674 list_del(&context->list);
614 kfree(context); 675 kfree(context);
615 spin_unlock_irqrestore(&device->client_data_lock, flags); 676 }
677 write_unlock_irqrestore(&device->client_data_lock, flags);
616 up_write(&lists_rwsem); 678 up_write(&lists_rwsem);
617 679
618 device->reg_state = IB_DEV_UNREGISTERED; 680 device->reg_state = IB_DEV_UNREGISTERED;
@@ -662,9 +724,8 @@ EXPORT_SYMBOL(ib_register_client);
662 */ 724 */
663void ib_unregister_client(struct ib_client *client) 725void ib_unregister_client(struct ib_client *client)
664{ 726{
665 struct ib_client_data *context, *tmp; 727 struct ib_client_data *context;
666 struct ib_device *device; 728 struct ib_device *device;
667 unsigned long flags;
668 729
669 mutex_lock(&device_mutex); 730 mutex_lock(&device_mutex);
670 731
@@ -676,14 +737,14 @@ void ib_unregister_client(struct ib_client *client)
676 struct ib_client_data *found_context = NULL; 737 struct ib_client_data *found_context = NULL;
677 738
678 down_write(&lists_rwsem); 739 down_write(&lists_rwsem);
679 spin_lock_irqsave(&device->client_data_lock, flags); 740 write_lock_irq(&device->client_data_lock);
680 list_for_each_entry_safe(context, tmp, &device->client_data_list, list) 741 list_for_each_entry(context, &device->client_data_list, list)
681 if (context->client == client) { 742 if (context->client == client) {
682 context->going_down = true; 743 context->going_down = true;
683 found_context = context; 744 found_context = context;
684 break; 745 break;
685 } 746 }
686 spin_unlock_irqrestore(&device->client_data_lock, flags); 747 write_unlock_irq(&device->client_data_lock);
687 up_write(&lists_rwsem); 748 up_write(&lists_rwsem);
688 749
689 if (client->remove) 750 if (client->remove)
@@ -691,17 +752,18 @@ void ib_unregister_client(struct ib_client *client)
691 found_context->data : NULL); 752 found_context->data : NULL);
692 753
693 if (!found_context) { 754 if (!found_context) {
694 pr_warn("No client context found for %s/%s\n", 755 dev_warn(&device->dev,
695 device->name, client->name); 756 "No client context found for %s\n",
757 client->name);
696 continue; 758 continue;
697 } 759 }
698 760
699 down_write(&lists_rwsem); 761 down_write(&lists_rwsem);
700 spin_lock_irqsave(&device->client_data_lock, flags); 762 write_lock_irq(&device->client_data_lock);
701 list_del(&found_context->list); 763 list_del(&found_context->list);
702 kfree(found_context); 764 write_unlock_irq(&device->client_data_lock);
703 spin_unlock_irqrestore(&device->client_data_lock, flags);
704 up_write(&lists_rwsem); 765 up_write(&lists_rwsem);
766 kfree(found_context);
705 } 767 }
706 768
707 mutex_unlock(&device_mutex); 769 mutex_unlock(&device_mutex);
@@ -722,13 +784,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
722 void *ret = NULL; 784 void *ret = NULL;
723 unsigned long flags; 785 unsigned long flags;
724 786
725 spin_lock_irqsave(&device->client_data_lock, flags); 787 read_lock_irqsave(&device->client_data_lock, flags);
726 list_for_each_entry(context, &device->client_data_list, list) 788 list_for_each_entry(context, &device->client_data_list, list)
727 if (context->client == client) { 789 if (context->client == client) {
728 ret = context->data; 790 ret = context->data;
729 break; 791 break;
730 } 792 }
731 spin_unlock_irqrestore(&device->client_data_lock, flags); 793 read_unlock_irqrestore(&device->client_data_lock, flags);
732 794
733 return ret; 795 return ret;
734} 796}
@@ -749,18 +811,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
749 struct ib_client_data *context; 811 struct ib_client_data *context;
750 unsigned long flags; 812 unsigned long flags;
751 813
752 spin_lock_irqsave(&device->client_data_lock, flags); 814 write_lock_irqsave(&device->client_data_lock, flags);
753 list_for_each_entry(context, &device->client_data_list, list) 815 list_for_each_entry(context, &device->client_data_list, list)
754 if (context->client == client) { 816 if (context->client == client) {
755 context->data = data; 817 context->data = data;
756 goto out; 818 goto out;
757 } 819 }
758 820
759 pr_warn("No client context found for %s/%s\n", 821 dev_warn(&device->dev, "No client context found for %s\n",
760 device->name, client->name); 822 client->name);
761 823
762out: 824out:
763 spin_unlock_irqrestore(&device->client_data_lock, flags); 825 write_unlock_irqrestore(&device->client_data_lock, flags);
764} 826}
765EXPORT_SYMBOL(ib_set_client_data); 827EXPORT_SYMBOL(ib_set_client_data);
766 828
@@ -1166,10 +1228,19 @@ static int __init ib_core_init(void)
1166 goto err; 1228 goto err;
1167 } 1229 }
1168 1230
1231 ib_comp_unbound_wq =
1232 alloc_workqueue("ib-comp-unb-wq",
1233 WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
1234 WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
1235 if (!ib_comp_unbound_wq) {
1236 ret = -ENOMEM;
1237 goto err_comp;
1238 }
1239
1169 ret = class_register(&ib_class); 1240 ret = class_register(&ib_class);
1170 if (ret) { 1241 if (ret) {
1171 pr_warn("Couldn't create InfiniBand device class\n"); 1242 pr_warn("Couldn't create InfiniBand device class\n");
1172 goto err_comp; 1243 goto err_comp_unbound;
1173 } 1244 }
1174 1245
1175 ret = rdma_nl_init(); 1246 ret = rdma_nl_init();
@@ -1218,6 +1289,8 @@ err_ibnl:
1218 rdma_nl_exit(); 1289 rdma_nl_exit();
1219err_sysfs: 1290err_sysfs:
1220 class_unregister(&ib_class); 1291 class_unregister(&ib_class);
1292err_comp_unbound:
1293 destroy_workqueue(ib_comp_unbound_wq);
1221err_comp: 1294err_comp:
1222 destroy_workqueue(ib_comp_wq); 1295 destroy_workqueue(ib_comp_wq);
1223err: 1296err:
@@ -1236,6 +1309,7 @@ static void __exit ib_core_cleanup(void)
1236 addr_cleanup(); 1309 addr_cleanup();
1237 rdma_nl_exit(); 1310 rdma_nl_exit();
1238 class_unregister(&ib_class); 1311 class_unregister(&ib_class);
1312 destroy_workqueue(ib_comp_unbound_wq);
1239 destroy_workqueue(ib_comp_wq); 1313 destroy_workqueue(ib_comp_wq);
1240 /* Make sure that any pending umem accounting work is done. */ 1314 /* Make sure that any pending umem accounting work is done. */
1241 destroy_workqueue(ib_wq); 1315 destroy_workqueue(ib_wq);
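
With the refactored registration path above, ib_register_device() now receives the device name directly, and a printf-style pattern containing '%' is resolved through alloc_name(). A minimal driver-side sketch, assuming a hypothetical "exmpl%d" name pattern and no per-port sysfs callback:

#include <rdma/ib_verbs.h>

/* Hypothetical driver: the core picks the first free "exmpl%d" name and
 * mirrors it into dev_name(&ibdev->dev) and ibdev->name. */
static int example_register(struct ib_device *ibdev)
{
	return ib_register_device(ibdev, "exmpl%d", NULL);
}
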
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index a077500f7f32..83ba0068e8bb 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
213 device = pd->device; 213 device = pd->device;
214 if (!device->alloc_fmr || !device->dealloc_fmr || 214 if (!device->alloc_fmr || !device->dealloc_fmr ||
215 !device->map_phys_fmr || !device->unmap_fmr) { 215 !device->map_phys_fmr || !device->unmap_fmr) {
216 pr_info(PFX "Device %s does not support FMRs\n", device->name); 216 dev_info(&device->dev, "Device does not support FMRs\n");
217 return ERR_PTR(-ENOSYS); 217 return ERR_PTR(-ENOSYS);
218 } 218 }
219 219
@@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
257 atomic_set(&pool->flush_ser, 0); 257 atomic_set(&pool->flush_ser, 0);
258 init_waitqueue_head(&pool->force_wait); 258 init_waitqueue_head(&pool->force_wait);
259 259
260 pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name); 260 pool->worker =
261 kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
261 if (IS_ERR(pool->worker)) { 262 if (IS_ERR(pool->worker)) {
262 pr_warn(PFX "couldn't start cleanup kthread worker\n"); 263 pr_warn(PFX "couldn't start cleanup kthread worker\n");
263 ret = PTR_ERR(pool->worker); 264 ret = PTR_ERR(pool->worker);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5d676cff41f4..ba668d49c751 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
509 cm_id->m_local_addr = cm_id->local_addr; 509 cm_id->m_local_addr = cm_id->local_addr;
510 cm_id->m_remote_addr = cm_id->remote_addr; 510 cm_id->m_remote_addr = cm_id->remote_addr;
511 511
512 memcpy(pm_reg_msg.dev_name, cm_id->device->name, 512 memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
513 sizeof(pm_reg_msg.dev_name)); 513 sizeof(pm_reg_msg.dev_name));
514 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, 514 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
515 sizeof(pm_reg_msg.if_name)); 515 sizeof(pm_reg_msg.if_name));
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ef459f2f2eeb..d7025cd5be28 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
220 int ret2, qpn; 220 int ret2, qpn;
221 u8 mgmt_class, vclass; 221 u8 mgmt_class, vclass;
222 222
223 if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
224 (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
225 return ERR_PTR(-EPROTONOSUPPORT);
226
223 /* Validate parameters */ 227 /* Validate parameters */
224 qpn = get_spl_qp_index(qp_type); 228 qpn = get_spl_qp_index(qp_type);
225 if (qpn == -1) { 229 if (qpn == -1) {
226 dev_notice(&device->dev, 230 dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
227 "ib_register_mad_agent: invalid QP Type %d\n", 231 __func__, qp_type);
228 qp_type);
229 goto error1; 232 goto error1;
230 } 233 }
231 234
232 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { 235 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
233 dev_notice(&device->dev, 236 dev_dbg_ratelimited(&device->dev,
234 "ib_register_mad_agent: invalid RMPP Version %u\n", 237 "%s: invalid RMPP Version %u\n",
235 rmpp_version); 238 __func__, rmpp_version);
236 goto error1; 239 goto error1;
237 } 240 }
238 241
239 /* Validate MAD registration request if supplied */ 242 /* Validate MAD registration request if supplied */
240 if (mad_reg_req) { 243 if (mad_reg_req) {
241 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { 244 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
242 dev_notice(&device->dev, 245 dev_dbg_ratelimited(&device->dev,
243 "ib_register_mad_agent: invalid Class Version %u\n", 246 "%s: invalid Class Version %u\n",
244 mad_reg_req->mgmt_class_version); 247 __func__,
248 mad_reg_req->mgmt_class_version);
245 goto error1; 249 goto error1;
246 } 250 }
247 if (!recv_handler) { 251 if (!recv_handler) {
248 dev_notice(&device->dev, 252 dev_dbg_ratelimited(&device->dev,
249 "ib_register_mad_agent: no recv_handler\n"); 253 "%s: no recv_handler\n", __func__);
250 goto error1; 254 goto error1;
251 } 255 }
252 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { 256 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
256 */ 260 */
257 if (mad_reg_req->mgmt_class != 261 if (mad_reg_req->mgmt_class !=
258 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 262 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
259 dev_notice(&device->dev, 263 dev_dbg_ratelimited(&device->dev,
260 "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", 264 "%s: Invalid Mgmt Class 0x%x\n",
261 mad_reg_req->mgmt_class); 265 __func__, mad_reg_req->mgmt_class);
262 goto error1; 266 goto error1;
263 } 267 }
264 } else if (mad_reg_req->mgmt_class == 0) { 268 } else if (mad_reg_req->mgmt_class == 0) {
@@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
266 * Class 0 is reserved in IBA and is used for 270 * Class 0 is reserved in IBA and is used for
267 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 271 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
268 */ 272 */
269 dev_notice(&device->dev, 273 dev_dbg_ratelimited(&device->dev,
270 "ib_register_mad_agent: Invalid Mgmt Class 0\n"); 274 "%s: Invalid Mgmt Class 0\n",
275 __func__);
271 goto error1; 276 goto error1;
272 } else if (is_vendor_class(mad_reg_req->mgmt_class)) { 277 } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
273 /* 278 /*
@@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
275 * ensure supplied OUI is not zero 280 * ensure supplied OUI is not zero
276 */ 281 */
277 if (!is_vendor_oui(mad_reg_req->oui)) { 282 if (!is_vendor_oui(mad_reg_req->oui)) {
278 dev_notice(&device->dev, 283 dev_dbg_ratelimited(&device->dev,
279 "ib_register_mad_agent: No OUI specified for class 0x%x\n", 284 "%s: No OUI specified for class 0x%x\n",
280 mad_reg_req->mgmt_class); 285 __func__,
286 mad_reg_req->mgmt_class);
281 goto error1; 287 goto error1;
282 } 288 }
283 } 289 }
284 /* Make sure class supplied is consistent with RMPP */ 290 /* Make sure class supplied is consistent with RMPP */
285 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { 291 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
286 if (rmpp_version) { 292 if (rmpp_version) {
287 dev_notice(&device->dev, 293 dev_dbg_ratelimited(&device->dev,
288 "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", 294 "%s: RMPP version for non-RMPP class 0x%x\n",
289 mad_reg_req->mgmt_class); 295 __func__, mad_reg_req->mgmt_class);
290 goto error1; 296 goto error1;
291 } 297 }
292 } 298 }
@@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
297 IB_MGMT_CLASS_SUBN_LID_ROUTED) && 303 IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
298 (mad_reg_req->mgmt_class != 304 (mad_reg_req->mgmt_class !=
299 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { 305 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
300 dev_notice(&device->dev, 306 dev_dbg_ratelimited(&device->dev,
301 "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", 307 "%s: Invalid SM QP type: class 0x%x\n",
302 mad_reg_req->mgmt_class); 308 __func__, mad_reg_req->mgmt_class);
303 goto error1; 309 goto error1;
304 } 310 }
305 } else { 311 } else {
@@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
307 IB_MGMT_CLASS_SUBN_LID_ROUTED) || 313 IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
308 (mad_reg_req->mgmt_class == 314 (mad_reg_req->mgmt_class ==
309 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { 315 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
310 dev_notice(&device->dev, 316 dev_dbg_ratelimited(&device->dev,
311 "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", 317 "%s: Invalid GS QP type: class 0x%x\n",
312 mad_reg_req->mgmt_class); 318 __func__, mad_reg_req->mgmt_class);
313 goto error1; 319 goto error1;
314 } 320 }
315 } 321 }
@@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
324 /* Validate device and port */ 330 /* Validate device and port */
325 port_priv = ib_get_mad_port(device, port_num); 331 port_priv = ib_get_mad_port(device, port_num);
326 if (!port_priv) { 332 if (!port_priv) {
327 dev_notice(&device->dev, 333 dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
328 "ib_register_mad_agent: Invalid port %d\n", 334 __func__, port_num);
329 port_num);
330 ret = ERR_PTR(-ENODEV); 335 ret = ERR_PTR(-ENODEV);
331 goto error1; 336 goto error1;
332 } 337 }
333 338
334 /* Verify the QP requested is supported. For example, Ethernet devices 339 /* Verify the QP requested is supported. For example, Ethernet devices
335 * will not have QP0 */ 340 * will not have QP0.
341 */
336 if (!port_priv->qp_info[qpn].qp) { 342 if (!port_priv->qp_info[qpn].qp) {
337 dev_notice(&device->dev, 343 dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
338 "ib_register_mad_agent: QP %d not supported\n", qpn); 344 __func__, qpn);
339 ret = ERR_PTR(-EPROTONOSUPPORT); 345 ret = ERR_PTR(-EPROTONOSUPPORT);
340 goto error1; 346 goto error1;
341 } 347 }
@@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2408} 2414}
2409 2415
2410void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, 2416void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2411 int timeout_ms) 2417 unsigned long timeout_ms)
2412{ 2418{
2413 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); 2419 mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2414 wait_for_response(mad_send_wr); 2420 wait_for_response(mad_send_wr);
@@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
3183 cq_size *= 2; 3189 cq_size *= 2;
3184 3190
3185 port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, 3191 port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
3186 IB_POLL_WORKQUEUE); 3192 IB_POLL_UNBOUND_WORKQUEUE);
3187 if (IS_ERR(port_priv->cq)) { 3193 if (IS_ERR(port_priv->cq)) {
3188 dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); 3194 dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
3189 ret = PTR_ERR(port_priv->cq); 3195 ret = PTR_ERR(port_priv->cq);
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index d84ae1671898..216509036aa8 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
221void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); 221void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
222 222
223void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, 223void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
224 int timeout_ms); 224 unsigned long timeout_ms);
225 225
226#endif /* __IB_MAD_PRIV_H__ */ 226#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 3ccaae18ad75..724f5a62e82f 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -47,9 +47,9 @@ static struct {
47 const struct rdma_nl_cbs *cb_table; 47 const struct rdma_nl_cbs *cb_table;
48} rdma_nl_types[RDMA_NL_NUM_CLIENTS]; 48} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
49 49
50int rdma_nl_chk_listeners(unsigned int group) 50bool rdma_nl_chk_listeners(unsigned int group)
51{ 51{
52 return (netlink_has_listeners(nls, group)) ? 0 : -1; 52 return netlink_has_listeners(nls, group);
53} 53}
54EXPORT_SYMBOL(rdma_nl_chk_listeners); 54EXPORT_SYMBOL(rdma_nl_chk_listeners);
55 55
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0385ab438320..573399e3ccc1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
179{ 179{
180 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) 180 if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
181 return -EMSGSIZE; 181 return -EMSGSIZE;
182 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) 182 if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
183 dev_name(&device->dev)))
183 return -EMSGSIZE; 184 return -EMSGSIZE;
184 185
185 return 0; 186 return 0;
@@ -645,6 +646,36 @@ err:
645 return err; 646 return err;
646} 647}
647 648
649static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
650 struct netlink_ext_ack *extack)
651{
652 struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
653 struct ib_device *device;
654 u32 index;
655 int err;
656
657 err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
658 extack);
659 if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
660 return -EINVAL;
661
662 index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
663 device = ib_device_get_by_index(index);
664 if (!device)
665 return -EINVAL;
666
667 if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
668 char name[IB_DEVICE_NAME_MAX] = {};
669
670 nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
671 IB_DEVICE_NAME_MAX);
672 err = ib_device_rename(device, name);
673 }
674
675 put_device(&device->dev);
676 return err;
677}
678
648static int _nldev_get_dumpit(struct ib_device *device, 679static int _nldev_get_dumpit(struct ib_device *device,
649 struct sk_buff *skb, 680 struct sk_buff *skb,
650 struct netlink_callback *cb, 681 struct netlink_callback *cb,
@@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
1077 .doit = nldev_get_doit, 1108 .doit = nldev_get_doit,
1078 .dump = nldev_get_dumpit, 1109 .dump = nldev_get_dumpit,
1079 }, 1110 },
1111 [RDMA_NLDEV_CMD_SET] = {
1112 .doit = nldev_set_doit,
1113 .flags = RDMA_NL_ADMIN_PERM,
1114 },
1080 [RDMA_NLDEV_CMD_PORT_GET] = { 1115 [RDMA_NLDEV_CMD_PORT_GET] = {
1081 .doit = nldev_port_get_doit, 1116 .doit = nldev_port_get_doit,
1082 .dump = nldev_port_get_dumpit, 1117 .dump = nldev_port_get_dumpit,
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index c4118bcd5103..752a55c6bdce 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
794 uverbs_uobject_put(uobj); 794 uverbs_uobject_put(uobj);
795} 795}
796 796
797static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
798{
799 struct ib_device *ib_dev = ibcontext->device;
800 struct task_struct *owning_process = NULL;
801 struct mm_struct *owning_mm = NULL;
802
803 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
804 if (!owning_process)
805 return;
806
807 owning_mm = get_task_mm(owning_process);
808 if (!owning_mm) {
809 pr_info("no mm, disassociate ucontext is pending task termination\n");
810 while (1) {
811 put_task_struct(owning_process);
812 usleep_range(1000, 2000);
813 owning_process = get_pid_task(ibcontext->tgid,
814 PIDTYPE_PID);
815 if (!owning_process ||
816 owning_process->state == TASK_DEAD) {
817 pr_info("disassociate ucontext done, task was terminated\n");
818 /* in case task was dead need to release the
819 * task struct.
820 */
821 if (owning_process)
822 put_task_struct(owning_process);
823 return;
824 }
825 }
826 }
827
828 down_write(&owning_mm->mmap_sem);
829 ib_dev->disassociate_ucontext(ibcontext);
830 up_write(&owning_mm->mmap_sem);
831 mmput(owning_mm);
832 put_task_struct(owning_process);
833}
834
835/* 797/*
836 * Drop the ucontext off the ufile and completely disconnect it from the 798 * Drop the ucontext off the ufile and completely disconnect it from the
837 * ib_device 799 * ib_device
@@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
840 enum rdma_remove_reason reason) 802 enum rdma_remove_reason reason)
841{ 803{
842 struct ib_ucontext *ucontext = ufile->ucontext; 804 struct ib_ucontext *ucontext = ufile->ucontext;
805 struct ib_device *ib_dev = ucontext->device;
843 int ret; 806 int ret;
844 807
845 if (reason == RDMA_REMOVE_DRIVER_REMOVE) 808 /*
846 ufile_disassociate_ucontext(ucontext); 809 * If we are closing the FD then the user mmap VMAs must have
810 * already been destroyed as they hold on to the filep, otherwise
811 * they need to be zap'd.
812 */
813 if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
814 uverbs_user_mmap_disassociate(ufile);
815 if (ib_dev->disassociate_ucontext)
816 ib_dev->disassociate_ucontext(ucontext);
817 }
847 818
848 put_pid(ucontext->tgid); 819 ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
849 ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
850 RDMACG_RESOURCE_HCA_HANDLE); 820 RDMACG_RESOURCE_HCA_HANDLE);
851 821
852 /* 822 /*
853 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove 823 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
854 * the error return. 824 * the error return.
855 */ 825 */
856 ret = ucontext->device->dealloc_ucontext(ucontext); 826 ret = ib_dev->dealloc_ucontext(ucontext);
857 WARN_ON(ret); 827 WARN_ON(ret);
858 828
859 ufile->ucontext = NULL; 829 ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index f962f2a593ba..4886d2bba7c7 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
160void uverbs_destroy_api(struct uverbs_api *uapi); 160void uverbs_destroy_api(struct uverbs_api *uapi);
161void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, 161void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
162 unsigned int num_attrs); 162 unsigned int num_attrs);
163void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
163 164
164#endif /* RDMA_CORE_H */ 165#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3b7fa0ccaa08..06d8657ce583 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
50 50
51 dev = container_of(res, struct ib_device, res); 51 dev = container_of(res, struct ib_device, res);
52 pr_err("restrack: %s", CUT_HERE); 52 pr_err("restrack: %s", CUT_HERE);
53 pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n", 53 dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
54 dev->name);
55 hash_for_each(res->hash, bkt, e, node) { 54 hash_for_each(res->hash, bkt, e, node) {
56 if (rdma_is_kernel_res(e)) { 55 if (rdma_is_kernel_res(e)) {
57 owner = e->kern_name; 56 owner = e->kern_name;
@@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
156 } 155 }
157} 156}
158 157
158void rdma_restrack_set_task(struct rdma_restrack_entry *res,
159 const char *caller)
160{
161 if (caller) {
162 res->kern_name = caller;
163 return;
164 }
165
166 if (res->task)
167 put_task_struct(res->task);
168 get_task_struct(current);
169 res->task = current;
170}
171EXPORT_SYMBOL(rdma_restrack_set_task);
172
159void rdma_restrack_add(struct rdma_restrack_entry *res) 173void rdma_restrack_add(struct rdma_restrack_entry *res)
160{ 174{
161 struct ib_device *dev = res_to_dev(res); 175 struct ib_device *dev = res_to_dev(res);
@@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
168 182
169 if (res_is_user(res)) { 183 if (res_is_user(res)) {
170 if (!res->task) 184 if (!res->task)
171 rdma_restrack_set_task(res, current); 185 rdma_restrack_set_task(res, NULL);
172 res->kern_name = NULL; 186 res->kern_name = NULL;
173 } else { 187 } else {
174 set_kern_name(res); 188 set_kern_name(res);
@@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
209 struct ib_device *dev; 223 struct ib_device *dev;
210 224
211 if (!res->valid) 225 if (!res->valid)
212 return; 226 goto out;
213 227
214 dev = res_to_dev(res); 228 dev = res_to_dev(res);
215 if (!dev) 229 if (!dev)
@@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
222 down_write(&dev->res.rwsem); 236 down_write(&dev->res.rwsem);
223 hash_del(&res->node); 237 hash_del(&res->node);
224 res->valid = false; 238 res->valid = false;
225 if (res->task)
226 put_task_struct(res->task);
227 up_write(&dev->res.rwsem); 239 up_write(&dev->res.rwsem);
240
241out:
242 if (res->task) {
243 put_task_struct(res->task);
244 res->task = NULL;
245 }
228} 246}
229EXPORT_SYMBOL(rdma_restrack_del); 247EXPORT_SYMBOL(rdma_restrack_del);
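
A minimal sketch (not from the kernel tree) of the rdma_restrack_set_task() contract the restrack hunk above introduces: a NULL caller drops any previously held task reference and pins the current user task, while a non-NULL caller string marks a kernel-owned resource. The wrapper and the caller name below are illustrative only.

#include <rdma/restrack.h>

static void example_track(struct rdma_restrack_entry *res, bool user_owned)
{
	if (user_owned)
		/* NULL caller: put any old task ref, take one on current */
		rdma_restrack_set_task(res, NULL);
	else
		/* kernel-owned: only the caller's name is recorded */
		rdma_restrack_set_task(res, "example_ulp");
}
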
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index b1d4bbf4ce5c..cbaaaa92fff3 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
49} 49}
50 50
51int ib_sa_mcmember_rec_query(struct ib_sa_client *client, 51int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
52 struct ib_device *device, u8 port_num, 52 struct ib_device *device, u8 port_num, u8 method,
53 u8 method,
54 struct ib_sa_mcmember_rec *rec, 53 struct ib_sa_mcmember_rec *rec,
55 ib_sa_comp_mask comp_mask, 54 ib_sa_comp_mask comp_mask,
56 int timeout_ms, gfp_t gfp_mask, 55 unsigned long timeout_ms, gfp_t gfp_mask,
57 void (*callback)(int status, 56 void (*callback)(int status,
58 struct ib_sa_mcmember_rec *resp, 57 struct ib_sa_mcmember_rec *resp,
59 void *context), 58 void *context),
60 void *context, 59 void *context, struct ib_sa_query **sa_query);
61 struct ib_sa_query **sa_query);
62 60
63int mcast_init(void); 61int mcast_init(void);
64void mcast_cleanup(void); 62void mcast_cleanup(void);
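
The sa.h prototypes above also widen timeout_ms from int to unsigned long; deadlines in the SA layer are jiffies values, which are themselves unsigned long. A hedged illustration of the type flow (the helper name is made up):

#include <linux/jiffies.h>

static unsigned long example_abs_timeout(unsigned long timeout_ms)
{
	/* jiffies and the msecs_to_jiffies() result are unsigned long */
	return jiffies + msecs_to_jiffies(timeout_ms);
}
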
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7b794a14d6e8..be5ba5e15496 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
761 761
762 /* Construct the family header first */ 762 /* Construct the family header first */
763 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); 763 header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
764 memcpy(header->device_name, query->port->agent->device->name, 764 memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
765 LS_DEVICE_NAME_MAX); 765 LS_DEVICE_NAME_MAX);
766 header->port_num = query->port->port_num; 766 header->port_num = query->port->port_num;
767 767
@@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
835 struct sk_buff *skb = NULL; 835 struct sk_buff *skb = NULL;
836 struct nlmsghdr *nlh; 836 struct nlmsghdr *nlh;
837 void *data; 837 void *data;
838 int ret = 0;
839 struct ib_sa_mad *mad; 838 struct ib_sa_mad *mad;
840 int len; 839 int len;
841 840
@@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
862 /* Repair the nlmsg header length */ 861 /* Repair the nlmsg header length */
863 nlmsg_end(skb, nlh); 862 nlmsg_end(skb, nlh);
864 863
865 ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); 864 return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
866 if (!ret)
867 ret = len;
868 else
869 ret = 0;
870
871 return ret;
872} 865}
873 866
874static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) 867static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
891 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 884 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
892 885
893 ret = ib_nl_send_msg(query, gfp_mask); 886 ret = ib_nl_send_msg(query, gfp_mask);
894 if (ret <= 0) { 887 if (ret) {
895 ret = -EIO; 888 ret = -EIO;
896 /* Remove the request */ 889 /* Remove the request */
897 spin_lock_irqsave(&ib_nl_request_lock, flags); 890 spin_lock_irqsave(&ib_nl_request_lock, flags);
898 list_del(&query->list); 891 list_del(&query->list);
899 spin_unlock_irqrestore(&ib_nl_request_lock, flags); 892 spin_unlock_irqrestore(&ib_nl_request_lock, flags);
900 } else {
901 ret = 0;
902 } 893 }
903 894
904 return ret; 895 return ret;
@@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
1227 return src_path_mask; 1218 return src_path_mask;
1228} 1219}
1229 1220
1230static int roce_resolve_route_from_path(struct sa_path_rec *rec,
1231 const struct ib_gid_attr *attr)
1232{
1233 struct rdma_dev_addr dev_addr = {};
1234 union {
1235 struct sockaddr _sockaddr;
1236 struct sockaddr_in _sockaddr_in;
1237 struct sockaddr_in6 _sockaddr_in6;
1238 } sgid_addr, dgid_addr;
1239 int ret;
1240
1241 if (rec->roce.route_resolved)
1242 return 0;
1243 if (!attr || !attr->ndev)
1244 return -EINVAL;
1245
1246 dev_addr.bound_dev_if = attr->ndev->ifindex;
1247 /* TODO: Use net from the ib_gid_attr once it is added to it,
1248 * until then, limit itself to init_net.
1249 */
1250 dev_addr.net = &init_net;
1251
1252 rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
1253 rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
1254
1255 /* validate the route */
1256 ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
1257 &dgid_addr._sockaddr, &dev_addr);
1258 if (ret)
1259 return ret;
1260
1261 if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
1262 dev_addr.network == RDMA_NETWORK_IPV6) &&
1263 rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
1264 return -EINVAL;
1265
1266 rec->roce.route_resolved = true;
1267 return 0;
1268}
1269
1270static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, 1221static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
1271 struct sa_path_rec *rec, 1222 struct sa_path_rec *rec,
1272 struct rdma_ah_attr *ah_attr, 1223 struct rdma_ah_attr *ah_attr,
@@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1409 spin_unlock_irqrestore(&tid_lock, flags); 1360 spin_unlock_irqrestore(&tid_lock, flags);
1410} 1361}
1411 1362
1412static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) 1363static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1364 gfp_t gfp_mask)
1413{ 1365{
1414 bool preload = gfpflags_allow_blocking(gfp_mask); 1366 bool preload = gfpflags_allow_blocking(gfp_mask);
1415 unsigned long flags; 1367 unsigned long flags;
@@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
1433 1385
1434 if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && 1386 if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1435 (!(query->flags & IB_SA_QUERY_OPA))) { 1387 (!(query->flags & IB_SA_QUERY_OPA))) {
1436 if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { 1388 if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1437 if (!ib_nl_make_request(query, gfp_mask)) 1389 if (!ib_nl_make_request(query, gfp_mask))
1438 return id; 1390 return id;
1439 } 1391 }
@@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
1599 struct ib_device *device, u8 port_num, 1551 struct ib_device *device, u8 port_num,
1600 struct sa_path_rec *rec, 1552 struct sa_path_rec *rec,
1601 ib_sa_comp_mask comp_mask, 1553 ib_sa_comp_mask comp_mask,
1602 int timeout_ms, gfp_t gfp_mask, 1554 unsigned long timeout_ms, gfp_t gfp_mask,
1603 void (*callback)(int status, 1555 void (*callback)(int status,
1604 struct sa_path_rec *resp, 1556 struct sa_path_rec *resp,
1605 void *context), 1557 void *context),
@@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
1753 struct ib_device *device, u8 port_num, u8 method, 1705 struct ib_device *device, u8 port_num, u8 method,
1754 struct ib_sa_service_rec *rec, 1706 struct ib_sa_service_rec *rec,
1755 ib_sa_comp_mask comp_mask, 1707 ib_sa_comp_mask comp_mask,
1756 int timeout_ms, gfp_t gfp_mask, 1708 unsigned long timeout_ms, gfp_t gfp_mask,
1757 void (*callback)(int status, 1709 void (*callback)(int status,
1758 struct ib_sa_service_rec *resp, 1710 struct ib_sa_service_rec *resp,
1759 void *context), 1711 void *context),
@@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1850 u8 method, 1802 u8 method,
1851 struct ib_sa_mcmember_rec *rec, 1803 struct ib_sa_mcmember_rec *rec,
1852 ib_sa_comp_mask comp_mask, 1804 ib_sa_comp_mask comp_mask,
1853 int timeout_ms, gfp_t gfp_mask, 1805 unsigned long timeout_ms, gfp_t gfp_mask,
1854 void (*callback)(int status, 1806 void (*callback)(int status,
1855 struct ib_sa_mcmember_rec *resp, 1807 struct ib_sa_mcmember_rec *resp,
1856 void *context), 1808 void *context),
@@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1941 struct ib_device *device, u8 port_num, 1893 struct ib_device *device, u8 port_num,
1942 struct ib_sa_guidinfo_rec *rec, 1894 struct ib_sa_guidinfo_rec *rec,
1943 ib_sa_comp_mask comp_mask, u8 method, 1895 ib_sa_comp_mask comp_mask, u8 method,
1944 int timeout_ms, gfp_t gfp_mask, 1896 unsigned long timeout_ms, gfp_t gfp_mask,
1945 void (*callback)(int status, 1897 void (*callback)(int status,
1946 struct ib_sa_guidinfo_rec *resp, 1898 struct ib_sa_guidinfo_rec *resp,
1947 void *context), 1899 void *context),
@@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
2108} 2060}
2109 2061
2110static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, 2062static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
2111 int timeout_ms, 2063 unsigned long timeout_ms,
2112 void (*callback)(void *context), 2064 void (*callback)(void *context),
2113 void *context, 2065 void *context,
2114 struct ib_sa_query **sa_query) 2066 struct ib_sa_query **sa_query)
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 9b0bea8303e0..1143c0448666 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
685 if (event != LSM_POLICY_CHANGE) 685 if (event != LSM_POLICY_CHANGE)
686 return NOTIFY_DONE; 686 return NOTIFY_DONE;
687 687
688 ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security, 688 ag->smp_allowed = !security_ib_endport_manage_subnet(
689 ag->device->name, 689 ag->security, dev_name(&ag->device->dev), ag->port_num);
690 ag->port_num);
691 690
692 return NOTIFY_OK; 691 return NOTIFY_OK;
693} 692}
@@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
708 return 0; 707 return 0;
709 708
710 ret = security_ib_endport_manage_subnet(agent->security, 709 ret = security_ib_endport_manage_subnet(agent->security,
711 agent->device->name, 710 dev_name(&agent->device->dev),
712 agent->port_num); 711 agent->port_num);
713 if (ret) 712 if (ret)
714 return ret; 713 return ret;
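
Both security.c hunks swap the old embedded name field for dev_name() on the ib_device's struct device; a one-function hedged sketch of the idiom (the wrapper is illustrative):

#include <linux/device.h>
#include <rdma/ib_verbs.h>

static const char *example_ibdev_name(struct ib_device *ibdev)
{
	/* the device name now lives on the embedded struct device */
	return dev_name(&ibdev->dev);
}
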
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7fd14ead7b37..6fcce2c206c6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
512 ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 512 ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
513 40 + offset / 8, sizeof(data)); 513 40 + offset / 8, sizeof(data));
514 if (ret < 0) 514 if (ret < 0)
515 return sprintf(buf, "N/A (no PMA)\n"); 515 return ret;
516 516
517 switch (width) { 517 switch (width) {
518 case 4: 518 case 4:
@@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
1036 p->port_num = port_num; 1036 p->port_num = port_num;
1037 1037
1038 ret = kobject_init_and_add(&p->kobj, &port_type, 1038 ret = kobject_init_and_add(&p->kobj, &port_type,
1039 device->ports_parent, 1039 device->ports_kobj,
1040 "%d", port_num); 1040 "%d", port_num);
1041 if (ret) { 1041 if (ret) {
1042 kfree(p); 1042 kfree(p);
@@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
1057 goto err_put; 1057 goto err_put;
1058 } 1058 }
1059 1059
1060 p->pma_table = get_counter_table(device, port_num); 1060 if (device->process_mad) {
1061 ret = sysfs_create_group(&p->kobj, p->pma_table); 1061 p->pma_table = get_counter_table(device, port_num);
1062 if (ret) 1062 ret = sysfs_create_group(&p->kobj, p->pma_table);
1063 goto err_put_gid_attrs; 1063 if (ret)
1064 goto err_put_gid_attrs;
1065 }
1064 1066
1065 p->gid_group.name = "gids"; 1067 p->gid_group.name = "gids";
1066 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); 1068 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -1118,9 +1120,9 @@ static int add_port(struct ib_device *device, int port_num,
1118 } 1120 }
1119 1121
1120 /* 1122 /*
1121 * If port == 0, it means we have only one port and the parent 1123 * If port == 0, it means hw_counters are per device and not per
1122 * device, not this port device, should be the holder of the 1124 * port, so holder should be device. Therefore skip per port counter
1123 * hw_counters 1125 * initialization.
1124 */ 1126 */
1125 if (device->alloc_hw_stats && port_num) 1127 if (device->alloc_hw_stats && port_num)
1126 setup_hw_stats(device, p, port_num); 1128 setup_hw_stats(device, p, port_num);
@@ -1173,7 +1175,8 @@ err_free_gid:
1173 p->gid_group.attrs = NULL; 1175 p->gid_group.attrs = NULL;
1174 1176
1175err_remove_pma: 1177err_remove_pma:
1176 sysfs_remove_group(&p->kobj, p->pma_table); 1178 if (p->pma_table)
1179 sysfs_remove_group(&p->kobj, p->pma_table);
1177 1180
1178err_put_gid_attrs: 1181err_put_gid_attrs:
1179 kobject_put(&p->gid_attr_group->kobj); 1182 kobject_put(&p->gid_attr_group->kobj);
@@ -1183,7 +1186,7 @@ err_put:
1183 return ret; 1186 return ret;
1184} 1187}
1185 1188
1186static ssize_t show_node_type(struct device *device, 1189static ssize_t node_type_show(struct device *device,
1187 struct device_attribute *attr, char *buf) 1190 struct device_attribute *attr, char *buf)
1188{ 1191{
1189 struct ib_device *dev = container_of(device, struct ib_device, dev); 1192 struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
1198 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); 1201 default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
1199 } 1202 }
1200} 1203}
1204static DEVICE_ATTR_RO(node_type);
1201 1205
1202static ssize_t show_sys_image_guid(struct device *device, 1206static ssize_t sys_image_guid_show(struct device *device,
1203 struct device_attribute *dev_attr, char *buf) 1207 struct device_attribute *dev_attr, char *buf)
1204{ 1208{
1205 struct ib_device *dev = container_of(device, struct ib_device, dev); 1209 struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
1210 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), 1214 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
1211 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); 1215 be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
1212} 1216}
1217static DEVICE_ATTR_RO(sys_image_guid);
1213 1218
1214static ssize_t show_node_guid(struct device *device, 1219static ssize_t node_guid_show(struct device *device,
1215 struct device_attribute *attr, char *buf) 1220 struct device_attribute *attr, char *buf)
1216{ 1221{
1217 struct ib_device *dev = container_of(device, struct ib_device, dev); 1222 struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
1222 be16_to_cpu(((__be16 *) &dev->node_guid)[2]), 1227 be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
1223 be16_to_cpu(((__be16 *) &dev->node_guid)[3])); 1228 be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
1224} 1229}
1230static DEVICE_ATTR_RO(node_guid);
1225 1231
1226static ssize_t show_node_desc(struct device *device, 1232static ssize_t node_desc_show(struct device *device,
1227 struct device_attribute *attr, char *buf) 1233 struct device_attribute *attr, char *buf)
1228{ 1234{
1229 struct ib_device *dev = container_of(device, struct ib_device, dev); 1235 struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
1231 return sprintf(buf, "%.64s\n", dev->node_desc); 1237 return sprintf(buf, "%.64s\n", dev->node_desc);
1232} 1238}
1233 1239
1234static ssize_t set_node_desc(struct device *device, 1240static ssize_t node_desc_store(struct device *device,
1235 struct device_attribute *attr, 1241 struct device_attribute *attr,
1236 const char *buf, size_t count) 1242 const char *buf, size_t count)
1237{ 1243{
1238 struct ib_device *dev = container_of(device, struct ib_device, dev); 1244 struct ib_device *dev = container_of(device, struct ib_device, dev);
1239 struct ib_device_modify desc = {}; 1245 struct ib_device_modify desc = {};
@@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
1249 1255
1250 return count; 1256 return count;
1251} 1257}
1258static DEVICE_ATTR_RW(node_desc);
1252 1259
1253static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, 1260static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
1254 char *buf) 1261 char *buf)
1255{ 1262{
1256 struct ib_device *dev = container_of(device, struct ib_device, dev); 1263 struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1259 strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); 1266 strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
1260 return strlen(buf); 1267 return strlen(buf);
1261} 1268}
1269static DEVICE_ATTR_RO(fw_ver);
1270
1271static struct attribute *ib_dev_attrs[] = {
1272 &dev_attr_node_type.attr,
1273 &dev_attr_node_guid.attr,
1274 &dev_attr_sys_image_guid.attr,
1275 &dev_attr_fw_ver.attr,
1276 &dev_attr_node_desc.attr,
1277 NULL,
1278};
1262 1279
1263static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); 1280static const struct attribute_group dev_attr_group = {
1264static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); 1281 .attrs = ib_dev_attrs,
1265static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
1266static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
1267static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1268
1269static struct device_attribute *ib_class_attributes[] = {
1270 &dev_attr_node_type,
1271 &dev_attr_sys_image_guid,
1272 &dev_attr_node_guid,
1273 &dev_attr_node_desc,
1274 &dev_attr_fw_ver,
1275}; 1282};
1276 1283
1277static void free_port_list_attributes(struct ib_device *device) 1284static void free_port_list_attributes(struct ib_device *device)
@@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
1285 kfree(port->hw_stats); 1292 kfree(port->hw_stats);
1286 free_hsag(&port->kobj, port->hw_stats_ag); 1293 free_hsag(&port->kobj, port->hw_stats_ag);
1287 } 1294 }
1288 sysfs_remove_group(p, port->pma_table); 1295
1296 if (port->pma_table)
1297 sysfs_remove_group(p, port->pma_table);
1289 sysfs_remove_group(p, &port->pkey_group); 1298 sysfs_remove_group(p, &port->pkey_group);
1290 sysfs_remove_group(p, &port->gid_group); 1299 sysfs_remove_group(p, &port->gid_group);
1291 sysfs_remove_group(&port->gid_attr_group->kobj, 1300 sysfs_remove_group(&port->gid_attr_group->kobj,
@@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
1296 kobject_put(p); 1305 kobject_put(p);
1297 } 1306 }
1298 1307
1299 kobject_put(device->ports_parent); 1308 kobject_put(device->ports_kobj);
1300} 1309}
1301 1310
1302int ib_device_register_sysfs(struct ib_device *device, 1311int ib_device_register_sysfs(struct ib_device *device,
@@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
1307 int ret; 1316 int ret;
1308 int i; 1317 int i;
1309 1318
1310 ret = dev_set_name(class_dev, "%s", device->name); 1319 device->groups[0] = &dev_attr_group;
1311 if (ret) 1320 class_dev->groups = device->groups;
1312 return ret;
1313 1321
1314 ret = device_add(class_dev); 1322 ret = device_add(class_dev);
1315 if (ret) 1323 if (ret)
1316 goto err; 1324 goto err;
1317 1325
1318 for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { 1326 device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
1319 ret = device_create_file(class_dev, ib_class_attributes[i]); 1327 if (!device->ports_kobj) {
1320 if (ret)
1321 goto err_unregister;
1322 }
1323
1324 device->ports_parent = kobject_create_and_add("ports",
1325 &class_dev->kobj);
1326 if (!device->ports_parent) {
1327 ret = -ENOMEM; 1328 ret = -ENOMEM;
1328 goto err_put; 1329 goto err_put;
1329 } 1330 }
@@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
1347 1348
1348err_put: 1349err_put:
1349 free_port_list_attributes(device); 1350 free_port_list_attributes(device);
1350
1351err_unregister:
1352 device_del(class_dev); 1351 device_del(class_dev);
1353
1354err: 1352err:
1355 return ret; 1353 return ret;
1356} 1354}
1357 1355
1358void ib_device_unregister_sysfs(struct ib_device *device) 1356void ib_device_unregister_sysfs(struct ib_device *device)
1359{ 1357{
1360 int i; 1358 /* Hold device until ib_dealloc_device() */
1361 1359 get_device(&device->dev);
1362 /* Hold kobject until ib_dealloc_device() */
1363 kobject_get(&device->dev.kobj);
1364 1360
1365 free_port_list_attributes(device); 1361 free_port_list_attributes(device);
1366 1362
@@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
1369 free_hsag(&device->dev.kobj, device->hw_stats_ag); 1365 free_hsag(&device->dev.kobj, device->hw_stats_ag);
1370 } 1366 }
1371 1367
1372 for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
1373 device_remove_file(&device->dev, ib_class_attributes[i]);
1374
1375 device_unregister(&device->dev); 1368 device_unregister(&device->dev);
1376} 1369}
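
The sysfs.c conversion above drops the per-file device_create_file() loop in favour of a static attribute group wired up before device_add(); a self-contained, hedged sketch of that pattern with illustrative names:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "example\n");
}
static DEVICE_ATTR_RO(example);	/* defines dev_attr_example, mode 0444 */

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,
	NULL,
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

static const struct attribute_group *example_groups[] = {
	&example_attr_group,
	NULL,
};

/* Before device_add(): dev->groups = example_groups; the driver core
 * then creates and removes the files automatically. */
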
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a41792dbae1f..c6144df47ea4 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
85 struct page **page_list; 85 struct page **page_list;
86 struct vm_area_struct **vma_list; 86 struct vm_area_struct **vma_list;
87 unsigned long lock_limit; 87 unsigned long lock_limit;
88 unsigned long new_pinned;
88 unsigned long cur_base; 89 unsigned long cur_base;
90 struct mm_struct *mm;
89 unsigned long npages; 91 unsigned long npages;
90 int ret; 92 int ret;
91 int i; 93 int i;
@@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
107 if (!can_do_mlock()) 109 if (!can_do_mlock())
108 return ERR_PTR(-EPERM); 110 return ERR_PTR(-EPERM);
109 111
110 umem = kzalloc(sizeof *umem, GFP_KERNEL); 112 if (access & IB_ACCESS_ON_DEMAND) {
111 if (!umem) 113 umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
112 return ERR_PTR(-ENOMEM); 114 if (!umem)
115 return ERR_PTR(-ENOMEM);
116 umem->is_odp = 1;
117 } else {
118 umem = kzalloc(sizeof(*umem), GFP_KERNEL);
119 if (!umem)
120 return ERR_PTR(-ENOMEM);
121 }
113 122
114 umem->context = context; 123 umem->context = context;
115 umem->length = size; 124 umem->length = size;
116 umem->address = addr; 125 umem->address = addr;
117 umem->page_shift = PAGE_SHIFT; 126 umem->page_shift = PAGE_SHIFT;
118 umem->writable = ib_access_writable(access); 127 umem->writable = ib_access_writable(access);
128 umem->owning_mm = mm = current->mm;
129 mmgrab(mm);
119 130
120 if (access & IB_ACCESS_ON_DEMAND) { 131 if (access & IB_ACCESS_ON_DEMAND) {
121 ret = ib_umem_odp_get(context, umem, access); 132 ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
122 if (ret) 133 if (ret)
123 goto umem_kfree; 134 goto umem_kfree;
124 return umem; 135 return umem;
125 } 136 }
126 137
127 umem->odp_data = NULL;
128
129 /* We assume the memory is from hugetlb until proved otherwise */ 138 /* We assume the memory is from hugetlb until proved otherwise */
130 umem->hugetlb = 1; 139 umem->hugetlb = 1;
131 140
@@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
144 umem->hugetlb = 0; 153 umem->hugetlb = 0;
145 154
146 npages = ib_umem_num_pages(umem); 155 npages = ib_umem_num_pages(umem);
156 if (npages == 0 || npages > UINT_MAX) {
157 ret = -EINVAL;
158 goto out;
159 }
147 160
148 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 161 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
149 162
150 down_write(&current->mm->mmap_sem); 163 down_write(&mm->mmap_sem);
151 current->mm->pinned_vm += npages; 164 if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
152 if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { 165 (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
153 up_write(&current->mm->mmap_sem); 166 up_write(&mm->mmap_sem);
154 ret = -ENOMEM; 167 ret = -ENOMEM;
155 goto vma; 168 goto out;
156 } 169 }
157 up_write(&current->mm->mmap_sem); 170 mm->pinned_vm = new_pinned;
171 up_write(&mm->mmap_sem);
158 172
159 cur_base = addr & PAGE_MASK; 173 cur_base = addr & PAGE_MASK;
160 174
161 if (npages == 0 || npages > UINT_MAX) {
162 ret = -EINVAL;
163 goto vma;
164 }
165
166 ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); 175 ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
167 if (ret) 176 if (ret)
168 goto vma; 177 goto vma;
@@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
172 181
173 sg_list_start = umem->sg_head.sgl; 182 sg_list_start = umem->sg_head.sgl;
174 183
175 down_read(&current->mm->mmap_sem);
176 while (npages) { 184 while (npages) {
185 down_read(&mm->mmap_sem);
177 ret = get_user_pages_longterm(cur_base, 186 ret = get_user_pages_longterm(cur_base,
178 min_t(unsigned long, npages, 187 min_t(unsigned long, npages,
179 PAGE_SIZE / sizeof (struct page *)), 188 PAGE_SIZE / sizeof (struct page *)),
180 gup_flags, page_list, vma_list); 189 gup_flags, page_list, vma_list);
181 if (ret < 0) { 190 if (ret < 0) {
182 up_read(&current->mm->mmap_sem); 191 up_read(&mm->mmap_sem);
183 goto umem_release; 192 goto umem_release;
184 } 193 }
185 194
@@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
187 cur_base += ret * PAGE_SIZE; 196 cur_base += ret * PAGE_SIZE;
188 npages -= ret; 197 npages -= ret;
189 198
199 /* Continue to hold the mmap_sem as vma_list access
200 * needs to be protected.
201 */
190 for_each_sg(sg_list_start, sg, ret, i) { 202 for_each_sg(sg_list_start, sg, ret, i) {
191 if (vma_list && !is_vm_hugetlb_page(vma_list[i])) 203 if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
192 umem->hugetlb = 0; 204 umem->hugetlb = 0;
193 205
194 sg_set_page(sg, page_list[i], PAGE_SIZE, 0); 206 sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
195 } 207 }
208 up_read(&mm->mmap_sem);
196 209
197 /* preparing for next loop */ 210 /* preparing for next loop */
198 sg_list_start = sg; 211 sg_list_start = sg;
199 } 212 }
200 up_read(&current->mm->mmap_sem);
201 213
202 umem->nmap = ib_dma_map_sg_attrs(context->device, 214 umem->nmap = ib_dma_map_sg_attrs(context->device,
203 umem->sg_head.sgl, 215 umem->sg_head.sgl,
@@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
216umem_release: 228umem_release:
217 __ib_umem_release(context->device, umem, 0); 229 __ib_umem_release(context->device, umem, 0);
218vma: 230vma:
219 down_write(&current->mm->mmap_sem); 231 down_write(&mm->mmap_sem);
220 current->mm->pinned_vm -= ib_umem_num_pages(umem); 232 mm->pinned_vm -= ib_umem_num_pages(umem);
221 up_write(&current->mm->mmap_sem); 233 up_write(&mm->mmap_sem);
222out: 234out:
223 if (vma_list) 235 if (vma_list)
224 free_page((unsigned long) vma_list); 236 free_page((unsigned long) vma_list);
225 free_page((unsigned long) page_list); 237 free_page((unsigned long) page_list);
226umem_kfree: 238umem_kfree:
227 if (ret) 239 if (ret) {
240 mmdrop(umem->owning_mm);
228 kfree(umem); 241 kfree(umem);
242 }
229 return ret ? ERR_PTR(ret) : umem; 243 return ret ? ERR_PTR(ret) : umem;
230} 244}
231EXPORT_SYMBOL(ib_umem_get); 245EXPORT_SYMBOL(ib_umem_get);
232 246
233static void ib_umem_account(struct work_struct *work) 247static void __ib_umem_release_tail(struct ib_umem *umem)
248{
249 mmdrop(umem->owning_mm);
250 if (umem->is_odp)
251 kfree(to_ib_umem_odp(umem));
252 else
253 kfree(umem);
254}
255
256static void ib_umem_release_defer(struct work_struct *work)
234{ 257{
235 struct ib_umem *umem = container_of(work, struct ib_umem, work); 258 struct ib_umem *umem = container_of(work, struct ib_umem, work);
236 259
237 down_write(&umem->mm->mmap_sem); 260 down_write(&umem->owning_mm->mmap_sem);
238 umem->mm->pinned_vm -= umem->diff; 261 umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
239 up_write(&umem->mm->mmap_sem); 262 up_write(&umem->owning_mm->mmap_sem);
240 mmput(umem->mm); 263
241 kfree(umem); 264 __ib_umem_release_tail(umem);
242} 265}
243 266
244/** 267/**
@@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
248void ib_umem_release(struct ib_umem *umem) 271void ib_umem_release(struct ib_umem *umem)
249{ 272{
250 struct ib_ucontext *context = umem->context; 273 struct ib_ucontext *context = umem->context;
251 struct mm_struct *mm;
252 struct task_struct *task;
253 unsigned long diff;
254 274
255 if (umem->odp_data) { 275 if (umem->is_odp) {
256 ib_umem_odp_release(umem); 276 ib_umem_odp_release(to_ib_umem_odp(umem));
277 __ib_umem_release_tail(umem);
257 return; 278 return;
258 } 279 }
259 280
260 __ib_umem_release(umem->context->device, umem, 1); 281 __ib_umem_release(umem->context->device, umem, 1);
261 282
262 task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
263 if (!task)
264 goto out;
265 mm = get_task_mm(task);
266 put_task_struct(task);
267 if (!mm)
268 goto out;
269
270 diff = ib_umem_num_pages(umem);
271
272 /* 283 /*
273 * We may be called with the mm's mmap_sem already held. This 284 * We may be called with the mm's mmap_sem already held. This
274 * can happen when a userspace munmap() is the call that drops 285 * can happen when a userspace munmap() is the call that drops
275 * the last reference to our file and calls our release 286 * the last reference to our file and calls our release
276 * method. If there are memory regions to destroy, we'll end 287 * method. If there are memory regions to destroy, we'll end
277 * up here and not be able to take the mmap_sem. In that case 288 * up here and not be able to take the mmap_sem. In that case
278 * we defer the vm_locked accounting to the system workqueue. 289 * we defer the vm_locked accounting to a workqueue.
279 */ 290 */
280 if (context->closing) { 291 if (context->closing) {
281 if (!down_write_trylock(&mm->mmap_sem)) { 292 if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
282 INIT_WORK(&umem->work, ib_umem_account); 293 INIT_WORK(&umem->work, ib_umem_release_defer);
283 umem->mm = mm;
284 umem->diff = diff;
285
286 queue_work(ib_wq, &umem->work); 294 queue_work(ib_wq, &umem->work);
287 return; 295 return;
288 } 296 }
289 } else 297 } else {
290 down_write(&mm->mmap_sem); 298 down_write(&umem->owning_mm->mmap_sem);
299 }
300 umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
301 up_write(&umem->owning_mm->mmap_sem);
291 302
292 mm->pinned_vm -= diff; 303 __ib_umem_release_tail(umem);
293 up_write(&mm->mmap_sem);
294 mmput(mm);
295out:
296 kfree(umem);
297} 304}
298EXPORT_SYMBOL(ib_umem_release); 305EXPORT_SYMBOL(ib_umem_release);
299 306
@@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
303 int n; 310 int n;
304 struct scatterlist *sg; 311 struct scatterlist *sg;
305 312
306 if (umem->odp_data) 313 if (umem->is_odp)
307 return ib_umem_num_pages(umem); 314 return ib_umem_num_pages(umem);
308 315
309 n = 0; 316 n = 0;
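
ib_umem_get() above now folds the pinned-page accounting into a single check_add_overflow() test; a hedged, self-contained sketch of just that guard (the function and its parameters are illustrative):

#include <linux/overflow.h>
#include <linux/capability.h>
#include <linux/errno.h>

static int example_account_pinned(unsigned long *pinned, unsigned long npages,
				  unsigned long lock_limit)
{
	unsigned long new_pinned;

	/* reject a wrapped sum, or an over-limit sum for unprivileged users */
	if (check_add_overflow(*pinned, npages, &new_pinned) ||
	    (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)))
		return -ENOMEM;

	*pinned = new_pinned;
	return 0;
}
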
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6ec748eccff7..2b4c5e7dd5a1 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
58 struct ib_umem_odp *umem_odp = 58 struct ib_umem_odp *umem_odp =
59 container_of(n, struct ib_umem_odp, interval_tree); 59 container_of(n, struct ib_umem_odp, interval_tree);
60 60
61 return ib_umem_start(umem_odp->umem); 61 return ib_umem_start(&umem_odp->umem);
62} 62}
63 63
64/* Note that the representation of the intervals in the interval tree 64/* Note that the representation of the intervals in the interval tree
@@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
71 struct ib_umem_odp *umem_odp = 71 struct ib_umem_odp *umem_odp =
72 container_of(n, struct ib_umem_odp, interval_tree); 72 container_of(n, struct ib_umem_odp, interval_tree);
73 73
74 return ib_umem_end(umem_odp->umem) - 1; 74 return ib_umem_end(&umem_odp->umem) - 1;
75} 75}
76 76
77INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, 77INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
78 node_start, node_last, static, rbt_ib_umem) 78 node_start, node_last, static, rbt_ib_umem)
79 79
80static void ib_umem_notifier_start_account(struct ib_umem *item) 80static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
81{ 81{
82 mutex_lock(&item->odp_data->umem_mutex); 82 mutex_lock(&umem_odp->umem_mutex);
83 83 if (umem_odp->notifiers_count++ == 0)
84 /* Only update private counters for this umem if it has them.
85 * Otherwise skip it. All page faults will be delayed for this umem. */
86 if (item->odp_data->mn_counters_active) {
87 int notifiers_count = item->odp_data->notifiers_count++;
88
89 if (notifiers_count == 0)
90 /* Initialize the completion object for waiting on
91 * notifiers. Since notifier_count is zero, no one
92 * should be waiting right now. */
93 reinit_completion(&item->odp_data->notifier_completion);
94 }
95 mutex_unlock(&item->odp_data->umem_mutex);
96}
97
98static void ib_umem_notifier_end_account(struct ib_umem *item)
99{
100 mutex_lock(&item->odp_data->umem_mutex);
101
102 /* Only update private counters for this umem if it has them.
103 * Otherwise skip it. All page faults will be delayed for this umem. */
104 if (item->odp_data->mn_counters_active) {
105 /* 84 /*
106 * This sequence increase will notify the QP page fault that 85 * Initialize the completion object for waiting on
107 * the page that is going to be mapped in the spte could have 86 * notifiers. Since notifier_count is zero, no one should be
108 * been freed. 87 * waiting right now.
109 */ 88 */
110 ++item->odp_data->notifiers_seq; 89 reinit_completion(&umem_odp->notifier_completion);
111 if (--item->odp_data->notifiers_count == 0) 90 mutex_unlock(&umem_odp->umem_mutex);
112 complete_all(&item->odp_data->notifier_completion);
113 }
114 mutex_unlock(&item->odp_data->umem_mutex);
115} 91}
116 92
117/* Account for a new mmu notifier in an ib_ucontext. */ 93static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
118static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
119{ 94{
120 atomic_inc(&context->notifier_count); 95 mutex_lock(&umem_odp->umem_mutex);
96 /*
97 * This sequence increase will notify the QP page fault that the page
98 * that is going to be mapped in the spte could have been freed.
99 */
100 ++umem_odp->notifiers_seq;
101 if (--umem_odp->notifiers_count == 0)
102 complete_all(&umem_odp->notifier_completion);
103 mutex_unlock(&umem_odp->umem_mutex);
121} 104}
122 105
123/* Account for a terminating mmu notifier in an ib_ucontext. 106static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
124 * 107 u64 start, u64 end, void *cookie)
125 * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
126 * the function takes the semaphore itself. */
127static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
128{ 108{
129 int zero_notifiers = atomic_dec_and_test(&context->notifier_count); 109 struct ib_umem *umem = &umem_odp->umem;
130
131 if (zero_notifiers &&
132 !list_empty(&context->no_private_counters)) {
133 /* No currently running mmu notifiers. Now is the chance to
134 * add private accounting to all previously added umems. */
135 struct ib_umem_odp *odp_data, *next;
136
137 /* Prevent concurrent mmu notifiers from working on the
138 * no_private_counters list. */
139 down_write(&context->umem_rwsem);
140
141 /* Read the notifier_count again, with the umem_rwsem
142 * semaphore taken for write. */
143 if (!atomic_read(&context->notifier_count)) {
144 list_for_each_entry_safe(odp_data, next,
145 &context->no_private_counters,
146 no_private_counters) {
147 mutex_lock(&odp_data->umem_mutex);
148 odp_data->mn_counters_active = true;
149 list_del(&odp_data->no_private_counters);
150 complete_all(&odp_data->notifier_completion);
151 mutex_unlock(&odp_data->umem_mutex);
152 }
153 }
154
155 up_write(&context->umem_rwsem);
156 }
157}
158 110
159static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
160 u64 end, void *cookie) {
161 /* 111 /*
162 * Increase the number of notifiers running, to 112 * Increase the number of notifiers running, to
163 * prevent any further fault handling on this MR. 113 * prevent any further fault handling on this MR.
164 */ 114 */
165 ib_umem_notifier_start_account(item); 115 ib_umem_notifier_start_account(umem_odp);
166 item->odp_data->dying = 1; 116 umem_odp->dying = 1;
167 /* Make sure that the fact the umem is dying is out before we release 117 /* Make sure that the fact the umem is dying is out before we release
168 * all pending page faults. */ 118 * all pending page faults. */
169 smp_wmb(); 119 smp_wmb();
170 complete_all(&item->odp_data->notifier_completion); 120 complete_all(&umem_odp->notifier_completion);
171 item->context->invalidate_range(item, ib_umem_start(item), 121 umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
172 ib_umem_end(item)); 122 ib_umem_end(umem));
173 return 0; 123 return 0;
174} 124}
175 125
176static void ib_umem_notifier_release(struct mmu_notifier *mn, 126static void ib_umem_notifier_release(struct mmu_notifier *mn,
177 struct mm_struct *mm) 127 struct mm_struct *mm)
178{ 128{
179 struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 129 struct ib_ucontext_per_mm *per_mm =
180 130 container_of(mn, struct ib_ucontext_per_mm, mn);
181 if (!context->invalidate_range) 131
182 return; 132 down_read(&per_mm->umem_rwsem);
183 133 if (per_mm->active)
184 ib_ucontext_notifier_start_account(context); 134 rbt_ib_umem_for_each_in_range(
185 down_read(&context->umem_rwsem); 135 &per_mm->umem_tree, 0, ULLONG_MAX,
186 rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, 136 ib_umem_notifier_release_trampoline, true, NULL);
187 ULLONG_MAX, 137 up_read(&per_mm->umem_rwsem);
188 ib_umem_notifier_release_trampoline,
189 true,
190 NULL);
191 up_read(&context->umem_rwsem);
192} 138}
193 139
194static int invalidate_page_trampoline(struct ib_umem *item, u64 start, 140static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
195 u64 end, void *cookie) 141 u64 end, void *cookie)
196{ 142{
197 ib_umem_notifier_start_account(item); 143 ib_umem_notifier_start_account(item);
198 item->context->invalidate_range(item, start, start + PAGE_SIZE); 144 item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
199 ib_umem_notifier_end_account(item); 145 ib_umem_notifier_end_account(item);
200 return 0; 146 return 0;
201} 147}
202 148
203static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, 149static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
204 u64 end, void *cookie) 150 u64 start, u64 end, void *cookie)
205{ 151{
206 ib_umem_notifier_start_account(item); 152 ib_umem_notifier_start_account(item);
207 item->context->invalidate_range(item, start, end); 153 item->umem.context->invalidate_range(item, start, end);
208 return 0; 154 return 0;
209} 155}
210 156
@@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
214 unsigned long end, 160 unsigned long end,
215 bool blockable) 161 bool blockable)
216{ 162{
217 struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 163 struct ib_ucontext_per_mm *per_mm =
218 int ret; 164 container_of(mn, struct ib_ucontext_per_mm, mn);
219
220 if (!context->invalidate_range)
221 return 0;
222 165
223 if (blockable) 166 if (blockable)
224 down_read(&context->umem_rwsem); 167 down_read(&per_mm->umem_rwsem);
225 else if (!down_read_trylock(&context->umem_rwsem)) 168 else if (!down_read_trylock(&per_mm->umem_rwsem))
226 return -EAGAIN; 169 return -EAGAIN;
227 170
228 ib_ucontext_notifier_start_account(context); 171 if (!per_mm->active) {
229 ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, 172 up_read(&per_mm->umem_rwsem);
230 end, 173 /*
231 invalidate_range_start_trampoline, 174 * At this point active is permanently set and visible to this
232 blockable, NULL); 175 * CPU without a lock, that fact is relied on to skip the unlock
233 up_read(&context->umem_rwsem); 176 * in range_end.
177 */
178 return 0;
179 }
234 180
235 return ret; 181 return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
182 invalidate_range_start_trampoline,
183 blockable, NULL);
236} 184}
237 185
238static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, 186static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
239 u64 end, void *cookie) 187 u64 end, void *cookie)
240{ 188{
241 ib_umem_notifier_end_account(item); 189 ib_umem_notifier_end_account(item);
@@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
247 unsigned long start, 195 unsigned long start,
248 unsigned long end) 196 unsigned long end)
249{ 197{
250 struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); 198 struct ib_ucontext_per_mm *per_mm =
199 container_of(mn, struct ib_ucontext_per_mm, mn);
251 200
252 if (!context->invalidate_range) 201 if (unlikely(!per_mm->active))
253 return; 202 return;
254 203
255 /* 204 rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
256 * TODO: we currently bail out if there is any sleepable work to be done
257 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
258 * here. But this is ugly and fragile.
259 */
260 down_read(&context->umem_rwsem);
261 rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
262 end, 205 end,
263 invalidate_range_end_trampoline, true, NULL); 206 invalidate_range_end_trampoline, true, NULL);
264 up_read(&context->umem_rwsem); 207 up_read(&per_mm->umem_rwsem);
265 ib_ucontext_notifier_end_account(context);
266} 208}
267 209
268static const struct mmu_notifier_ops ib_umem_notifiers = { 210static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
271 .invalidate_range_end = ib_umem_notifier_invalidate_range_end, 213 .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
272}; 214};
273 215
274struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 216static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
275 unsigned long addr,
276 size_t size)
277{ 217{
278 struct ib_umem *umem; 218 struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
219 struct ib_umem *umem = &umem_odp->umem;
220
221 down_write(&per_mm->umem_rwsem);
222 if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
223 rbt_ib_umem_insert(&umem_odp->interval_tree,
224 &per_mm->umem_tree);
225 up_write(&per_mm->umem_rwsem);
226}
227
228static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
229{
230 struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
231 struct ib_umem *umem = &umem_odp->umem;
232
233 down_write(&per_mm->umem_rwsem);
234 if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
235 rbt_ib_umem_remove(&umem_odp->interval_tree,
236 &per_mm->umem_tree);
237 complete_all(&umem_odp->notifier_completion);
238
239 up_write(&per_mm->umem_rwsem);
240}
241
242static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
243 struct mm_struct *mm)
244{
245 struct ib_ucontext_per_mm *per_mm;
246 int ret;
247
248 per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
249 if (!per_mm)
250 return ERR_PTR(-ENOMEM);
251
252 per_mm->context = ctx;
253 per_mm->mm = mm;
254 per_mm->umem_tree = RB_ROOT_CACHED;
255 init_rwsem(&per_mm->umem_rwsem);
256 per_mm->active = ctx->invalidate_range;
257
258 rcu_read_lock();
259 per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
260 rcu_read_unlock();
261
262 WARN_ON(mm != current->mm);
263
264 per_mm->mn.ops = &ib_umem_notifiers;
265 ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
266 if (ret) {
267 dev_err(&ctx->device->dev,
268 "Failed to register mmu_notifier %d\n", ret);
269 goto out_pid;
270 }
271
272 list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
273 return per_mm;
274
275out_pid:
276 put_pid(per_mm->tgid);
277 kfree(per_mm);
278 return ERR_PTR(ret);
279}
280
281static int get_per_mm(struct ib_umem_odp *umem_odp)
282{
283 struct ib_ucontext *ctx = umem_odp->umem.context;
284 struct ib_ucontext_per_mm *per_mm;
285
286 /*
287 * Generally speaking we expect only one or two per_mm in this list,
288 * so no reason to optimize this search today.
289 */
290 mutex_lock(&ctx->per_mm_list_lock);
291 list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
292 if (per_mm->mm == umem_odp->umem.owning_mm)
293 goto found;
294 }
295
296 per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
297 if (IS_ERR(per_mm)) {
298 mutex_unlock(&ctx->per_mm_list_lock);
299 return PTR_ERR(per_mm);
300 }
301
302found:
303 umem_odp->per_mm = per_mm;
304 per_mm->odp_mrs_count++;
305 mutex_unlock(&ctx->per_mm_list_lock);
306
307 return 0;
308}
309
310static void free_per_mm(struct rcu_head *rcu)
311{
312 kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
313}
314
315void put_per_mm(struct ib_umem_odp *umem_odp)
316{
317 struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
318 struct ib_ucontext *ctx = umem_odp->umem.context;
319 bool need_free;
320
321 mutex_lock(&ctx->per_mm_list_lock);
322 umem_odp->per_mm = NULL;
323 per_mm->odp_mrs_count--;
324 need_free = per_mm->odp_mrs_count == 0;
325 if (need_free)
326 list_del(&per_mm->ucontext_list);
327 mutex_unlock(&ctx->per_mm_list_lock);
328
329 if (!need_free)
330 return;
331
332 /*
333 * NOTE! mmu_notifier_unregister() can happen between a start/end
334 * callback, resulting in a start/end, and thus an unbalanced
335 * lock. This doesn't really matter to us since we are about to kfree
336 * the memory that holds the lock, however LOCKDEP doesn't like this.
337 */
338 down_write(&per_mm->umem_rwsem);
339 per_mm->active = false;
340 up_write(&per_mm->umem_rwsem);
341
342 WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
343 mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
344 put_pid(per_mm->tgid);
345 mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
346}
347
348struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
349 unsigned long addr, size_t size)
350{
351 struct ib_ucontext *ctx = per_mm->context;
279 struct ib_umem_odp *odp_data; 352 struct ib_umem_odp *odp_data;
353 struct ib_umem *umem;
280 int pages = size >> PAGE_SHIFT; 354 int pages = size >> PAGE_SHIFT;
281 int ret; 355 int ret;
282 356
283 umem = kzalloc(sizeof(*umem), GFP_KERNEL); 357 odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
284 if (!umem) 358 if (!odp_data)
285 return ERR_PTR(-ENOMEM); 359 return ERR_PTR(-ENOMEM);
286 360 umem = &odp_data->umem;
287 umem->context = context; 361 umem->context = ctx;
288 umem->length = size; 362 umem->length = size;
289 umem->address = addr; 363 umem->address = addr;
290 umem->page_shift = PAGE_SHIFT; 364 umem->page_shift = PAGE_SHIFT;
291 umem->writable = 1; 365 umem->writable = 1;
292 366 umem->is_odp = 1;
293 odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); 367 odp_data->per_mm = per_mm;
294 if (!odp_data) {
295 ret = -ENOMEM;
296 goto out_umem;
297 }
298 odp_data->umem = umem;
299 368
300 mutex_init(&odp_data->umem_mutex); 369 mutex_init(&odp_data->umem_mutex);
301 init_completion(&odp_data->notifier_completion); 370 init_completion(&odp_data->notifier_completion);
@@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
314 goto out_page_list; 383 goto out_page_list;
315 } 384 }
316 385
317 down_write(&context->umem_rwsem); 386 /*
318 context->odp_mrs_count++; 387 * Caller must ensure that the umem_odp that the per_mm came from
319 rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree); 388 * cannot be freed during the call to ib_alloc_odp_umem.
320 if (likely(!atomic_read(&context->notifier_count))) 389 */
321 odp_data->mn_counters_active = true; 390 mutex_lock(&ctx->per_mm_list_lock);
322 else 391 per_mm->odp_mrs_count++;
323 list_add(&odp_data->no_private_counters, 392 mutex_unlock(&ctx->per_mm_list_lock);
324 &context->no_private_counters); 393 add_umem_to_per_mm(odp_data);
325 up_write(&context->umem_rwsem);
326
327 umem->odp_data = odp_data;
328 394
329 return umem; 395 return odp_data;
330 396
331out_page_list: 397out_page_list:
332 vfree(odp_data->page_list); 398 vfree(odp_data->page_list);
333out_odp_data: 399out_odp_data:
334 kfree(odp_data); 400 kfree(odp_data);
335out_umem:
336 kfree(umem);
337 return ERR_PTR(ret); 401 return ERR_PTR(ret);
338} 402}
339EXPORT_SYMBOL(ib_alloc_odp_umem); 403EXPORT_SYMBOL(ib_alloc_odp_umem);
340 404
341int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, 405int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
342 int access)
343{ 406{
407 struct ib_umem *umem = &umem_odp->umem;
408 /*
409 * NOTE: This must be called in a process context where umem->owning_mm
410 * == current->mm
411 */
412 struct mm_struct *mm = umem->owning_mm;
344 int ret_val; 413 int ret_val;
345 struct pid *our_pid;
346 struct mm_struct *mm = get_task_mm(current);
347
348 if (!mm)
349 return -EINVAL;
350 414
351 if (access & IB_ACCESS_HUGETLB) { 415 if (access & IB_ACCESS_HUGETLB) {
352 struct vm_area_struct *vma; 416 struct vm_area_struct *vma;
@@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
366 umem->hugetlb = 0; 430 umem->hugetlb = 0;
367 } 431 }
368 432
369 /* Prevent creating ODP MRs in child processes */ 433 mutex_init(&umem_odp->umem_mutex);
370 rcu_read_lock();
371 our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
372 rcu_read_unlock();
373 put_pid(our_pid);
374 if (context->tgid != our_pid) {
375 ret_val = -EINVAL;
376 goto out_mm;
377 }
378
379 umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
380 if (!umem->odp_data) {
381 ret_val = -ENOMEM;
382 goto out_mm;
383 }
384 umem->odp_data->umem = umem;
385
386 mutex_init(&umem->odp_data->umem_mutex);
387 434
388 init_completion(&umem->odp_data->notifier_completion); 435 init_completion(&umem_odp->notifier_completion);
389 436
390 if (ib_umem_num_pages(umem)) { 437 if (ib_umem_num_pages(umem)) {
391 umem->odp_data->page_list = 438 umem_odp->page_list =
392 vzalloc(array_size(sizeof(*umem->odp_data->page_list), 439 vzalloc(array_size(sizeof(*umem_odp->page_list),
393 ib_umem_num_pages(umem))); 440 ib_umem_num_pages(umem)));
394 if (!umem->odp_data->page_list) { 441 if (!umem_odp->page_list)
395 ret_val = -ENOMEM; 442 return -ENOMEM;
396 goto out_odp_data;
397 }
398 443
399 umem->odp_data->dma_list = 444 umem_odp->dma_list =
400 vzalloc(array_size(sizeof(*umem->odp_data->dma_list), 445 vzalloc(array_size(sizeof(*umem_odp->dma_list),
401 ib_umem_num_pages(umem))); 446 ib_umem_num_pages(umem)));
402 if (!umem->odp_data->dma_list) { 447 if (!umem_odp->dma_list) {
403 ret_val = -ENOMEM; 448 ret_val = -ENOMEM;
404 goto out_page_list; 449 goto out_page_list;
405 } 450 }
406 } 451 }
407 452
408 /* 453 ret_val = get_per_mm(umem_odp);
409 * When using MMU notifiers, we will get a 454 if (ret_val)
410 * notification before the "current" task (and MM) is 455 goto out_dma_list;
411 * destroyed. We use the umem_rwsem semaphore to synchronize. 456 add_umem_to_per_mm(umem_odp);
412 */
413 down_write(&context->umem_rwsem);
414 context->odp_mrs_count++;
415 if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
416 rbt_ib_umem_insert(&umem->odp_data->interval_tree,
417 &context->umem_tree);
418 if (likely(!atomic_read(&context->notifier_count)) ||
419 context->odp_mrs_count == 1)
420 umem->odp_data->mn_counters_active = true;
421 else
422 list_add(&umem->odp_data->no_private_counters,
423 &context->no_private_counters);
424 downgrade_write(&context->umem_rwsem);
425
426 if (context->odp_mrs_count == 1) {
427 /*
428 * Note that at this point, no MMU notifier is running
429 * for this context!
430 */
431 atomic_set(&context->notifier_count, 0);
432 INIT_HLIST_NODE(&context->mn.hlist);
433 context->mn.ops = &ib_umem_notifiers;
434 /*
435 * Lock-dep detects a false positive for mmap_sem vs.
436 * umem_rwsem, due to not grasping downgrade_write correctly.
437 */
438 lockdep_off();
439 ret_val = mmu_notifier_register(&context->mn, mm);
440 lockdep_on();
441 if (ret_val) {
442 pr_err("Failed to register mmu_notifier %d\n", ret_val);
443 ret_val = -EBUSY;
444 goto out_mutex;
445 }
446 }
447
448 up_read(&context->umem_rwsem);
449 457
450 /*
451 * Note that doing an mmput can cause a notifier for the relevant mm.
452 * If the notifier is called while we hold the umem_rwsem, this will
453 * cause a deadlock. Therefore, we release the reference only after we
454 * released the semaphore.
455 */
456 mmput(mm);
457 return 0; 458 return 0;
458 459
459out_mutex: 460out_dma_list:
460 up_read(&context->umem_rwsem); 461 vfree(umem_odp->dma_list);
461 vfree(umem->odp_data->dma_list);
462out_page_list: 462out_page_list:
463 vfree(umem->odp_data->page_list); 463 vfree(umem_odp->page_list);
464out_odp_data:
465 kfree(umem->odp_data);
466out_mm:
467 mmput(mm);
468 return ret_val; 464 return ret_val;
469} 465}
470 466
471void ib_umem_odp_release(struct ib_umem *umem) 467void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
472{ 468{
473 struct ib_ucontext *context = umem->context; 469 struct ib_umem *umem = &umem_odp->umem;
474 470
475 /* 471 /*
476 * Ensure that no more pages are mapped in the umem. 472 * Ensure that no more pages are mapped in the umem.
@@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
478 * It is the driver's responsibility to ensure, before calling us, 474 * It is the driver's responsibility to ensure, before calling us,
479 * that the hardware will not attempt to access the MR any more. 475 * that the hardware will not attempt to access the MR any more.
480 */ 476 */
481 ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem), 477 ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
482 ib_umem_end(umem)); 478 ib_umem_end(umem));
483 479
484 down_write(&context->umem_rwsem); 480 remove_umem_from_per_mm(umem_odp);
485 if (likely(ib_umem_start(umem) != ib_umem_end(umem))) 481 put_per_mm(umem_odp);
486 rbt_ib_umem_remove(&umem->odp_data->interval_tree, 482 vfree(umem_odp->dma_list);
487 &context->umem_tree); 483 vfree(umem_odp->page_list);
488 context->odp_mrs_count--;
489 if (!umem->odp_data->mn_counters_active) {
490 list_del(&umem->odp_data->no_private_counters);
491 complete_all(&umem->odp_data->notifier_completion);
492 }
493
494 /*
495 * Downgrade the lock to a read lock. This ensures that the notifiers
496 * (who lock the mutex for reading) will be able to finish, and we
497 * will be able to eventually obtain the mmu notifiers SRCU. Note
498 * that since we are doing it atomically, no other user could register
499 * and unregister while we do the check.
500 */
501 downgrade_write(&context->umem_rwsem);
502 if (!context->odp_mrs_count) {
503 struct task_struct *owning_process = NULL;
504 struct mm_struct *owning_mm = NULL;
505
506 owning_process = get_pid_task(context->tgid,
507 PIDTYPE_PID);
508 if (owning_process == NULL)
509 /*
510 * The process is already dead, notifiers were removed
511 * already.
512 */
513 goto out;
514
515 owning_mm = get_task_mm(owning_process);
516 if (owning_mm == NULL)
517 /*
518 * The process' mm is already dead, notifiers were
519 * removed already.
520 */
521 goto out_put_task;
522 mmu_notifier_unregister(&context->mn, owning_mm);
523
524 mmput(owning_mm);
525
526out_put_task:
527 put_task_struct(owning_process);
528 }
529out:
530 up_read(&context->umem_rwsem);
531
532 vfree(umem->odp_data->dma_list);
533 vfree(umem->odp_data->page_list);
534 kfree(umem->odp_data);
535 kfree(umem);
536} 484}
537 485
538/* 486/*
@@ -544,7 +492,7 @@ out:
544 * @access_mask: access permissions needed for this page. 492 * @access_mask: access permissions needed for this page.
545 * @current_seq: sequence number for synchronization with invalidations. 493 * @current_seq: sequence number for synchronization with invalidations.
546 * the sequence number is taken from 494 * the sequence number is taken from
547 * umem->odp_data->notifiers_seq. 495 * umem_odp->notifiers_seq.
548 * 496 *
549 * The function returns -EFAULT if the DMA mapping operation fails. It returns 497 * The function returns -EFAULT if the DMA mapping operation fails. It returns
550 * -EAGAIN if a concurrent invalidation prevents us from updating the page. 498 * -EAGAIN if a concurrent invalidation prevents us from updating the page.
@@ -554,12 +502,13 @@ out:
554 * umem. 502 * umem.
555 */ 503 */
556static int ib_umem_odp_map_dma_single_page( 504static int ib_umem_odp_map_dma_single_page(
557 struct ib_umem *umem, 505 struct ib_umem_odp *umem_odp,
558 int page_index, 506 int page_index,
559 struct page *page, 507 struct page *page,
560 u64 access_mask, 508 u64 access_mask,
561 unsigned long current_seq) 509 unsigned long current_seq)
562{ 510{
511 struct ib_umem *umem = &umem_odp->umem;
563 struct ib_device *dev = umem->context->device; 512 struct ib_device *dev = umem->context->device;
564 dma_addr_t dma_addr; 513 dma_addr_t dma_addr;
565 int stored_page = 0; 514 int stored_page = 0;
@@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
571 * handle case of a racing notifier. This check also allows us to bail 520 * handle case of a racing notifier. This check also allows us to bail
572 * early if we have a notifier running in parallel with us. 521 * early if we have a notifier running in parallel with us.
573 */ 522 */
574 if (ib_umem_mmu_notifier_retry(umem, current_seq)) { 523 if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
575 ret = -EAGAIN; 524 ret = -EAGAIN;
576 goto out; 525 goto out;
577 } 526 }
578 if (!(umem->odp_data->dma_list[page_index])) { 527 if (!(umem_odp->dma_list[page_index])) {
579 dma_addr = ib_dma_map_page(dev, 528 dma_addr = ib_dma_map_page(dev,
580 page, 529 page,
581 0, BIT(umem->page_shift), 530 0, BIT(umem->page_shift),
@@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
584 ret = -EFAULT; 533 ret = -EFAULT;
585 goto out; 534 goto out;
586 } 535 }
587 umem->odp_data->dma_list[page_index] = dma_addr | access_mask; 536 umem_odp->dma_list[page_index] = dma_addr | access_mask;
588 umem->odp_data->page_list[page_index] = page; 537 umem_odp->page_list[page_index] = page;
589 umem->npages++; 538 umem->npages++;
590 stored_page = 1; 539 stored_page = 1;
591 } else if (umem->odp_data->page_list[page_index] == page) { 540 } else if (umem_odp->page_list[page_index] == page) {
592 umem->odp_data->dma_list[page_index] |= access_mask; 541 umem_odp->dma_list[page_index] |= access_mask;
593 } else { 542 } else {
594 pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", 543 pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
595 umem->odp_data->page_list[page_index], page); 544 umem_odp->page_list[page_index], page);
596 /* Better remove the mapping now, to prevent any further 545 /* Better remove the mapping now, to prevent any further
597 * damage. */ 546 * damage. */
598 remove_existing_mapping = 1; 547 remove_existing_mapping = 1;
@@ -605,7 +554,7 @@ out:
605 554
606 if (remove_existing_mapping && umem->context->invalidate_range) { 555 if (remove_existing_mapping && umem->context->invalidate_range) {
607 invalidate_page_trampoline( 556 invalidate_page_trampoline(
608 umem, 557 umem_odp,
609 ib_umem_start(umem) + (page_index >> umem->page_shift), 558 ib_umem_start(umem) + (page_index >> umem->page_shift),
610 ib_umem_start(umem) + ((page_index + 1) >> 559 ib_umem_start(umem) + ((page_index + 1) >>
611 umem->page_shift), 560 umem->page_shift),
@@ -621,7 +570,7 @@ out:
621 * 570 *
622 * Pins the range of pages passed in the argument, and maps them to 571 * Pins the range of pages passed in the argument, and maps them to
623 * DMA addresses. The DMA addresses of the mapped pages is updated in 572 * DMA addresses. The DMA addresses of the mapped pages is updated in
624 * umem->odp_data->dma_list. 573 * umem_odp->dma_list.
625 * 574 *
626 * Returns the number of pages mapped on success, negative error code 575
627 * for failure. 576 * for failure.
@@ -629,7 +578,7 @@ out:
629 * the function from completing its task. 578 * the function from completing its task.
630 * An -ENOENT error code indicates that the userspace process is being terminated 579
631 * and mm was already destroyed. 580 * and mm was already destroyed.
632 * @umem: the umem to map and pin 581 * @umem_odp: the umem to map and pin
633 * @user_virt: the address from which we need to map. 582 * @user_virt: the address from which we need to map.
634 * @bcnt: the minimal number of bytes to pin and map. The mapping might be 583 * @bcnt: the minimal number of bytes to pin and map. The mapping might be
635 * bigger due to alignment, and may also be smaller in case of an error 584 * bigger due to alignment, and may also be smaller in case of an error
@@ -639,13 +588,15 @@ out:
639 * range. 588 * range.
640 * @current_seq: the MMU notifiers sequence value for synchronization with 589
641 * invalidations. the sequence number is read from 590
642 * umem->odp_data->notifiers_seq before calling this function 591 * umem_odp->notifiers_seq before calling this function
643 */ 592 */
644int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, 593int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
645 u64 access_mask, unsigned long current_seq) 594 u64 bcnt, u64 access_mask,
595 unsigned long current_seq)
646{ 596{
597 struct ib_umem *umem = &umem_odp->umem;
647 struct task_struct *owning_process = NULL; 598 struct task_struct *owning_process = NULL;
648 struct mm_struct *owning_mm = NULL; 599 struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
649 struct page **local_page_list = NULL; 600 struct page **local_page_list = NULL;
650 u64 page_mask, off; 601 u64 page_mask, off;
651 int j, k, ret = 0, start_idx, npages = 0, page_shift; 602 int j, k, ret = 0, start_idx, npages = 0, page_shift;
@@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
669 user_virt = user_virt & page_mask; 620 user_virt = user_virt & page_mask;
670 bcnt += off; /* Charge for the first page offset as well. */ 621 bcnt += off; /* Charge for the first page offset as well. */
671 622
672 owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); 623 /*
673 if (owning_process == NULL) { 624 * owning_process is allowed to be NULL, this means somehow the mm
625 * exists beyond the lifetime of the originating process. Presumably
626 * mmget_not_zero will fail in this case.
627 */
628 owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
629 if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
674 ret = -EINVAL; 630 ret = -EINVAL;
675 goto out_no_task;
676 }
677
678 owning_mm = get_task_mm(owning_process);
679 if (owning_mm == NULL) {
680 ret = -ENOENT;
681 goto out_put_task; 631 goto out_put_task;
682 } 632 }
683 633
@@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
709 break; 659 break;
710 660
711 bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); 661 bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
712 mutex_lock(&umem->odp_data->umem_mutex); 662 mutex_lock(&umem_odp->umem_mutex);
713 for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { 663 for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
714 if (user_virt & ~page_mask) { 664 if (user_virt & ~page_mask) {
715 p += PAGE_SIZE; 665 p += PAGE_SIZE;
@@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
722 } 672 }
723 673
724 ret = ib_umem_odp_map_dma_single_page( 674 ret = ib_umem_odp_map_dma_single_page(
725 umem, k, local_page_list[j], 675 umem_odp, k, local_page_list[j],
726 access_mask, current_seq); 676 access_mask, current_seq);
727 if (ret < 0) 677 if (ret < 0)
728 break; 678 break;
@@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
730 p = page_to_phys(local_page_list[j]); 680 p = page_to_phys(local_page_list[j]);
731 k++; 681 k++;
732 } 682 }
733 mutex_unlock(&umem->odp_data->umem_mutex); 683 mutex_unlock(&umem_odp->umem_mutex);
734 684
735 if (ret < 0) { 685 if (ret < 0) {
736 /* Release left over pages when handling errors. */ 686 /* Release left over pages when handling errors. */
@@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
749 699
750 mmput(owning_mm); 700 mmput(owning_mm);
751out_put_task: 701out_put_task:
752 put_task_struct(owning_process); 702 if (owning_process)
753out_no_task: 703 put_task_struct(owning_process);
754 free_page((unsigned long)local_page_list); 704 free_page((unsigned long)local_page_list);
755 return ret; 705 return ret;
756} 706}
757EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); 707EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
758 708
759void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, 709void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
760 u64 bound) 710 u64 bound)
761{ 711{
712 struct ib_umem *umem = &umem_odp->umem;
762 int idx; 713 int idx;
763 u64 addr; 714 u64 addr;
764 struct ib_device *dev = umem->context->device; 715 struct ib_device *dev = umem->context->device;
@@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
770 * faults from completion. We might be racing with other 721 * faults from completion. We might be racing with other
771 * invalidations, so we must make sure we free each page only 722 * invalidations, so we must make sure we free each page only
772 * once. */ 723 * once. */
773 mutex_lock(&umem->odp_data->umem_mutex); 724 mutex_lock(&umem_odp->umem_mutex);
774 for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { 725 for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
775 idx = (addr - ib_umem_start(umem)) >> umem->page_shift; 726 idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
776 if (umem->odp_data->page_list[idx]) { 727 if (umem_odp->page_list[idx]) {
777 struct page *page = umem->odp_data->page_list[idx]; 728 struct page *page = umem_odp->page_list[idx];
778 dma_addr_t dma = umem->odp_data->dma_list[idx]; 729 dma_addr_t dma = umem_odp->dma_list[idx];
779 dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; 730 dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
780 731
781 WARN_ON(!dma_addr); 732 WARN_ON(!dma_addr);
@@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
798 /* on demand pinning support */ 749 /* on demand pinning support */
799 if (!umem->context->invalidate_range) 750 if (!umem->context->invalidate_range)
800 put_page(page); 751 put_page(page);
801 umem->odp_data->page_list[idx] = NULL; 752 umem_odp->page_list[idx] = NULL;
802 umem->odp_data->dma_list[idx] = 0; 753 umem_odp->dma_list[idx] = 0;
803 umem->npages--; 754 umem->npages--;
804 } 755 }
805 } 756 }
806 mutex_unlock(&umem->odp_data->umem_mutex); 757 mutex_unlock(&umem_odp->umem_mutex);
807} 758}
808EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); 759EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
809 760
@@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
830 return -EAGAIN; 781 return -EAGAIN;
831 next = rbt_ib_umem_iter_next(node, start, last - 1); 782 next = rbt_ib_umem_iter_next(node, start, last - 1);
832 umem = container_of(node, struct ib_umem_odp, interval_tree); 783 umem = container_of(node, struct ib_umem_odp, interval_tree);
833 ret_val = cb(umem->umem, start, last, cookie) || ret_val; 784 ret_val = cb(umem, start, last, cookie) || ret_val;
834 } 785 }
835 786
836 return ret_val; 787 return ret_val;
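
The umem_odp.c hunks above replace the per-ucontext rwsem/interval-tree/counter bookkeeping with a struct ib_umem_odp that embeds its ib_umem and is attached to a shared per-mm object (get_per_mm()/add_umem_to_per_mm() on setup, remove_umem_from_per_mm()/put_per_mm() in ib_umem_odp_release()), and the map/unmap entry points now take the ib_umem_odp directly. As a hedged illustration only (not part of this patch), a driver page-fault path under the reworked ib_umem_odp_map_dma_pages() signature could look roughly like the sketch below; the function name and access_mask value are illustrative, and the smp_rmb() pairing is assumed from the usual notifier-retry pattern.

/* Hedged sketch only: illustrative driver fault path for the new API. */
static int example_odp_fault(struct ib_umem_odp *umem_odp, u64 io_virt,
			     size_t bcnt, u64 access_mask)
{
	unsigned long current_seq;
	int npages;

	/* Snapshot the notifier sequence before (re)filling the page lists. */
	current_seq = READ_ONCE(umem_odp->notifiers_seq);
	smp_rmb();	/* assumed: pairs with the increment in the notifier */

	npages = ib_umem_odp_map_dma_pages(umem_odp, io_virt, bcnt,
					   access_mask, current_seq);
	if (npages < 0)
		return npages;	/* -EAGAIN means a racing invalidation; retry */

	/*
	 * The caller would then post umem_odp->dma_list entries to the HW
	 * page tables under umem_odp->umem_mutex, after re-checking
	 * ib_umem_mmu_notifier_retry(umem_odp, current_seq).
	 */
	return npages;
}
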
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index c34a6852d691..f55f48f6b272 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
138static dev_t dynamic_umad_dev; 138static dev_t dynamic_umad_dev;
139static dev_t dynamic_issm_dev; 139static dev_t dynamic_issm_dev;
140 140
141static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); 141static DEFINE_IDA(umad_ida);
142 142
143static void ib_umad_add_one(struct ib_device *device); 143static void ib_umad_add_one(struct ib_device *device);
144static void ib_umad_remove_one(struct ib_device *device, void *client_data); 144static void ib_umad_remove_one(struct ib_device *device, void *client_data);
@@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
1132 if (!port) 1132 if (!port)
1133 return -ENODEV; 1133 return -ENODEV;
1134 1134
1135 return sprintf(buf, "%s\n", port->ib_dev->name); 1135 return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
1136} 1136}
1137static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1137static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1138 1138
@@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1159 dev_t base_umad; 1159 dev_t base_umad;
1160 dev_t base_issm; 1160 dev_t base_issm;
1161 1161
1162 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 1162 devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
1163 if (devnum >= IB_UMAD_MAX_PORTS) 1163 if (devnum < 0)
1164 return -1; 1164 return -1;
1165 port->dev_num = devnum; 1165 port->dev_num = devnum;
1166 set_bit(devnum, dev_map);
1167 if (devnum >= IB_UMAD_NUM_FIXED_MINOR) { 1166 if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
1168 base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; 1167 base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
1169 base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; 1168 base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
@@ -1227,7 +1226,7 @@ err_dev:
1227 1226
1228err_cdev: 1227err_cdev:
1229 cdev_del(&port->cdev); 1228 cdev_del(&port->cdev);
1230 clear_bit(devnum, dev_map); 1229 ida_free(&umad_ida, devnum);
1231 1230
1232 return -1; 1231 return -1;
1233} 1232}
@@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
1261 } 1260 }
1262 1261
1263 mutex_unlock(&port->file_mutex); 1262 mutex_unlock(&port->file_mutex);
1264 clear_bit(port->dev_num, dev_map); 1263 ida_free(&umad_ida, port->dev_num);
1265} 1264}
1266 1265
1267static void ib_umad_add_one(struct ib_device *device) 1266static void ib_umad_add_one(struct ib_device *device)
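
user_mad.c (and uverbs_main.c further down) drop the DECLARE_BITMAP()/find_first_zero_bit()/set_bit()/clear_bit() minor-number bookkeeping in favour of the IDA allocator. A minimal sketch of the pattern, using the same bound as above:

#include <linux/idr.h>		/* DEFINE_IDA, ida_alloc_max, ida_free */

static DEFINE_IDA(example_ida);

static int example_alloc_devnum(void)
{
	/* Returns a free id in [0, IB_UMAD_MAX_PORTS - 1] or a negative errno. */
	return ida_alloc_max(&example_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
}

static void example_free_devnum(int devnum)
{
	ida_free(&example_ida, devnum);
}
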
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5df8e548cc14..c97935a0c7c6 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -100,13 +100,14 @@ struct ib_uverbs_device {
100 atomic_t refcount; 100 atomic_t refcount;
101 int num_comp_vectors; 101 int num_comp_vectors;
102 struct completion comp; 102 struct completion comp;
103 struct device *dev; 103 struct device dev;
104 /* First group for device attributes, NULL terminated array */
105 const struct attribute_group *groups[2];
104 struct ib_device __rcu *ib_dev; 106 struct ib_device __rcu *ib_dev;
105 int devnum; 107 int devnum;
106 struct cdev cdev; 108 struct cdev cdev;
107 struct rb_root xrcd_tree; 109 struct rb_root xrcd_tree;
108 struct mutex xrcd_tree_mutex; 110 struct mutex xrcd_tree_mutex;
109 struct kobject kobj;
110 struct srcu_struct disassociate_srcu; 111 struct srcu_struct disassociate_srcu;
111 struct mutex lists_mutex; /* protect lists */ 112 struct mutex lists_mutex; /* protect lists */
112 struct list_head uverbs_file_list; 113 struct list_head uverbs_file_list;
@@ -146,7 +147,6 @@ struct ib_uverbs_file {
146 struct ib_event_handler event_handler; 147 struct ib_event_handler event_handler;
147 struct ib_uverbs_async_event_file *async_file; 148 struct ib_uverbs_async_event_file *async_file;
148 struct list_head list; 149 struct list_head list;
149 int is_closed;
150 150
151 /* 151 /*
152 * To access the uobjects list hw_destroy_rwsem must be held for write 152 * To access the uobjects list hw_destroy_rwsem must be held for write
@@ -158,6 +158,9 @@ struct ib_uverbs_file {
158 spinlock_t uobjects_lock; 158 spinlock_t uobjects_lock;
159 struct list_head uobjects; 159 struct list_head uobjects;
160 160
161 struct mutex umap_lock;
162 struct list_head umaps;
163
161 u64 uverbs_cmd_mask; 164 u64 uverbs_cmd_mask;
162 u64 uverbs_ex_cmd_mask; 165 u64 uverbs_ex_cmd_mask;
163 166
@@ -218,12 +221,6 @@ struct ib_ucq_object {
218 u32 async_events_reported; 221 u32 async_events_reported;
219}; 222};
220 223
221struct ib_uflow_resources;
222struct ib_uflow_object {
223 struct ib_uobject uobject;
224 struct ib_uflow_resources *resources;
225};
226
227extern const struct file_operations uverbs_event_fops; 224extern const struct file_operations uverbs_event_fops;
228void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); 225void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
229struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, 226struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
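
Two structural changes in uverbs.h feed the rework below: the uverbs device now embeds a struct device plus a NULL-terminated groups[] array, so its sysfs attributes are registered together with the device rather than via device_create_file(), and each ib_uverbs_file gains umap_lock/umaps for the BAR-mmap tracking added in uverbs_main.c. As a reminder of the attribute-group pattern the groups[] field enables, here is a hedged, generic sketch; the names are illustrative, and the real attributes appear later in this diff.

static ssize_t example_show(struct device *device,
			    struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "example\n");
}
static DEVICE_ATTR_RO(example);		/* generates dev_attr_example */

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,
	NULL,				/* array must be NULL terminated */
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

/*
 * Wiring (sketch): groups[0] = &example_attr_group, groups[1] stays NULL,
 * dev.groups = groups; device_add() (via cdev_device_add()) then creates
 * the files with no device_create_file() calls.
 */
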
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e012ca80f9d1..a93853770e3c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
117 /* ufile is required when some objects are released */ 117 /* ufile is required when some objects are released */
118 ucontext->ufile = file; 118 ucontext->ufile = file;
119 119
120 rcu_read_lock(); 120 ucontext->closing = false;
121 ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
122 rcu_read_unlock();
123 ucontext->closing = 0;
124 ucontext->cleanup_retryable = false; 121 ucontext->cleanup_retryable = false;
125 122
126#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 123#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
127 ucontext->umem_tree = RB_ROOT_CACHED; 124 mutex_init(&ucontext->per_mm_list_lock);
128 init_rwsem(&ucontext->umem_rwsem); 125 INIT_LIST_HEAD(&ucontext->per_mm_list);
129 ucontext->odp_mrs_count = 0;
130 INIT_LIST_HEAD(&ucontext->no_private_counters);
131
132 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) 126 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
133 ucontext->invalidate_range = NULL; 127 ucontext->invalidate_range = NULL;
134 128
@@ -172,7 +166,6 @@ err_fd:
172 put_unused_fd(resp.async_fd); 166 put_unused_fd(resp.async_fd);
173 167
174err_free: 168err_free:
175 put_pid(ucontext->tgid);
176 ib_dev->dealloc_ucontext(ucontext); 169 ib_dev->dealloc_ucontext(ucontext);
177 170
178err_alloc: 171err_alloc:
@@ -2769,16 +2762,7 @@ out_put:
2769 return ret ? ret : in_len; 2762 return ret ? ret : in_len;
2770} 2763}
2771 2764
2772struct ib_uflow_resources { 2765struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
2773 size_t max;
2774 size_t num;
2775 size_t collection_num;
2776 size_t counters_num;
2777 struct ib_counters **counters;
2778 struct ib_flow_action **collection;
2779};
2780
2781static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
2782{ 2766{
2783 struct ib_uflow_resources *resources; 2767 struct ib_uflow_resources *resources;
2784 2768
@@ -2808,6 +2792,7 @@ err:
2808 2792
2809 return NULL; 2793 return NULL;
2810} 2794}
2795EXPORT_SYMBOL(flow_resources_alloc);
2811 2796
2812void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) 2797void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
2813{ 2798{
@@ -2826,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
2826 kfree(uflow_res->counters); 2811 kfree(uflow_res->counters);
2827 kfree(uflow_res); 2812 kfree(uflow_res);
2828} 2813}
2814EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
2829 2815
2830static void flow_resources_add(struct ib_uflow_resources *uflow_res, 2816void flow_resources_add(struct ib_uflow_resources *uflow_res,
2831 enum ib_flow_spec_type type, 2817 enum ib_flow_spec_type type,
2832 void *ibobj) 2818 void *ibobj)
2833{ 2819{
2834 WARN_ON(uflow_res->num >= uflow_res->max); 2820 WARN_ON(uflow_res->num >= uflow_res->max);
2835 2821
@@ -2850,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
2850 2836
2851 uflow_res->num++; 2837 uflow_res->num++;
2852} 2838}
2839EXPORT_SYMBOL(flow_resources_add);
2853 2840
2854static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, 2841static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
2855 struct ib_uverbs_flow_spec *kern_spec, 2842 struct ib_uverbs_flow_spec *kern_spec,
@@ -3484,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3484 struct ib_uverbs_create_flow cmd; 3471 struct ib_uverbs_create_flow cmd;
3485 struct ib_uverbs_create_flow_resp resp; 3472 struct ib_uverbs_create_flow_resp resp;
3486 struct ib_uobject *uobj; 3473 struct ib_uobject *uobj;
3487 struct ib_uflow_object *uflow;
3488 struct ib_flow *flow_id; 3474 struct ib_flow *flow_id;
3489 struct ib_uverbs_flow_attr *kern_flow_attr; 3475 struct ib_uverbs_flow_attr *kern_flow_attr;
3490 struct ib_flow_attr *flow_attr; 3476 struct ib_flow_attr *flow_attr;
@@ -3623,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
3623 err = PTR_ERR(flow_id); 3609 err = PTR_ERR(flow_id);
3624 goto err_free; 3610 goto err_free;
3625 } 3611 }
3626 atomic_inc(&qp->usecnt); 3612
3627 flow_id->qp = qp; 3613 ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
3628 flow_id->device = qp->device;
3629 flow_id->uobject = uobj;
3630 uobj->object = flow_id;
3631 uflow = container_of(uobj, typeof(*uflow), uobject);
3632 uflow->resources = uflow_res;
3633 3614
3634 memset(&resp, 0, sizeof(resp)); 3615 memset(&resp, 0, sizeof(resp));
3635 resp.flow_handle = uobj->id; 3616 resp.flow_handle = uobj->id;
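
ib_uverbs_ex_create_flow() now hands the open-coded flow/uobject wiring to an ib_set_flow() helper, and the flow_resources_* helpers are exported so drivers can reuse them. The helper's definition is not shown in this hunk; the sketch below only reconstructs, from the removed lines, what it presumably consolidates, and may differ from the real inline in detail.

/* Hedged reconstruction based on the removed open-coded lines above. */
static inline void example_set_flow(struct ib_uobject *uobj,
				    struct ib_flow *flow_id, struct ib_qp *qp,
				    struct ib_device *device,
				    struct ib_uflow_resources *uflow_res)
{
	struct ib_uflow_object *uflow;

	atomic_inc(&qp->usecnt);
	flow_id->qp = qp;
	flow_id->device = device;
	flow_id->uobject = uobj;
	uobj->object = flow_id;

	uflow = container_of(uobj, typeof(*uflow), uobject);
	uflow->resources = uflow_res;
}
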
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 1a6b229e3db3..b0e493e8d860 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -57,6 +57,7 @@ struct bundle_priv {
57 struct ib_uverbs_attr *uattrs; 57 struct ib_uverbs_attr *uattrs;
58 58
59 DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); 59 DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
60 DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
60 61
61 /* 62 /*
62 * Must be last. bundle ends in a flex array which overlaps 63 * Must be last. bundle ends in a flex array which overlaps
@@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
143 0, uattr->len - len); 144 0, uattr->len - len);
144} 145}
145 146
147static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
148 const struct uverbs_api_attr *attr_uapi,
149 struct uverbs_objs_arr_attr *attr,
150 struct ib_uverbs_attr *uattr,
151 u32 attr_bkey)
152{
153 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
154 size_t array_len;
155 u32 *idr_vals;
156 int ret = 0;
157 size_t i;
158
159 if (uattr->attr_data.reserved)
160 return -EINVAL;
161
162 if (uattr->len % sizeof(u32))
163 return -EINVAL;
164
165 array_len = uattr->len / sizeof(u32);
166 if (array_len < spec->u2.objs_arr.min_len ||
167 array_len > spec->u2.objs_arr.max_len)
168 return -EINVAL;
169
170 attr->uobjects =
171 uverbs_alloc(&pbundle->bundle,
172 array_size(array_len, sizeof(*attr->uobjects)));
173 if (IS_ERR(attr->uobjects))
174 return PTR_ERR(attr->uobjects);
175
176 /*
177 * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
178 * to store idrs array and avoid additional memory allocation. The
179 * idrs array is offset to the end of the uobjects array so we will be
180 * able to read idr and replace with a pointer.
181 */
182 idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
183
184 if (uattr->len > sizeof(uattr->data)) {
185 ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
186 uattr->len);
187 if (ret)
188 return -EFAULT;
189 } else {
190 memcpy(idr_vals, &uattr->data, uattr->len);
191 }
192
193 for (i = 0; i != array_len; i++) {
194 attr->uobjects[i] = uverbs_get_uobject_from_file(
195 spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
196 spec->u2.objs_arr.access, idr_vals[i]);
197 if (IS_ERR(attr->uobjects[i])) {
198 ret = PTR_ERR(attr->uobjects[i]);
199 break;
200 }
201 }
202
203 attr->len = i;
204 __set_bit(attr_bkey, pbundle->spec_finalize);
205 return ret;
206}
207
208static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
209 struct uverbs_objs_arr_attr *attr,
210 bool commit)
211{
212 const struct uverbs_attr_spec *spec = &attr_uapi->spec;
213 int current_ret;
214 int ret = 0;
215 size_t i;
216
217 for (i = 0; i != attr->len; i++) {
218 current_ret = uverbs_finalize_object(
219 attr->uobjects[i], spec->u2.objs_arr.access, commit);
220 if (!ret)
221 ret = current_ret;
222 }
223
224 return ret;
225}
226
146static int uverbs_process_attr(struct bundle_priv *pbundle, 227static int uverbs_process_attr(struct bundle_priv *pbundle,
147 const struct uverbs_api_attr *attr_uapi, 228 const struct uverbs_api_attr *attr_uapi,
148 struct ib_uverbs_attr *uattr, u32 attr_bkey) 229 struct ib_uverbs_attr *uattr, u32 attr_bkey)
@@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
246 } 327 }
247 328
248 break; 329 break;
330
331 case UVERBS_ATTR_TYPE_IDRS_ARRAY:
332 return uverbs_process_idrs_array(pbundle, attr_uapi,
333 &e->objs_arr_attr, uattr,
334 attr_bkey);
249 default: 335 default:
250 return -EOPNOTSUPP; 336 return -EOPNOTSUPP;
251 } 337 }
@@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
300 return -EPROTONOSUPPORT; 386 return -EPROTONOSUPPORT;
301 return 0; 387 return 0;
302 } 388 }
303 attr = srcu_dereference( 389 attr = rcu_dereference_protected(*slot, true);
304 *slot, &pbundle->bundle.ufile->device->disassociate_srcu);
305 390
306 /* Reject duplicate attributes from user-space */ 391 /* Reject duplicate attributes from user-space */
307 if (test_bit(attr_bkey, pbundle->bundle.attr_present)) 392 if (test_bit(attr_bkey, pbundle->bundle.attr_present))
@@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
384 unsigned int i; 469 unsigned int i;
385 int ret = 0; 470 int ret = 0;
386 471
472 /* fast path for simple uobjects */
387 i = -1; 473 i = -1;
388 while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, 474 while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
389 i + 1)) < key_bitmap_len) { 475 i + 1)) < key_bitmap_len) {
@@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
397 ret = current_ret; 483 ret = current_ret;
398 } 484 }
399 485
486 i = -1;
487 while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
488 i + 1)) < key_bitmap_len) {
489 struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
490 const struct uverbs_api_attr *attr_uapi;
491 void __rcu **slot;
492 int current_ret;
493
494 slot = uapi_get_attr_for_method(
495 pbundle,
496 pbundle->method_key | uapi_bkey_to_key_attr(i));
497 if (WARN_ON(!slot))
498 continue;
499
500 attr_uapi = rcu_dereference_protected(*slot, true);
501
502 if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
503 current_ret = uverbs_free_idrs_array(
504 attr_uapi, &attr->objs_arr_attr, commit);
505 if (!ret)
506 ret = current_ret;
507 }
508 }
509
400 for (memblock = pbundle->allocated_mem; memblock;) { 510 for (memblock = pbundle->allocated_mem; memblock;) {
401 struct bundle_alloc_head *tmp = memblock; 511 struct bundle_alloc_head *tmp = memblock;
402 512
@@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
429 uapi_key_ioctl_method(hdr->method_id)); 539 uapi_key_ioctl_method(hdr->method_id));
430 if (unlikely(!slot)) 540 if (unlikely(!slot))
431 return -EPROTONOSUPPORT; 541 return -EPROTONOSUPPORT;
432 method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); 542 method_elm = rcu_dereference_protected(*slot, true);
433 543
434 if (!method_elm->use_stack) { 544 if (!method_elm->use_stack) {
435 pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); 545 pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
@@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
461 memset(pbundle->bundle.attr_present, 0, 571 memset(pbundle->bundle.attr_present, 0,
462 sizeof(pbundle->bundle.attr_present)); 572 sizeof(pbundle->bundle.attr_present));
463 memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); 573 memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
574 memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
464 575
465 ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); 576 ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
466 destroy_ret = bundle_destroy(pbundle, ret == 0); 577 destroy_ret = bundle_destroy(pbundle, ret == 0);
@@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
611 return 0; 722 return 0;
612} 723}
613EXPORT_SYMBOL(uverbs_copy_to); 724EXPORT_SYMBOL(uverbs_copy_to);
725
726int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
727 size_t idx, s64 lower_bound, u64 upper_bound,
728 s64 *def_val)
729{
730 const struct uverbs_attr *attr;
731
732 attr = uverbs_attr_get(attrs_bundle, idx);
733 if (IS_ERR(attr)) {
734 if ((PTR_ERR(attr) != -ENOENT) || !def_val)
735 return PTR_ERR(attr);
736
737 *to = *def_val;
738 } else {
739 *to = attr->ptr_attr.data;
740 }
741
742 if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
743 return -EINVAL;
744
745 return 0;
746}
747EXPORT_SYMBOL(_uverbs_get_const);
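
The ioctl parser gains two things here: IDRS_ARRAY attributes (note the trick of stashing the user-supplied u32 idr values at the tail of the uobjects pointer array so no second allocation is needed, with cleanup driven by the new spec_finalize bitmap) and an exported _uverbs_get_const() helper for bounded integer attributes with an optional default. A hedged usage sketch of the latter, calling the exported function directly; drivers normally go through a wrapper macro, which is assumed rather than shown here.

/* Hedged sketch: fetch a small bounded constant from the attribute bundle. */
static int example_get_mode(const struct uverbs_attr_bundle *attrs,
			    size_t attr_idx, u8 *mode)
{
	s64 def_val = 0;	/* used when the attribute is absent (-ENOENT) */
	s64 val;
	int ret;

	ret = _uverbs_get_const(&val, attrs, attr_idx, 0, 3, &def_val);
	if (ret)
		return ret;	/* other errors, or value outside [0, 3] */

	*mode = val;
	return 0;
}
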
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 50152c1b1004..6d373f5515b7 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -45,6 +45,7 @@
45#include <linux/cdev.h> 45#include <linux/cdev.h>
46#include <linux/anon_inodes.h> 46#include <linux/anon_inodes.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/sched/mm.h>
48 49
49#include <linux/uaccess.h> 50#include <linux/uaccess.h>
50 51
@@ -72,7 +73,7 @@ enum {
72static dev_t dynamic_uverbs_dev; 73static dev_t dynamic_uverbs_dev;
73static struct class *uverbs_class; 74static struct class *uverbs_class;
74 75
75static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 76static DEFINE_IDA(uverbs_ida);
76 77
77static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, 78static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
78 const char __user *buf, int in_len, 79 const char __user *buf, int in_len,
@@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
169 return ret; 170 return ret;
170} 171}
171 172
172static void ib_uverbs_release_dev(struct kobject *kobj) 173static void ib_uverbs_release_dev(struct device *device)
173{ 174{
174 struct ib_uverbs_device *dev = 175 struct ib_uverbs_device *dev =
175 container_of(kobj, struct ib_uverbs_device, kobj); 176 container_of(device, struct ib_uverbs_device, dev);
176 177
177 uverbs_destroy_api(dev->uapi); 178 uverbs_destroy_api(dev->uapi);
178 cleanup_srcu_struct(&dev->disassociate_srcu); 179 cleanup_srcu_struct(&dev->disassociate_srcu);
179 kfree(dev); 180 kfree(dev);
180} 181}
181 182
182static struct kobj_type ib_uverbs_dev_ktype = {
183 .release = ib_uverbs_release_dev,
184};
185
186static void ib_uverbs_release_async_event_file(struct kref *ref) 183static void ib_uverbs_release_async_event_file(struct kref *ref)
187{ 184{
188 struct ib_uverbs_async_event_file *file = 185 struct ib_uverbs_async_event_file *file =
@@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
265 if (atomic_dec_and_test(&file->device->refcount)) 262 if (atomic_dec_and_test(&file->device->refcount))
266 ib_uverbs_comp_dev(file->device); 263 ib_uverbs_comp_dev(file->device);
267 264
268 kobject_put(&file->device->kobj); 265 put_device(&file->device->dev);
269 kfree(file); 266 kfree(file);
270} 267}
271 268
@@ -817,6 +814,226 @@ out:
817} 814}
818 815
819/* 816/*
817 * Each time we map IO memory into user space this keeps track of the mapping.
818 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
819 * to the zero page and allow the hot unplug to proceed.
820 *
821 * This is necessary for cases like PCI physical hot unplug as the actual BAR
822 * memory may vanish after this and access to it from userspace could MCE.
823 *
824 * RDMA drivers supporting disassociation must have their user space designed
825 * to cope in some way with their IO pages going to the zero page.
826 */
827struct rdma_umap_priv {
828 struct vm_area_struct *vma;
829 struct list_head list;
830};
831
832static const struct vm_operations_struct rdma_umap_ops;
833
834static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
835 struct vm_area_struct *vma)
836{
837 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
838
839 priv->vma = vma;
840 vma->vm_private_data = priv;
841 vma->vm_ops = &rdma_umap_ops;
842
843 mutex_lock(&ufile->umap_lock);
844 list_add(&priv->list, &ufile->umaps);
845 mutex_unlock(&ufile->umap_lock);
846}
847
848/*
849 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
850 * struct
851 */
852static void rdma_umap_open(struct vm_area_struct *vma)
853{
854 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
855 struct rdma_umap_priv *opriv = vma->vm_private_data;
856 struct rdma_umap_priv *priv;
857
858 if (!opriv)
859 return;
860
861 /* We are racing with disassociation */
862 if (!down_read_trylock(&ufile->hw_destroy_rwsem))
863 goto out_zap;
864 /*
865 * Disassociation already completed, the VMA should already be zapped.
866 */
867 if (!ufile->ucontext)
868 goto out_unlock;
869
870 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
871 if (!priv)
872 goto out_unlock;
873 rdma_umap_priv_init(priv, vma);
874
875 up_read(&ufile->hw_destroy_rwsem);
876 return;
877
878out_unlock:
879 up_read(&ufile->hw_destroy_rwsem);
880out_zap:
881 /*
882 * We can't allow the VMA to be created with the actual IO pages, that
883 * would break our API contract, and it can't be stopped at this
884 * point, so zap it.
885 */
886 vma->vm_private_data = NULL;
887 zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
888}
889
890static void rdma_umap_close(struct vm_area_struct *vma)
891{
892 struct ib_uverbs_file *ufile = vma->vm_file->private_data;
893 struct rdma_umap_priv *priv = vma->vm_private_data;
894
895 if (!priv)
896 return;
897
898 /*
899 * The vma holds a reference on the struct file that created it, which
900 * in turn means that the ib_uverbs_file is guaranteed to exist at
901 * this point.
902 */
903 mutex_lock(&ufile->umap_lock);
904 list_del(&priv->list);
905 mutex_unlock(&ufile->umap_lock);
906 kfree(priv);
907}
908
909static const struct vm_operations_struct rdma_umap_ops = {
910 .open = rdma_umap_open,
911 .close = rdma_umap_close,
912};
913
914static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
915 struct vm_area_struct *vma,
916 unsigned long size)
917{
918 struct ib_uverbs_file *ufile = ucontext->ufile;
919 struct rdma_umap_priv *priv;
920
921 if (vma->vm_end - vma->vm_start != size)
922 return ERR_PTR(-EINVAL);
923
924 /* Driver is using this wrong, must be called by ib_uverbs_mmap */
925 if (WARN_ON(!vma->vm_file ||
926 vma->vm_file->private_data != ufile))
927 return ERR_PTR(-EINVAL);
928 lockdep_assert_held(&ufile->device->disassociate_srcu);
929
930 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
931 if (!priv)
932 return ERR_PTR(-ENOMEM);
933 return priv;
934}
935
936/*
937 * Map IO memory into a process. This is to be called by drivers as part of
938 * their mmap() functions if they wish to send something like PCI-E BAR memory
939 * to userspace.
940 */
941int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
942 unsigned long pfn, unsigned long size, pgprot_t prot)
943{
944 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
945
946 if (IS_ERR(priv))
947 return PTR_ERR(priv);
948
949 vma->vm_page_prot = prot;
950 if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
951 kfree(priv);
952 return -EAGAIN;
953 }
954
955 rdma_umap_priv_init(priv, vma);
956 return 0;
957}
958EXPORT_SYMBOL(rdma_user_mmap_io);
959
960/*
961 * The page case is here for a slightly different reason, the driver expects
962 * to be able to free the page it is sharing to user space when it destroys
963 * its ucontext, which means we need to zap the user space references.
964 *
965 * We could handle this differently by providing an API to allocate a shared
966 * page and then only freeing the shared page when the last ufile is
967 * destroyed.
968 */
969int rdma_user_mmap_page(struct ib_ucontext *ucontext,
970 struct vm_area_struct *vma, struct page *page,
971 unsigned long size)
972{
973 struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
974
975 if (IS_ERR(priv))
976 return PTR_ERR(priv);
977
978 if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
979 vma->vm_page_prot)) {
980 kfree(priv);
981 return -EAGAIN;
982 }
983
984 rdma_umap_priv_init(priv, vma);
985 return 0;
986}
987EXPORT_SYMBOL(rdma_user_mmap_page);
988
989void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
990{
991 struct rdma_umap_priv *priv, *next_priv;
992
993 lockdep_assert_held(&ufile->hw_destroy_rwsem);
994
995 while (1) {
996 struct mm_struct *mm = NULL;
997
998 /* Get an arbitrary mm pointer that hasn't been cleaned yet */
999 mutex_lock(&ufile->umap_lock);
1000 if (!list_empty(&ufile->umaps)) {
1001 mm = list_first_entry(&ufile->umaps,
1002 struct rdma_umap_priv, list)
1003 ->vma->vm_mm;
1004 mmget(mm);
1005 }
1006 mutex_unlock(&ufile->umap_lock);
1007 if (!mm)
1008 return;
1009
1010 /*
1011 * The umap_lock is nested under mmap_sem since it is used within
1012 * the vma_ops callbacks, so we have to clean the list one mm
1013 * at a time to get the lock ordering right. Typically there
1014 * will only be one mm, so no big deal.
1015 */
1016 down_write(&mm->mmap_sem);
1017 mutex_lock(&ufile->umap_lock);
1018 list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
1019 list) {
1020 struct vm_area_struct *vma = priv->vma;
1021
1022 if (vma->vm_mm != mm)
1023 continue;
1024 list_del_init(&priv->list);
1025
1026 zap_vma_ptes(vma, vma->vm_start,
1027 vma->vm_end - vma->vm_start);
1028 vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
1029 }
1030 mutex_unlock(&ufile->umap_lock);
1031 up_write(&mm->mmap_sem);
1032 mmput(mm);
1033 }
1034}
1035
1036/*
820 * ib_uverbs_open() does not need the BKL: 1037 * ib_uverbs_open() does not need the BKL:
821 * 1038 *
822 * - the ib_uverbs_device structures are properly reference counted and 1039 * - the ib_uverbs_device structures are properly reference counted and
@@ -839,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
839 if (!atomic_inc_not_zero(&dev->refcount)) 1056 if (!atomic_inc_not_zero(&dev->refcount))
840 return -ENXIO; 1057 return -ENXIO;
841 1058
1059 get_device(&dev->dev);
842 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1060 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
843 mutex_lock(&dev->lists_mutex); 1061 mutex_lock(&dev->lists_mutex);
844 ib_dev = srcu_dereference(dev->ib_dev, 1062 ib_dev = srcu_dereference(dev->ib_dev,
@@ -876,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
876 spin_lock_init(&file->uobjects_lock); 1094 spin_lock_init(&file->uobjects_lock);
877 INIT_LIST_HEAD(&file->uobjects); 1095 INIT_LIST_HEAD(&file->uobjects);
878 init_rwsem(&file->hw_destroy_rwsem); 1096 init_rwsem(&file->hw_destroy_rwsem);
1097 mutex_init(&file->umap_lock);
1098 INIT_LIST_HEAD(&file->umaps);
879 1099
880 filp->private_data = file; 1100 filp->private_data = file;
881 kobject_get(&dev->kobj);
882 list_add_tail(&file->list, &dev->uverbs_file_list); 1101 list_add_tail(&file->list, &dev->uverbs_file_list);
883 mutex_unlock(&dev->lists_mutex); 1102 mutex_unlock(&dev->lists_mutex);
884 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1103 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
@@ -899,6 +1118,7 @@ err:
899 if (atomic_dec_and_test(&dev->refcount)) 1118 if (atomic_dec_and_test(&dev->refcount))
900 ib_uverbs_comp_dev(dev); 1119 ib_uverbs_comp_dev(dev);
901 1120
1121 put_device(&dev->dev);
902 return ret; 1122 return ret;
903} 1123}
904 1124
@@ -909,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
909 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); 1129 uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
910 1130
911 mutex_lock(&file->device->lists_mutex); 1131 mutex_lock(&file->device->lists_mutex);
912 if (!file->is_closed) { 1132 list_del_init(&file->list);
913 list_del(&file->list);
914 file->is_closed = 1;
915 }
916 mutex_unlock(&file->device->lists_mutex); 1133 mutex_unlock(&file->device->lists_mutex);
917 1134
918 if (file->async_file) 1135 if (file->async_file)
@@ -951,37 +1168,34 @@ static struct ib_client uverbs_client = {
951 .remove = ib_uverbs_remove_one 1168 .remove = ib_uverbs_remove_one
952}; 1169};
953 1170
954static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, 1171static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
955 char *buf) 1172 char *buf)
956{ 1173{
1174 struct ib_uverbs_device *dev =
1175 container_of(device, struct ib_uverbs_device, dev);
957 int ret = -ENODEV; 1176 int ret = -ENODEV;
958 int srcu_key; 1177 int srcu_key;
959 struct ib_uverbs_device *dev = dev_get_drvdata(device);
960 struct ib_device *ib_dev; 1178 struct ib_device *ib_dev;
961 1179
962 if (!dev)
963 return -ENODEV;
964
965 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1180 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
966 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1181 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
967 if (ib_dev) 1182 if (ib_dev)
968 ret = sprintf(buf, "%s\n", ib_dev->name); 1183 ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
969 srcu_read_unlock(&dev->disassociate_srcu, srcu_key); 1184 srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
970 1185
971 return ret; 1186 return ret;
972} 1187}
973static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 1188static DEVICE_ATTR_RO(ibdev);
974 1189
975static ssize_t show_dev_abi_version(struct device *device, 1190static ssize_t abi_version_show(struct device *device,
976 struct device_attribute *attr, char *buf) 1191 struct device_attribute *attr, char *buf)
977{ 1192{
978 struct ib_uverbs_device *dev = dev_get_drvdata(device); 1193 struct ib_uverbs_device *dev =
1194 container_of(device, struct ib_uverbs_device, dev);
979 int ret = -ENODEV; 1195 int ret = -ENODEV;
980 int srcu_key; 1196 int srcu_key;
981 struct ib_device *ib_dev; 1197 struct ib_device *ib_dev;
982 1198
983 if (!dev)
984 return -ENODEV;
985 srcu_key = srcu_read_lock(&dev->disassociate_srcu); 1199 srcu_key = srcu_read_lock(&dev->disassociate_srcu);
986 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); 1200 ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
987 if (ib_dev) 1201 if (ib_dev)
@@ -990,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
990 1204
991 return ret; 1205 return ret;
992} 1206}
993static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); 1207static DEVICE_ATTR_RO(abi_version);
1208
1209static struct attribute *ib_dev_attrs[] = {
1210 &dev_attr_abi_version.attr,
1211 &dev_attr_ibdev.attr,
1212 NULL,
1213};
1214
1215static const struct attribute_group dev_attr_group = {
1216 .attrs = ib_dev_attrs,
1217};
994 1218
995static CLASS_ATTR_STRING(abi_version, S_IRUGO, 1219static CLASS_ATTR_STRING(abi_version, S_IRUGO,
996 __stringify(IB_USER_VERBS_ABI_VERSION)); 1220 __stringify(IB_USER_VERBS_ABI_VERSION));
@@ -1028,65 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
1028 return; 1252 return;
1029 } 1253 }
1030 1254
1255 device_initialize(&uverbs_dev->dev);
1256 uverbs_dev->dev.class = uverbs_class;
1257 uverbs_dev->dev.parent = device->dev.parent;
1258 uverbs_dev->dev.release = ib_uverbs_release_dev;
1259 uverbs_dev->groups[0] = &dev_attr_group;
1260 uverbs_dev->dev.groups = uverbs_dev->groups;
1031 atomic_set(&uverbs_dev->refcount, 1); 1261 atomic_set(&uverbs_dev->refcount, 1);
1032 init_completion(&uverbs_dev->comp); 1262 init_completion(&uverbs_dev->comp);
1033 uverbs_dev->xrcd_tree = RB_ROOT; 1263 uverbs_dev->xrcd_tree = RB_ROOT;
1034 mutex_init(&uverbs_dev->xrcd_tree_mutex); 1264 mutex_init(&uverbs_dev->xrcd_tree_mutex);
1035 kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
1036 mutex_init(&uverbs_dev->lists_mutex); 1265 mutex_init(&uverbs_dev->lists_mutex);
1037 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); 1266 INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
1038 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); 1267 INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
1268 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1269 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1039 1270
1040 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 1271 devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
1041 if (devnum >= IB_UVERBS_MAX_DEVICES) 1272 GFP_KERNEL);
1273 if (devnum < 0)
1042 goto err; 1274 goto err;
1043 uverbs_dev->devnum = devnum; 1275 uverbs_dev->devnum = devnum;
1044 set_bit(devnum, dev_map);
1045 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) 1276 if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
1046 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; 1277 base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
1047 else 1278 else
1048 base = IB_UVERBS_BASE_DEV + devnum; 1279 base = IB_UVERBS_BASE_DEV + devnum;
1049 1280
1050 rcu_assign_pointer(uverbs_dev->ib_dev, device);
1051 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1052
1053 if (ib_uverbs_create_uapi(device, uverbs_dev)) 1281 if (ib_uverbs_create_uapi(device, uverbs_dev))
1054 goto err_uapi; 1282 goto err_uapi;
1055 1283
1056 cdev_init(&uverbs_dev->cdev, NULL); 1284 uverbs_dev->dev.devt = base;
1285 dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
1286
1287 cdev_init(&uverbs_dev->cdev,
1288 device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
1057 uverbs_dev->cdev.owner = THIS_MODULE; 1289 uverbs_dev->cdev.owner = THIS_MODULE;
1058 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
1059 cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
1060 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
1061 if (cdev_add(&uverbs_dev->cdev, base, 1))
1062 goto err_cdev;
1063
1064 uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
1065 uverbs_dev->cdev.dev, uverbs_dev,
1066 "uverbs%d", uverbs_dev->devnum);
1067 if (IS_ERR(uverbs_dev->dev))
1068 goto err_cdev;
1069
1070 if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
1071 goto err_class;
1072 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1073 goto err_class;
1074 1290
1075 ib_set_client_data(device, &uverbs_client, uverbs_dev); 1291 ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
1292 if (ret)
1293 goto err_uapi;
1076 1294
1295 ib_set_client_data(device, &uverbs_client, uverbs_dev);
1077 return; 1296 return;
1078 1297
1079err_class:
1080 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1081err_cdev:
1082 cdev_del(&uverbs_dev->cdev);
1083err_uapi: 1298err_uapi:
1084 clear_bit(devnum, dev_map); 1299 ida_free(&uverbs_ida, devnum);
1085err: 1300err:
1086 if (atomic_dec_and_test(&uverbs_dev->refcount)) 1301 if (atomic_dec_and_test(&uverbs_dev->refcount))
1087 ib_uverbs_comp_dev(uverbs_dev); 1302 ib_uverbs_comp_dev(uverbs_dev);
1088 wait_for_completion(&uverbs_dev->comp); 1303 wait_for_completion(&uverbs_dev->comp);
1089 kobject_put(&uverbs_dev->kobj); 1304 put_device(&uverbs_dev->dev);
1090 return; 1305 return;
1091} 1306}
1092 1307
@@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
1107 while (!list_empty(&uverbs_dev->uverbs_file_list)) { 1322 while (!list_empty(&uverbs_dev->uverbs_file_list)) {
1108 file = list_first_entry(&uverbs_dev->uverbs_file_list, 1323 file = list_first_entry(&uverbs_dev->uverbs_file_list,
1109 struct ib_uverbs_file, list); 1324 struct ib_uverbs_file, list);
1110 file->is_closed = 1; 1325 list_del_init(&file->list);
1111 list_del(&file->list);
1112 kref_get(&file->ref); 1326 kref_get(&file->ref);
1113 1327
1114 /* We must release the mutex before going ahead and calling 1328 /* We must release the mutex before going ahead and calling
@@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1156 if (!uverbs_dev) 1370 if (!uverbs_dev)
1157 return; 1371 return;
1158 1372
1159 dev_set_drvdata(uverbs_dev->dev, NULL); 1373 cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
1160 device_destroy(uverbs_class, uverbs_dev->cdev.dev); 1374 ida_free(&uverbs_ida, uverbs_dev->devnum);
1161 cdev_del(&uverbs_dev->cdev);
1162 clear_bit(uverbs_dev->devnum, dev_map);
1163 1375
1164 if (device->disassociate_ucontext) { 1376 if (device->disassociate_ucontext) {
1165 /* We disassociate HW resources and immediately return. 1377 /* We disassociate HW resources and immediately return.
@@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
1182 if (wait_clients) 1394 if (wait_clients)
1183 wait_for_completion(&uverbs_dev->comp); 1395 wait_for_completion(&uverbs_dev->comp);
1184 1396
1185 kobject_put(&uverbs_dev->kobj); 1397 put_device(&uverbs_dev->dev);
1186} 1398}
1187 1399
1188static char *uverbs_devnode(struct device *dev, umode_t *mode) 1400static char *uverbs_devnode(struct device *dev, umode_t *mode)
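
The large uverbs_main.c block above adds core tracking of driver BAR mmaps (an rdma_umap_priv per VMA, zapped to the zero page by uverbs_user_mmap_disassociate() on hot unplug) and exports rdma_user_mmap_io()/rdma_user_mmap_page() so drivers no longer open-code io_remap_pfn_range(). A hedged sketch of a driver ->mmap() routed through the new helper; the offset check and the doorbell-pfn helper are illustrative assumptions, not from this patch.

static int example_mmap(struct ib_ucontext *ucontext,
			struct vm_area_struct *vma)
{
	/* example_db_pfn() and EXAMPLE_DB_PGOFF are assumed for illustration. */
	if (vma->vm_pgoff != EXAMPLE_DB_PGOFF)
		return -EINVAL;

	/* The core records this VMA and zaps it if the device is unplugged. */
	return rdma_user_mmap_io(ucontext, vma, example_db_pfn(ucontext),
				 PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}
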
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index d8cfafe23bd9..cb9486ad5c67 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
326 if (IS_ERR(action)) 326 if (IS_ERR(action))
327 return PTR_ERR(action); 327 return PTR_ERR(action);
328 328
329 atomic_set(&action->usecnt, 0); 329 uverbs_flow_action_fill_action(action, uobj, ib_dev,
330 action->device = ib_dev; 330 IB_FLOW_ACTION_ESP);
331 action->type = IB_FLOW_ACTION_ESP;
332 action->uobject = uobj;
333 uobj->object = action;
334 331
335 return 0; 332 return 0;
336} 333}
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index be854628a7c6..86f3fc5e04b4 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
73 if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) 73 if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
74 method_elm->driver_method |= is_driver; 74 method_elm->driver_method |= is_driver;
75 75
76 /*
77 * Like other uobject based things we only support a single
78 * uobject being NEW'd or DESTROY'd
79 */
80 if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
81 u8 access = attr->attr.u2.objs_arr.access;
82
83 if (WARN_ON(access == UVERBS_ACCESS_NEW ||
84 access == UVERBS_ACCESS_DESTROY))
85 return -EINVAL;
86 }
87
76 attr_slot = 88 attr_slot =
77 uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), 89 uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
78 sizeof(*attr_slot)); 90 sizeof(*attr_slot));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8ec7418e99f0..178899e3ce73 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
264 } 264 }
265 265
266 pd->res.type = RDMA_RESTRACK_PD; 266 pd->res.type = RDMA_RESTRACK_PD;
267 pd->res.kern_name = caller; 267 rdma_restrack_set_task(&pd->res, caller);
268 rdma_restrack_add(&pd->res); 268 rdma_restrack_add(&pd->res);
269 269
270 if (mr_access_flags) { 270 if (mr_access_flags) {
@@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
710 710
711 ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, 711 ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
712 ah_attr->roce.dmac, 712 ah_attr->roce.dmac,
713 sgid_attr->ndev, &hop_limit); 713 sgid_attr, &hop_limit);
714 714
715 grh->hop_limit = hop_limit; 715 grh->hop_limit = hop_limit;
716 return ret; 716 return ret;
@@ -1509,8 +1509,7 @@ static const struct {
1509}; 1509};
1510 1510
1511bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 1511bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1512 enum ib_qp_type type, enum ib_qp_attr_mask mask, 1512 enum ib_qp_type type, enum ib_qp_attr_mask mask)
1513 enum rdma_link_layer ll)
1514{ 1513{
1515 enum ib_qp_attr_mask req_param, opt_param; 1514 enum ib_qp_attr_mask req_param, opt_param;
1516 1515
@@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1629 1628
1630 if (rdma_ib_or_roce(qp->device, port)) { 1629 if (rdma_ib_or_roce(qp->device, port)) {
1631 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { 1630 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1632 pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", 1631 dev_warn(&qp->device->dev,
1633 __func__, qp->device->name); 1632 "%s rq_psn overflow, masking to 24 bits\n",
1633 __func__);
1634 attr->rq_psn &= 0xffffff; 1634 attr->rq_psn &= 0xffffff;
1635 } 1635 }
1636 1636
1637 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { 1637 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1638 pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n", 1638 dev_warn(&qp->device->dev,
1639 __func__, qp->device->name); 1639 "%s sq_psn overflow, masking to 24 bits\n",
1640 __func__);
1640 attr->sq_psn &= 0xffffff; 1641 attr->sq_psn &= 0xffffff;
1641 } 1642 }
1642 } 1643 }
@@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
1888 cq->cq_context = cq_context; 1889 cq->cq_context = cq_context;
1889 atomic_set(&cq->usecnt, 0); 1890 atomic_set(&cq->usecnt, 0);
1890 cq->res.type = RDMA_RESTRACK_CQ; 1891 cq->res.type = RDMA_RESTRACK_CQ;
1891 cq->res.kern_name = caller; 1892 rdma_restrack_set_task(&cq->res, caller);
1892 rdma_restrack_add(&cq->res); 1893 rdma_restrack_add(&cq->res);
1893 } 1894 }
1894 1895
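Two of the hunks above switch __ib_alloc_pd() and __ib_create_cq() from writing res.kern_name directly to calling rdma_restrack_set_task(&res, caller), so the restrack core owns how the creating entity is recorded. The caller string itself comes from the wrapper macros in <rdma/ib_verbs.h>; the sketch below shows that path under the assumption that ib_alloc_pd() still expands to __ib_alloc_pd(device, flags, KBUILD_MODNAME).

/* Illustrative only: a kernel ULP allocating a PD ends up with its
 * module name recorded by restrack via the KBUILD_MODNAME argument the
 * ib_alloc_pd() wrapper passes down to __ib_alloc_pd() above.
 */
#include <rdma/ib_verbs.h>

static struct ib_pd *example_alloc_pd(struct ib_device *dev)
{
	/* expands to __ib_alloc_pd(dev, 0, KBUILD_MODNAME) */
	return ib_alloc_pd(dev, 0);
}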
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 96f76896488d..31baa8939a4f 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -40,7 +40,6 @@
40#ifndef __BNXT_RE_H__ 40#ifndef __BNXT_RE_H__
41#define __BNXT_RE_H__ 41#define __BNXT_RE_H__
42#define ROCE_DRV_MODULE_NAME "bnxt_re" 42#define ROCE_DRV_MODULE_NAME "bnxt_re"
43#define ROCE_DRV_MODULE_VERSION "1.0.0"
44 43
45#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" 44#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
46#define BNXT_RE_PAGE_SHIFT_4K (12) 45#define BNXT_RE_PAGE_SHIFT_4K (12)
@@ -120,6 +119,8 @@ struct bnxt_re_dev {
120#define BNXT_RE_FLAG_HAVE_L2_REF 3 119#define BNXT_RE_FLAG_HAVE_L2_REF 3
121#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 120#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
122#define BNXT_RE_FLAG_QOS_WORK_REG 5 121#define BNXT_RE_FLAG_QOS_WORK_REG 5
122#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
123#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
123#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 124#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
124 struct net_device *netdev; 125 struct net_device *netdev;
125 unsigned int version, major, minor; 126 unsigned int version, major, minor;
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 77416bc61e6e..604b71875f5f 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
68 [BNXT_RE_TX_PKTS] = "tx_pkts", 68 [BNXT_RE_TX_PKTS] = "tx_pkts",
69 [BNXT_RE_TX_BYTES] = "tx_bytes", 69 [BNXT_RE_TX_BYTES] = "tx_bytes",
70 [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", 70 [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
71 [BNXT_RE_RX_DROPS] = "rx_roce_drops",
72 [BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
71 [BNXT_RE_TO_RETRANSMITS] = "to_retransmits", 73 [BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
72 [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", 74 [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
73 [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", 75 [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
@@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
106 [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", 108 [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
107 [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", 109 [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
108 [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", 110 [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
109 [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err" 111 [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
112 [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
110}; 113};
111 114
112int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, 115int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
128 if (bnxt_re_stats) { 131 if (bnxt_re_stats) {
129 stats->value[BNXT_RE_RECOVERABLE_ERRORS] = 132 stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
130 le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); 133 le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
134 stats->value[BNXT_RE_RX_DROPS] =
135 le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
136 stats->value[BNXT_RE_RX_DISCARDS] =
137 le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
131 stats->value[BNXT_RE_RX_PKTS] = 138 stats->value[BNXT_RE_RX_PKTS] =
132 le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); 139 le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
133 stats->value[BNXT_RE_RX_BYTES] = 140 stats->value[BNXT_RE_RX_BYTES] =
@@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
220 rdev->stats.res_tx_pci_err; 227 rdev->stats.res_tx_pci_err;
221 stats->value[BNXT_RE_RES_RX_PCI_ERR] = 228 stats->value[BNXT_RE_RES_RX_PCI_ERR] =
222 rdev->stats.res_rx_pci_err; 229 rdev->stats.res_rx_pci_err;
230 stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
231 rdev->stats.res_oos_drop_count;
223 } 232 }
224 233
225 return ARRAY_SIZE(bnxt_re_stat_name); 234 return ARRAY_SIZE(bnxt_re_stat_name);
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index a01a922717d5..76399f477e5c 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
51 BNXT_RE_TX_PKTS, 51 BNXT_RE_TX_PKTS,
52 BNXT_RE_TX_BYTES, 52 BNXT_RE_TX_BYTES,
53 BNXT_RE_RECOVERABLE_ERRORS, 53 BNXT_RE_RECOVERABLE_ERRORS,
54 BNXT_RE_RX_DROPS,
55 BNXT_RE_RX_DISCARDS,
54 BNXT_RE_TO_RETRANSMITS, 56 BNXT_RE_TO_RETRANSMITS,
55 BNXT_RE_SEQ_ERR_NAKS_RCVD, 57 BNXT_RE_SEQ_ERR_NAKS_RCVD,
56 BNXT_RE_MAX_RETRY_EXCEEDED, 58 BNXT_RE_MAX_RETRY_EXCEEDED,
@@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
90 BNXT_RE_RES_SRQ_LOAD_ERR, 92 BNXT_RE_RES_SRQ_LOAD_ERR,
91 BNXT_RE_RES_TX_PCI_ERR, 93 BNXT_RE_RES_TX_PCI_ERR,
92 BNXT_RE_RES_RX_PCI_ERR, 94 BNXT_RE_RES_RX_PCI_ERR,
95 BNXT_RE_OUT_OF_SEQ_ERR,
93 BNXT_RE_NUM_COUNTERS 96 BNXT_RE_NUM_COUNTERS
94}; 97};
95 98
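Adding a counter in the two hunks above takes three coordinated steps: a new enum index in hw_counters.h, a matching string in bnxt_re_stat_name[], and a value fill in bnxt_re_ib_get_hw_stats(). The names surface under /sys/class/infiniband/<dev>/ports/<port>/hw_counters/ once the driver's alloc_hw_stats callback registers the table; the sketch below shows only that registration shape and is an approximation, not a copy of the bnxt_re callback (it assumes the name table is visible in the same file).

/* Approximate alloc_hw_stats callback: hands the whole name table to the
 * core, which sizes the stats buffer from ARRAY_SIZE() and creates one
 * sysfs file per name.
 */
#include <rdma/ib_verbs.h>

static struct rdma_hw_stats *example_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
					  ARRAY_SIZE(bnxt_re_stat_name),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}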
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index bc2b9e038439..54fdd4cf5288 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
1598 curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); 1598 curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
1599 new_qp_state = qp_attr->qp_state; 1599 new_qp_state = qp_attr->qp_state;
1600 if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state, 1600 if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
1601 ib_qp->qp_type, qp_attr_mask, 1601 ib_qp->qp_type, qp_attr_mask)) {
1602 IB_LINK_LAYER_ETHERNET)) {
1603 dev_err(rdev_to_dev(rdev), 1602 dev_err(rdev_to_dev(rdev),
1604 "Invalid attribute mask: %#x specified ", 1603 "Invalid attribute mask: %#x specified ",
1605 qp_attr_mask); 1604 qp_attr_mask);
@@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
2664 nq->budget++; 2663 nq->budget++;
2665 2664
2666 atomic_inc(&rdev->cq_count); 2665 atomic_inc(&rdev->cq_count);
2666 spin_lock_init(&cq->cq_lock);
2667 2667
2668 if (context) { 2668 if (context) {
2669 struct bnxt_re_cq_resp resp; 2669 struct bnxt_re_cq_resp resp;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 85cd1a3593d6..cf2282654210 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -67,7 +67,7 @@
67#include "hw_counters.h" 67#include "hw_counters.h"
68 68
69static char version[] = 69static char version[] =
70 BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n"; 70 BNXT_RE_DESC "\n";
71 71
72MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>"); 72MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
73MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); 73MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
535 return en_dev; 535 return en_dev;
536} 536}
537 537
538static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
539 char *buf)
540{
541 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
542
543 return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
544}
545static DEVICE_ATTR_RO(hw_rev);
546
547static ssize_t hca_type_show(struct device *device,
548 struct device_attribute *attr, char *buf)
549{
550 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
551
552 return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
553}
554static DEVICE_ATTR_RO(hca_type);
555
556static struct attribute *bnxt_re_attributes[] = {
557 &dev_attr_hw_rev.attr,
558 &dev_attr_hca_type.attr,
559 NULL
560};
561
562static const struct attribute_group bnxt_re_dev_attr_group = {
563 .attrs = bnxt_re_attributes,
564};
565
538static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) 566static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
539{ 567{
540 ib_unregister_device(&rdev->ibdev); 568 ib_unregister_device(&rdev->ibdev);
@@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
547 /* ib device init */ 575 /* ib device init */
548 ibdev->owner = THIS_MODULE; 576 ibdev->owner = THIS_MODULE;
549 ibdev->node_type = RDMA_NODE_IB_CA; 577 ibdev->node_type = RDMA_NODE_IB_CA;
550 strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
551 strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", 578 strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
552 strlen(BNXT_RE_DESC) + 5); 579 strlen(BNXT_RE_DESC) + 5);
553 ibdev->phys_port_cnt = 1; 580 ibdev->phys_port_cnt = 1;
@@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
639 ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; 666 ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
640 ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; 667 ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
641 668
669 rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
642 ibdev->driver_id = RDMA_DRIVER_BNXT_RE; 670 ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
643 return ib_register_device(ibdev, NULL); 671 return ib_register_device(ibdev, "bnxt_re%d", NULL);
644}
645
646static ssize_t show_rev(struct device *device, struct device_attribute *attr,
647 char *buf)
648{
649 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
650
651 return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
652}
653
654static ssize_t show_hca(struct device *device, struct device_attribute *attr,
655 char *buf)
656{
657 struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
658
659 return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
660} 672}
661 673
662static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
663static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
664
665static struct device_attribute *bnxt_re_attributes[] = {
666 &dev_attr_hw_rev,
667 &dev_attr_hca_type
668};
669
670static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) 674static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
671{ 675{
672 dev_put(rdev->netdev); 676 dev_put(rdev->netdev);
@@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
864{ 868{
865 int i; 869 int i;
866 870
867 if (rdev->nq[0].hwq.max_elements) { 871 for (i = 1; i < rdev->num_msix; i++)
868 for (i = 1; i < rdev->num_msix; i++) 872 bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
869 bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
870 }
871 873
872 if (rdev->qplib_res.rcfw) 874 if (rdev->qplib_res.rcfw)
873 bnxt_qplib_cleanup_res(&rdev->qplib_res); 875 bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
876static int bnxt_re_init_res(struct bnxt_re_dev *rdev) 878static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
877{ 879{
878 int rc = 0, i; 880 int rc = 0, i;
881 int num_vec_enabled = 0;
879 882
880 bnxt_qplib_init_res(&rdev->qplib_res); 883 bnxt_qplib_init_res(&rdev->qplib_res);
881 884
@@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
891 "Failed to enable NQ with rc = 0x%x", rc); 894 "Failed to enable NQ with rc = 0x%x", rc);
892 goto fail; 895 goto fail;
893 } 896 }
897 num_vec_enabled++;
894 } 898 }
895 return 0; 899 return 0;
896fail: 900fail:
901 for (i = num_vec_enabled; i >= 0; i--)
902 bnxt_qplib_disable_nq(&rdev->nq[i]);
903
897 return rc; 904 return rc;
898} 905}
899 906
@@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
925static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) 932static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
926{ 933{
927 int rc = 0, i; 934 int rc = 0, i;
935 int num_vec_created = 0;
928 936
929 /* Configure and allocate resources for qplib */ 937 /* Configure and allocate resources for qplib */
930 rdev->qplib_res.rcfw = &rdev->rcfw; 938 rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
951 if (rc) { 959 if (rc) {
952 dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x", 960 dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
953 i, rc); 961 i, rc);
954 goto dealloc_dpi; 962 goto free_nq;
955 } 963 }
956 rc = bnxt_re_net_ring_alloc 964 rc = bnxt_re_net_ring_alloc
957 (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, 965 (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
964 dev_err(rdev_to_dev(rdev), 972 dev_err(rdev_to_dev(rdev),
965 "Failed to allocate NQ fw id with rc = 0x%x", 973 "Failed to allocate NQ fw id with rc = 0x%x",
966 rc); 974 rc);
975 bnxt_qplib_free_nq(&rdev->nq[i]);
967 goto free_nq; 976 goto free_nq;
968 } 977 }
978 num_vec_created++;
969 } 979 }
970 return 0; 980 return 0;
971free_nq: 981free_nq:
972 for (i = 0; i < rdev->num_msix - 1; i++) 982 for (i = num_vec_created; i >= 0; i--) {
983 bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
973 bnxt_qplib_free_nq(&rdev->nq[i]); 984 bnxt_qplib_free_nq(&rdev->nq[i]);
974dealloc_dpi: 985 }
975 bnxt_qplib_dealloc_dpi(&rdev->qplib_res, 986 bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
976 &rdev->qplib_res.dpi_tbl, 987 &rdev->qplib_res.dpi_tbl,
977 &rdev->dpi_privileged); 988 &rdev->dpi_privileged);
@@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
989 struct ib_event ib_event; 1000 struct ib_event ib_event;
990 1001
991 ib_event.device = ibdev; 1002 ib_event.device = ibdev;
992 if (qp) 1003 if (qp) {
993 ib_event.element.qp = qp; 1004 ib_event.element.qp = qp;
994 else 1005 ib_event.event = event;
1006 if (qp->event_handler)
1007 qp->event_handler(&ib_event, qp->qp_context);
1008
1009 } else {
995 ib_event.element.port_num = port_num; 1010 ib_event.element.port_num = port_num;
996 ib_event.event = event; 1011 ib_event.event = event;
997 ib_dispatch_event(&ib_event); 1012 ib_dispatch_event(&ib_event);
1013 }
998} 1014}
999 1015
1000#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02 1016#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
@@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
1189 1205
1190static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) 1206static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
1191{ 1207{
1192 int i, rc; 1208 int rc;
1193 1209
1194 if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { 1210 if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
1195 for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
1196 device_remove_file(&rdev->ibdev.dev,
1197 bnxt_re_attributes[i]);
1198 /* Cleanup ib dev */ 1211 /* Cleanup ib dev */
1199 bnxt_re_unregister_ib(rdev); 1212 bnxt_re_unregister_ib(rdev);
1200 } 1213 }
1201 if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) 1214 if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
1202 cancel_delayed_work(&rdev->worker); 1215 cancel_delayed_work_sync(&rdev->worker);
1203 1216
1204 bnxt_re_cleanup_res(rdev); 1217 if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
1205 bnxt_re_free_res(rdev); 1218 &rdev->flags))
1219 bnxt_re_cleanup_res(rdev);
1220 if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
1221 bnxt_re_free_res(rdev);
1206 1222
1207 if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { 1223 if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
1208 rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); 1224 rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
@@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
1241 1257
1242static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) 1258static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1243{ 1259{
1244 int i, j, rc; 1260 int rc;
1245 1261
1246 bool locked; 1262 bool locked;
1247 1263
@@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1331 pr_err("Failed to allocate resources: %#x\n", rc); 1347 pr_err("Failed to allocate resources: %#x\n", rc);
1332 goto fail; 1348 goto fail;
1333 } 1349 }
1350 set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
1334 rc = bnxt_re_init_res(rdev); 1351 rc = bnxt_re_init_res(rdev);
1335 if (rc) { 1352 if (rc) {
1336 pr_err("Failed to initialize resources: %#x\n", rc); 1353 pr_err("Failed to initialize resources: %#x\n", rc);
1337 goto fail; 1354 goto fail;
1338 } 1355 }
1339 1356
1357 set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
1358
1340 if (!rdev->is_virtfn) { 1359 if (!rdev->is_virtfn) {
1341 rc = bnxt_re_setup_qos(rdev); 1360 rc = bnxt_re_setup_qos(rdev);
1342 if (rc) 1361 if (rc)
@@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
1358 } 1377 }
1359 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); 1378 set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
1360 dev_info(rdev_to_dev(rdev), "Device registered successfully"); 1379 dev_info(rdev_to_dev(rdev), "Device registered successfully");
1361 for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
1362 rc = device_create_file(&rdev->ibdev.dev,
1363 bnxt_re_attributes[i]);
1364 if (rc) {
1365 dev_err(rdev_to_dev(rdev),
1366 "Failed to create IB sysfs: %#x", rc);
1367 /* Must clean up all created device files */
1368 for (j = 0; j < i; j++)
1369 device_remove_file(&rdev->ibdev.dev,
1370 bnxt_re_attributes[j]);
1371 bnxt_re_unregister_ib(rdev);
1372 goto fail;
1373 }
1374 }
1375 ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, 1380 ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
1376 &rdev->active_width); 1381 &rdev->active_width);
1377 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); 1382 set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
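The sysfs hunks in main.c above replace the old device_create_file()/device_remove_file() loops with DEVICE_ATTR_RO() attributes collected into an attribute_group that is handed to the core via rdma_set_device_sysfs_group() before ib_register_device(), which now also takes the device name pattern. A minimal sketch of that pattern for a hypothetical driver "foo" (the attribute value is a placeholder):

/* Minimal attribute-group registration sketch; "foo" and the returned
 * value are placeholders, the API calls mirror the conversion above.
 */
#include <rdma/ib_verbs.h>

static ssize_t hw_rev_show(struct device *device,
			   struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "0x%x\n", 0x1234);	/* placeholder */
}
static DEVICE_ATTR_RO(hw_rev);

static struct attribute *foo_attrs[] = {
	&dev_attr_hw_rev.attr,
	NULL
};

static const struct attribute_group foo_attr_group = {
	.attrs = foo_attrs,
};

static int foo_register(struct ib_device *ibdev)
{
	/* the core creates and removes the files around (un)registration */
	rdma_set_device_sysfs_group(ibdev, &foo_attr_group);
	return ib_register_device(ibdev, "foo%d", NULL);
}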
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 6ad0d46ab879..b98b054148cd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -36,6 +36,8 @@
36 * Description: Fast Path Operators 36 * Description: Fast Path Operators
37 */ 37 */
38 38
39#define dev_fmt(fmt) "QPLIB: " fmt
40
39#include <linux/interrupt.h> 41#include <linux/interrupt.h>
40#include <linux/spinlock.h> 42#include <linux/spinlock.h>
41#include <linux/sched.h> 43#include <linux/sched.h>
@@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
71 73
72 if (!qp->sq.flushed) { 74 if (!qp->sq.flushed) {
73 dev_dbg(&scq->hwq.pdev->dev, 75 dev_dbg(&scq->hwq.pdev->dev,
74 "QPLIB: FP: Adding to SQ Flush list = %p", 76 "FP: Adding to SQ Flush list = %p\n", qp);
75 qp);
76 bnxt_qplib_cancel_phantom_processing(qp); 77 bnxt_qplib_cancel_phantom_processing(qp);
77 list_add_tail(&qp->sq_flush, &scq->sqf_head); 78 list_add_tail(&qp->sq_flush, &scq->sqf_head);
78 qp->sq.flushed = true; 79 qp->sq.flushed = true;
@@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
80 if (!qp->srq) { 81 if (!qp->srq) {
81 if (!qp->rq.flushed) { 82 if (!qp->rq.flushed) {
82 dev_dbg(&rcq->hwq.pdev->dev, 83 dev_dbg(&rcq->hwq.pdev->dev,
83 "QPLIB: FP: Adding to RQ Flush list = %p", 84 "FP: Adding to RQ Flush list = %p\n", qp);
84 qp);
85 list_add_tail(&qp->rq_flush, &rcq->rqf_head); 85 list_add_tail(&qp->rq_flush, &rcq->rqf_head);
86 qp->rq.flushed = true; 86 qp->rq.flushed = true;
87 } 87 }
@@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
207 if (!qp->sq_hdr_buf) { 207 if (!qp->sq_hdr_buf) {
208 rc = -ENOMEM; 208 rc = -ENOMEM;
209 dev_err(&res->pdev->dev, 209 dev_err(&res->pdev->dev,
210 "QPLIB: Failed to create sq_hdr_buf"); 210 "Failed to create sq_hdr_buf\n");
211 goto fail; 211 goto fail;
212 } 212 }
213 } 213 }
@@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
221 if (!qp->rq_hdr_buf) { 221 if (!qp->rq_hdr_buf) {
222 rc = -ENOMEM; 222 rc = -ENOMEM;
223 dev_err(&res->pdev->dev, 223 dev_err(&res->pdev->dev,
224 "QPLIB: Failed to create rq_hdr_buf"); 224 "Failed to create rq_hdr_buf\n");
225 goto fail; 225 goto fail;
226 } 226 }
227 } 227 }
@@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
277 num_cqne_processed++; 277 num_cqne_processed++;
278 else 278 else
279 dev_warn(&nq->pdev->dev, 279 dev_warn(&nq->pdev->dev,
280 "QPLIB: cqn - type 0x%x not handled", 280 "cqn - type 0x%x not handled\n", type);
281 type);
282 spin_unlock_bh(&cq->compl_lock); 281 spin_unlock_bh(&cq->compl_lock);
283 break; 282 break;
284 } 283 }
@@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
298 num_srqne_processed++; 297 num_srqne_processed++;
299 else 298 else
300 dev_warn(&nq->pdev->dev, 299 dev_warn(&nq->pdev->dev,
301 "QPLIB: SRQ event 0x%x not handled", 300 "SRQ event 0x%x not handled\n",
302 nqsrqe->event); 301 nqsrqe->event);
303 break; 302 break;
304 } 303 }
@@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
306 break; 305 break;
307 default: 306 default:
308 dev_warn(&nq->pdev->dev, 307 dev_warn(&nq->pdev->dev,
309 "QPLIB: nqe with type = 0x%x not handled", 308 "nqe with type = 0x%x not handled\n", type);
310 type);
311 break; 309 break;
312 } 310 }
313 raw_cons++; 311 raw_cons++;
@@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
360 } 358 }
361 359
362 /* Make sure the HW is stopped! */ 360 /* Make sure the HW is stopped! */
363 bnxt_qplib_nq_stop_irq(nq, true); 361 if (nq->requested)
362 bnxt_qplib_nq_stop_irq(nq, true);
364 363
365 if (nq->bar_reg_iomem) 364 if (nq->bar_reg_iomem)
366 iounmap(nq->bar_reg_iomem); 365 iounmap(nq->bar_reg_iomem);
@@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
396 rc = irq_set_affinity_hint(nq->vector, &nq->mask); 395 rc = irq_set_affinity_hint(nq->vector, &nq->mask);
397 if (rc) { 396 if (rc) {
398 dev_warn(&nq->pdev->dev, 397 dev_warn(&nq->pdev->dev,
399 "QPLIB: set affinity failed; vector: %d nq_idx: %d\n", 398 "set affinity failed; vector: %d nq_idx: %d\n",
400 nq->vector, nq_indx); 399 nq->vector, nq_indx);
401 } 400 }
402 nq->requested = true; 401 nq->requested = true;
@@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
443 rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); 442 rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
444 if (rc) { 443 if (rc) {
445 dev_err(&nq->pdev->dev, 444 dev_err(&nq->pdev->dev,
446 "QPLIB: Failed to request irq for nq-idx %d", nq_idx); 445 "Failed to request irq for nq-idx %d\n", nq_idx);
447 goto fail; 446 goto fail;
448 } 447 }
449 448
@@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
662 661
663 spin_lock(&srq_hwq->lock); 662 spin_lock(&srq_hwq->lock);
664 if (srq->start_idx == srq->last_idx) { 663 if (srq->start_idx == srq->last_idx) {
665 dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!", 664 dev_err(&srq_hwq->pdev->dev,
666 srq->id); 665 "FP: SRQ (0x%x) is full!\n", srq->id);
667 rc = -EINVAL; 666 rc = -EINVAL;
668 spin_unlock(&srq_hwq->lock); 667 spin_unlock(&srq_hwq->lock);
669 goto done; 668 goto done;
@@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
1324 } 1323 }
1325 } 1324 }
1326 if (i == res->sgid_tbl.max) 1325 if (i == res->sgid_tbl.max)
1327 dev_warn(&res->pdev->dev, "QPLIB: SGID not found??"); 1326 dev_warn(&res->pdev->dev, "SGID not found??\n");
1328 1327
1329 qp->ah.hop_limit = sb->hop_limit; 1328 qp->ah.hop_limit = sb->hop_limit;
1330 qp->ah.traffic_class = sb->traffic_class; 1329 qp->ah.traffic_class = sb->traffic_class;
@@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
1536 1535
1537 if (bnxt_qplib_queue_full(sq)) { 1536 if (bnxt_qplib_queue_full(sq)) {
1538 dev_err(&sq->hwq.pdev->dev, 1537 dev_err(&sq->hwq.pdev->dev,
1539 "QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x", 1538 "prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
1540 sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, 1539 sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
1541 sq->q_full_delta); 1540 sq->q_full_delta);
1542 rc = -ENOMEM; 1541 rc = -ENOMEM;
@@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
1561 /* Copy the inline data */ 1560 /* Copy the inline data */
1562 if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { 1561 if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
1563 dev_warn(&sq->hwq.pdev->dev, 1562 dev_warn(&sq->hwq.pdev->dev,
1564 "QPLIB: Inline data length > 96 detected"); 1563 "Inline data length > 96 detected\n");
1565 data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; 1564 data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
1566 } else { 1565 } else {
1567 data_len = wqe->inline_len; 1566 data_len = wqe->inline_len;
@@ -1776,7 +1775,7 @@ done:
1776 queue_work(qp->scq->nq->cqn_wq, &nq_work->work); 1775 queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
1777 } else { 1776 } else {
1778 dev_err(&sq->hwq.pdev->dev, 1777 dev_err(&sq->hwq.pdev->dev,
1779 "QPLIB: FP: Failed to allocate SQ nq_work!"); 1778 "FP: Failed to allocate SQ nq_work!\n");
1780 rc = -ENOMEM; 1779 rc = -ENOMEM;
1781 } 1780 }
1782 } 1781 }
@@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
1815 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { 1814 if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
1816 sch_handler = true; 1815 sch_handler = true;
1817 dev_dbg(&rq->hwq.pdev->dev, 1816 dev_dbg(&rq->hwq.pdev->dev,
1818 "%s Error QP. Scheduling for poll_cq\n", 1817 "%s: Error QP. Scheduling for poll_cq\n", __func__);
1819 __func__);
1820 goto queue_err; 1818 goto queue_err;
1821 } 1819 }
1822 if (bnxt_qplib_queue_full(rq)) { 1820 if (bnxt_qplib_queue_full(rq)) {
1823 dev_err(&rq->hwq.pdev->dev, 1821 dev_err(&rq->hwq.pdev->dev,
1824 "QPLIB: FP: QP (0x%x) RQ is full!", qp->id); 1822 "FP: QP (0x%x) RQ is full!\n", qp->id);
1825 rc = -EINVAL; 1823 rc = -EINVAL;
1826 goto done; 1824 goto done;
1827 } 1825 }
@@ -1870,7 +1868,7 @@ queue_err:
1870 queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); 1868 queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
1871 } else { 1869 } else {
1872 dev_err(&rq->hwq.pdev->dev, 1870 dev_err(&rq->hwq.pdev->dev,
1873 "QPLIB: FP: Failed to allocate RQ nq_work!"); 1871 "FP: Failed to allocate RQ nq_work!\n");
1874 rc = -ENOMEM; 1872 rc = -ENOMEM;
1875 } 1873 }
1876 } 1874 }
@@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
1932 1930
1933 if (!cq->dpi) { 1931 if (!cq->dpi) {
1934 dev_err(&rcfw->pdev->dev, 1932 dev_err(&rcfw->pdev->dev,
1935 "QPLIB: FP: CREATE_CQ failed due to NULL DPI"); 1933 "FP: CREATE_CQ failed due to NULL DPI\n");
1936 return -EINVAL; 1934 return -EINVAL;
1937 } 1935 }
1938 req.dpi = cpu_to_le32(cq->dpi->dpi); 1936 req.dpi = cpu_to_le32(cq->dpi->dpi);
@@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
1969 INIT_LIST_HEAD(&cq->sqf_head); 1967 INIT_LIST_HEAD(&cq->sqf_head);
1970 INIT_LIST_HEAD(&cq->rqf_head); 1968 INIT_LIST_HEAD(&cq->rqf_head);
1971 spin_lock_init(&cq->compl_lock); 1969 spin_lock_init(&cq->compl_lock);
1970 spin_lock_init(&cq->flush_lock);
1972 1971
1973 bnxt_qplib_arm_cq_enable(cq); 1972 bnxt_qplib_arm_cq_enable(cq);
1974 return 0; 1973 return 0;
@@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
2172 * comes back 2171 * comes back
2173 */ 2172 */
2174 dev_dbg(&cq->hwq.pdev->dev, 2173 dev_dbg(&cq->hwq.pdev->dev,
2175 "FP:Got Phantom CQE"); 2174 "FP: Got Phantom CQE\n");
2176 sq->condition = false; 2175 sq->condition = false;
2177 sq->single = true; 2176 sq->single = true;
2178 rc = 0; 2177 rc = 0;
@@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
2189 peek_raw_cq_cons++; 2188 peek_raw_cq_cons++;
2190 } 2189 }
2191 dev_err(&cq->hwq.pdev->dev, 2190 dev_err(&cq->hwq.pdev->dev,
2192 "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x", 2191 "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
2193 cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); 2192 cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
2194 rc = -EINVAL; 2193 rc = -EINVAL;
2195 } 2194 }
@@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
2213 le64_to_cpu(hwcqe->qp_handle)); 2212 le64_to_cpu(hwcqe->qp_handle));
2214 if (!qp) { 2213 if (!qp) {
2215 dev_err(&cq->hwq.pdev->dev, 2214 dev_err(&cq->hwq.pdev->dev,
2216 "QPLIB: FP: Process Req qp is NULL"); 2215 "FP: Process Req qp is NULL\n");
2217 return -EINVAL; 2216 return -EINVAL;
2218 } 2217 }
2219 sq = &qp->sq; 2218 sq = &qp->sq;
@@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
2221 cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); 2220 cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
2222 if (cqe_sq_cons > sq->hwq.max_elements) { 2221 if (cqe_sq_cons > sq->hwq.max_elements) {
2223 dev_err(&cq->hwq.pdev->dev, 2222 dev_err(&cq->hwq.pdev->dev,
2224 "QPLIB: FP: CQ Process req reported "); 2223 "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
2225 dev_err(&cq->hwq.pdev->dev,
2226 "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
2227 cqe_sq_cons, sq->hwq.max_elements); 2224 cqe_sq_cons, sq->hwq.max_elements);
2228 return -EINVAL; 2225 return -EINVAL;
2229 } 2226 }
2230 2227
2231 if (qp->sq.flushed) { 2228 if (qp->sq.flushed) {
2232 dev_dbg(&cq->hwq.pdev->dev, 2229 dev_dbg(&cq->hwq.pdev->dev,
2233 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2230 "%s: QP in Flush QP = %p\n", __func__, qp);
2234 goto done; 2231 goto done;
2235 } 2232 }
2236 /* Require to walk the sq's swq to fabricate CQEs for all previously 2233 /* Require to walk the sq's swq to fabricate CQEs for all previously
@@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
2262 hwcqe->status != CQ_REQ_STATUS_OK) { 2259 hwcqe->status != CQ_REQ_STATUS_OK) {
2263 cqe->status = hwcqe->status; 2260 cqe->status = hwcqe->status;
2264 dev_err(&cq->hwq.pdev->dev, 2261 dev_err(&cq->hwq.pdev->dev,
2265 "QPLIB: FP: CQ Processed Req "); 2262 "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
2266 dev_err(&cq->hwq.pdev->dev,
2267 "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
2268 sw_sq_cons, cqe->wr_id, cqe->status); 2263 sw_sq_cons, cqe->wr_id, cqe->status);
2269 cqe++; 2264 cqe++;
2270 (*budget)--; 2265 (*budget)--;
@@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2330 qp = (struct bnxt_qplib_qp *)((unsigned long) 2325 qp = (struct bnxt_qplib_qp *)((unsigned long)
2331 le64_to_cpu(hwcqe->qp_handle)); 2326 le64_to_cpu(hwcqe->qp_handle));
2332 if (!qp) { 2327 if (!qp) {
2333 dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); 2328 dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
2334 return -EINVAL; 2329 return -EINVAL;
2335 } 2330 }
2336 if (qp->rq.flushed) { 2331 if (qp->rq.flushed) {
2337 dev_dbg(&cq->hwq.pdev->dev, 2332 dev_dbg(&cq->hwq.pdev->dev,
2338 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2333 "%s: QP in Flush QP = %p\n", __func__, qp);
2339 goto done; 2334 goto done;
2340 } 2335 }
2341 2336
@@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2356 return -EINVAL; 2351 return -EINVAL;
2357 if (wr_id_idx >= srq->hwq.max_elements) { 2352 if (wr_id_idx >= srq->hwq.max_elements) {
2358 dev_err(&cq->hwq.pdev->dev, 2353 dev_err(&cq->hwq.pdev->dev,
2359 "QPLIB: FP: CQ Process RC "); 2354 "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
2360 dev_err(&cq->hwq.pdev->dev,
2361 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2362 wr_id_idx, srq->hwq.max_elements); 2355 wr_id_idx, srq->hwq.max_elements);
2363 return -EINVAL; 2356 return -EINVAL;
2364 } 2357 }
@@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
2371 rq = &qp->rq; 2364 rq = &qp->rq;
2372 if (wr_id_idx >= rq->hwq.max_elements) { 2365 if (wr_id_idx >= rq->hwq.max_elements) {
2373 dev_err(&cq->hwq.pdev->dev, 2366 dev_err(&cq->hwq.pdev->dev,
2374 "QPLIB: FP: CQ Process RC "); 2367 "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
2375 dev_err(&cq->hwq.pdev->dev,
2376 "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
2377 wr_id_idx, rq->hwq.max_elements); 2368 wr_id_idx, rq->hwq.max_elements);
2378 return -EINVAL; 2369 return -EINVAL;
2379 } 2370 }
@@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2409 qp = (struct bnxt_qplib_qp *)((unsigned long) 2400 qp = (struct bnxt_qplib_qp *)((unsigned long)
2410 le64_to_cpu(hwcqe->qp_handle)); 2401 le64_to_cpu(hwcqe->qp_handle));
2411 if (!qp) { 2402 if (!qp) {
2412 dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); 2403 dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
2413 return -EINVAL; 2404 return -EINVAL;
2414 } 2405 }
2415 if (qp->rq.flushed) { 2406 if (qp->rq.flushed) {
2416 dev_dbg(&cq->hwq.pdev->dev, 2407 dev_dbg(&cq->hwq.pdev->dev,
2417 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2408 "%s: QP in Flush QP = %p\n", __func__, qp);
2418 goto done; 2409 goto done;
2419 } 2410 }
2420 cqe = *pcqe; 2411 cqe = *pcqe;
@@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2439 2430
2440 if (wr_id_idx >= srq->hwq.max_elements) { 2431 if (wr_id_idx >= srq->hwq.max_elements) {
2441 dev_err(&cq->hwq.pdev->dev, 2432 dev_err(&cq->hwq.pdev->dev,
2442 "QPLIB: FP: CQ Process UD "); 2433 "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
2443 dev_err(&cq->hwq.pdev->dev,
2444 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2445 wr_id_idx, srq->hwq.max_elements); 2434 wr_id_idx, srq->hwq.max_elements);
2446 return -EINVAL; 2435 return -EINVAL;
2447 } 2436 }
@@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
2454 rq = &qp->rq; 2443 rq = &qp->rq;
2455 if (wr_id_idx >= rq->hwq.max_elements) { 2444 if (wr_id_idx >= rq->hwq.max_elements) {
2456 dev_err(&cq->hwq.pdev->dev, 2445 dev_err(&cq->hwq.pdev->dev,
2457 "QPLIB: FP: CQ Process UD "); 2446 "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
2458 dev_err(&cq->hwq.pdev->dev,
2459 "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
2460 wr_id_idx, rq->hwq.max_elements); 2447 wr_id_idx, rq->hwq.max_elements);
2461 return -EINVAL; 2448 return -EINVAL;
2462 } 2449 }
@@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2508 qp = (struct bnxt_qplib_qp *)((unsigned long) 2495 qp = (struct bnxt_qplib_qp *)((unsigned long)
2509 le64_to_cpu(hwcqe->qp_handle)); 2496 le64_to_cpu(hwcqe->qp_handle));
2510 if (!qp) { 2497 if (!qp) {
2511 dev_err(&cq->hwq.pdev->dev, 2498 dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
2512 "QPLIB: process_cq Raw/QP1 qp is NULL");
2513 return -EINVAL; 2499 return -EINVAL;
2514 } 2500 }
2515 if (qp->rq.flushed) { 2501 if (qp->rq.flushed) {
2516 dev_dbg(&cq->hwq.pdev->dev, 2502 dev_dbg(&cq->hwq.pdev->dev,
2517 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2503 "%s: QP in Flush QP = %p\n", __func__, qp);
2518 goto done; 2504 goto done;
2519 } 2505 }
2520 cqe = *pcqe; 2506 cqe = *pcqe;
@@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2543 srq = qp->srq; 2529 srq = qp->srq;
2544 if (!srq) { 2530 if (!srq) {
2545 dev_err(&cq->hwq.pdev->dev, 2531 dev_err(&cq->hwq.pdev->dev,
2546 "QPLIB: FP: SRQ used but not defined??"); 2532 "FP: SRQ used but not defined??\n");
2547 return -EINVAL; 2533 return -EINVAL;
2548 } 2534 }
2549 if (wr_id_idx >= srq->hwq.max_elements) { 2535 if (wr_id_idx >= srq->hwq.max_elements) {
2550 dev_err(&cq->hwq.pdev->dev, 2536 dev_err(&cq->hwq.pdev->dev,
2551 "QPLIB: FP: CQ Process Raw/QP1 "); 2537 "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
2552 dev_err(&cq->hwq.pdev->dev,
2553 "QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
2554 wr_id_idx, srq->hwq.max_elements); 2538 wr_id_idx, srq->hwq.max_elements);
2555 return -EINVAL; 2539 return -EINVAL;
2556 } 2540 }
@@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
2563 rq = &qp->rq; 2547 rq = &qp->rq;
2564 if (wr_id_idx >= rq->hwq.max_elements) { 2548 if (wr_id_idx >= rq->hwq.max_elements) {
2565 dev_err(&cq->hwq.pdev->dev, 2549 dev_err(&cq->hwq.pdev->dev,
2566 "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); 2550 "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
2567 dev_err(&cq->hwq.pdev->dev,
2568 "QPLIB: ix 0x%x exceeded RQ max 0x%x",
2569 wr_id_idx, rq->hwq.max_elements); 2551 wr_id_idx, rq->hwq.max_elements);
2570 return -EINVAL; 2552 return -EINVAL;
2571 } 2553 }
@@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
2600 /* Check the Status */ 2582 /* Check the Status */
2601 if (hwcqe->status != CQ_TERMINAL_STATUS_OK) 2583 if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
2602 dev_warn(&cq->hwq.pdev->dev, 2584 dev_warn(&cq->hwq.pdev->dev,
2603 "QPLIB: FP: CQ Process Terminal Error status = 0x%x", 2585 "FP: CQ Process Terminal Error status = 0x%x\n",
2604 hwcqe->status); 2586 hwcqe->status);
2605 2587
2606 qp = (struct bnxt_qplib_qp *)((unsigned long) 2588 qp = (struct bnxt_qplib_qp *)((unsigned long)
2607 le64_to_cpu(hwcqe->qp_handle)); 2589 le64_to_cpu(hwcqe->qp_handle));
2608 if (!qp) { 2590 if (!qp) {
2609 dev_err(&cq->hwq.pdev->dev, 2591 dev_err(&cq->hwq.pdev->dev,
2610 "QPLIB: FP: CQ Process terminal qp is NULL"); 2592 "FP: CQ Process terminal qp is NULL\n");
2611 return -EINVAL; 2593 return -EINVAL;
2612 } 2594 }
2613 2595
@@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
2623 2605
2624 if (cqe_cons > sq->hwq.max_elements) { 2606 if (cqe_cons > sq->hwq.max_elements) {
2625 dev_err(&cq->hwq.pdev->dev, 2607 dev_err(&cq->hwq.pdev->dev,
2626 "QPLIB: FP: CQ Process terminal reported "); 2608 "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
2627 dev_err(&cq->hwq.pdev->dev,
2628 "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
2629 cqe_cons, sq->hwq.max_elements); 2609 cqe_cons, sq->hwq.max_elements);
2630 goto do_rq; 2610 goto do_rq;
2631 } 2611 }
2632 2612
2633 if (qp->sq.flushed) { 2613 if (qp->sq.flushed) {
2634 dev_dbg(&cq->hwq.pdev->dev, 2614 dev_dbg(&cq->hwq.pdev->dev,
2635 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2615 "%s: QP in Flush QP = %p\n", __func__, qp);
2636 goto sq_done; 2616 goto sq_done;
2637 } 2617 }
2638 2618
@@ -2673,16 +2653,14 @@ do_rq:
2673 goto done; 2653 goto done;
2674 } else if (cqe_cons > rq->hwq.max_elements) { 2654 } else if (cqe_cons > rq->hwq.max_elements) {
2675 dev_err(&cq->hwq.pdev->dev, 2655 dev_err(&cq->hwq.pdev->dev,
2676 "QPLIB: FP: CQ Processed terminal "); 2656 "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
2677 dev_err(&cq->hwq.pdev->dev,
2678 "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
2679 cqe_cons, rq->hwq.max_elements); 2657 cqe_cons, rq->hwq.max_elements);
2680 goto done; 2658 goto done;
2681 } 2659 }
2682 2660
2683 if (qp->rq.flushed) { 2661 if (qp->rq.flushed) {
2684 dev_dbg(&cq->hwq.pdev->dev, 2662 dev_dbg(&cq->hwq.pdev->dev,
2685 "%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); 2663 "%s: QP in Flush QP = %p\n", __func__, qp);
2686 rc = 0; 2664 rc = 0;
2687 goto done; 2665 goto done;
2688 } 2666 }
@@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
2704 /* Check the Status */ 2682 /* Check the Status */
2705 if (hwcqe->status != CQ_CUTOFF_STATUS_OK) { 2683 if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
2706 dev_err(&cq->hwq.pdev->dev, 2684 dev_err(&cq->hwq.pdev->dev,
2707 "QPLIB: FP: CQ Process Cutoff Error status = 0x%x", 2685 "FP: CQ Process Cutoff Error status = 0x%x\n",
2708 hwcqe->status); 2686 hwcqe->status);
2709 return -EINVAL; 2687 return -EINVAL;
2710 } 2688 }
@@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
2724 2702
2725 spin_lock_irqsave(&cq->flush_lock, flags); 2703 spin_lock_irqsave(&cq->flush_lock, flags);
2726 list_for_each_entry(qp, &cq->sqf_head, sq_flush) { 2704 list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
2727 dev_dbg(&cq->hwq.pdev->dev, 2705 dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
2728 "QPLIB: FP: Flushing SQ QP= %p",
2729 qp);
2730 __flush_sq(&qp->sq, qp, &cqe, &budget); 2706 __flush_sq(&qp->sq, qp, &cqe, &budget);
2731 } 2707 }
2732 2708
2733 list_for_each_entry(qp, &cq->rqf_head, rq_flush) { 2709 list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
2734 dev_dbg(&cq->hwq.pdev->dev, 2710 dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
2735 "QPLIB: FP: Flushing RQ QP= %p",
2736 qp);
2737 __flush_rq(&qp->rq, qp, &cqe, &budget); 2711 __flush_rq(&qp->rq, qp, &cqe, &budget);
2738 } 2712 }
2739 spin_unlock_irqrestore(&cq->flush_lock, flags); 2713 spin_unlock_irqrestore(&cq->flush_lock, flags);
@@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
2801 goto exit; 2775 goto exit;
2802 default: 2776 default:
2803 dev_err(&cq->hwq.pdev->dev, 2777 dev_err(&cq->hwq.pdev->dev,
2804 "QPLIB: process_cq unknown type 0x%lx", 2778 "process_cq unknown type 0x%lx\n",
2805 hw_cqe->cqe_type_toggle & 2779 hw_cqe->cqe_type_toggle &
2806 CQ_BASE_CQE_TYPE_MASK); 2780 CQ_BASE_CQE_TYPE_MASK);
2807 rc = -EINVAL; 2781 rc = -EINVAL;
@@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
2814 * next one 2788 * next one
2815 */ 2789 */
2816 dev_err(&cq->hwq.pdev->dev, 2790 dev_err(&cq->hwq.pdev->dev,
2817 "QPLIB: process_cqe error rc = 0x%x", rc); 2791 "process_cqe error rc = 0x%x\n", rc);
2818 } 2792 }
2819 raw_cons++; 2793 raw_cons++;
2820 } 2794 }
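The bulk of the qplib_fp.c hunk is enabled by the new dev_fmt() define at the top of the file: when dev_fmt() is defined before the includes, every dev_err()/dev_warn()/dev_dbg() in that translation unit gains the "QPLIB: " prefix automatically, so the literal prefix (and the awkwardly split messages) can be dropped from each call site. A minimal sketch of the mechanism, with a made-up message:

/* dev_fmt() is picked up by the dev_*() printing macros in
 * <linux/device.h>; defining it before the includes prefixes every
 * device log line in this file.
 */
#define dev_fmt(fmt) "QPLIB: " fmt

#include <linux/device.h>
#include <linux/pci.h>

static void example_warn(struct pci_dev *pdev, int type)
{
	/* emits: "QPLIB: cqn - type 0x... not handled" */
	dev_warn(&pdev->dev, "cqn - type 0x%x not handled\n", type);
}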
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 2852d350ada1..be4e33e9f962 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -35,6 +35,9 @@
35 * 35 *
36 * Description: RDMA Controller HW interface 36 * Description: RDMA Controller HW interface
37 */ 37 */
38
39#define dev_fmt(fmt) "QPLIB: " fmt
40
38#include <linux/interrupt.h> 41#include <linux/interrupt.h>
39#include <linux/spinlock.h> 42#include <linux/spinlock.h>
40#include <linux/pci.h> 43#include <linux/pci.h>
@@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
96 opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && 99 opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
97 opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { 100 opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
98 dev_err(&rcfw->pdev->dev, 101 dev_err(&rcfw->pdev->dev,
99 "QPLIB: RCFW not initialized, reject opcode 0x%x", 102 "RCFW not initialized, reject opcode 0x%x\n", opcode);
100 opcode);
101 return -EINVAL; 103 return -EINVAL;
102 } 104 }
103 105
104 if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && 106 if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
105 opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { 107 opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
106 dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!"); 108 dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
107 return -EINVAL; 109 return -EINVAL;
108 } 110 }
109 111
@@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
115 */ 117 */
116 spin_lock_irqsave(&cmdq->lock, flags); 118 spin_lock_irqsave(&cmdq->lock, flags);
117 if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) { 119 if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
118 dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!"); 120 dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
119 spin_unlock_irqrestore(&cmdq->lock, flags); 121 spin_unlock_irqrestore(&cmdq->lock, flags);
120 return -EAGAIN; 122 return -EAGAIN;
121 } 123 }
@@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
154 cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; 156 cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
155 if (!cmdqe) { 157 if (!cmdqe) {
156 dev_err(&rcfw->pdev->dev, 158 dev_err(&rcfw->pdev->dev,
157 "QPLIB: RCFW request failed with no cmdqe!"); 159 "RCFW request failed with no cmdqe!\n");
158 goto done; 160 goto done;
159 } 161 }
160 /* Copy a segment of the req cmd to the cmdq */ 162 /* Copy a segment of the req cmd to the cmdq */
@@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
210 212
211 if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) { 213 if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
212 /* send failed */ 214 /* send failed */
213 dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed", 215 dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
214 cookie, opcode); 216 cookie, opcode);
215 return rc; 217 return rc;
216 } 218 }
@@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
224 rc = __wait_for_resp(rcfw, cookie); 226 rc = __wait_for_resp(rcfw, cookie);
225 if (rc) { 227 if (rc) {
226 /* timed out */ 228 /* timed out */
227 dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec", 229 dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
228 cookie, opcode, RCFW_CMD_WAIT_TIME_MS); 230 cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
229 set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); 231 set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
230 return rc; 232 return rc;
@@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
232 234
233 if (evnt->status) { 235 if (evnt->status) {
234 /* failed with status */ 236 /* failed with status */
235 dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x", 237 dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
236 cookie, opcode, evnt->status); 238 cookie, opcode, evnt->status);
237 rc = -EFAULT; 239 rc = -EFAULT;
238 } 240 }
@@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
298 qp_id = le32_to_cpu(err_event->xid); 300 qp_id = le32_to_cpu(err_event->xid);
299 qp = rcfw->qp_tbl[qp_id].qp_handle; 301 qp = rcfw->qp_tbl[qp_id].qp_handle;
300 dev_dbg(&rcfw->pdev->dev, 302 dev_dbg(&rcfw->pdev->dev,
301 "QPLIB: Received QP error notification"); 303 "Received QP error notification\n");
302 dev_dbg(&rcfw->pdev->dev, 304 dev_dbg(&rcfw->pdev->dev,
303 "QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", 305 "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
304 qp_id, err_event->req_err_state_reason, 306 qp_id, err_event->req_err_state_reason,
305 err_event->res_err_state_reason); 307 err_event->res_err_state_reason);
306 if (!qp) 308 if (!qp)
@@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
309 rcfw->aeq_handler(rcfw, qp_event, qp); 311 rcfw->aeq_handler(rcfw, qp_event, qp);
310 break; 312 break;
311 default: 313 default:
312 /* Command Response */ 314 /*
313 spin_lock_irqsave(&cmdq->lock, flags); 315 * Command Response
316 * cmdq->lock needs to be acquired to synchronie
317 * the command send and completion reaping. This function
318 * is always called with creq->lock held. Using
319 * the nested variant of spin_lock.
320 *
321 */
322
323 spin_lock_irqsave_nested(&cmdq->lock, flags,
324 SINGLE_DEPTH_NESTING);
314 cookie = le16_to_cpu(qp_event->cookie); 325 cookie = le16_to_cpu(qp_event->cookie);
315 mcookie = qp_event->cookie; 326 mcookie = qp_event->cookie;
316 blocked = cookie & RCFW_CMD_IS_BLOCKING; 327 blocked = cookie & RCFW_CMD_IS_BLOCKING;
@@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
322 memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); 333 memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
323 crsqe->resp = NULL; 334 crsqe->resp = NULL;
324 } else { 335 } else {
325 dev_err(&rcfw->pdev->dev, 336 if (crsqe->resp && crsqe->resp->cookie)
326 "QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x", 337 dev_err(&rcfw->pdev->dev,
327 crsqe->resp ? "mismatch" : "collision", 338 "CMD %s cookie sent=%#x, recd=%#x\n",
328 crsqe->resp ? crsqe->resp->cookie : 0, mcookie); 339 crsqe->resp ? "mismatch" : "collision",
340 crsqe->resp ? crsqe->resp->cookie : 0,
341 mcookie);
329 } 342 }
330 if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap)) 343 if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
331 dev_warn(&rcfw->pdev->dev, 344 dev_warn(&rcfw->pdev->dev,
332 "QPLIB: CMD bit %d was not requested", cbit); 345 "CMD bit %d was not requested\n", cbit);
333 cmdq->cons += crsqe->req_size; 346 cmdq->cons += crsqe->req_size;
334 crsqe->req_size = 0; 347 crsqe->req_size = 0;
335 348
@@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
376 (rcfw, (struct creq_func_event *)creqe)) 389 (rcfw, (struct creq_func_event *)creqe))
377 rcfw->creq_func_event_processed++; 390 rcfw->creq_func_event_processed++;
378 else 391 else
379 dev_warn 392 dev_warn(&rcfw->pdev->dev,
380 (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled", 393 "aeqe:%#x Not handled\n", type);
381 type);
382 break; 394 break;
383 default: 395 default:
384 dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with "); 396 if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
385 dev_warn(&rcfw->pdev->dev, 397 dev_warn(&rcfw->pdev->dev,
386 "QPLIB: op_event = 0x%x not handled", type); 398 "creqe with event 0x%x not handled\n",
399 type);
387 break; 400 break;
388 } 401 }
389 raw_cons++; 402 raw_cons++;
@@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
551 BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, 564 BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
552 HWQ_TYPE_L2_CMPL)) { 565 HWQ_TYPE_L2_CMPL)) {
553 dev_err(&rcfw->pdev->dev, 566 dev_err(&rcfw->pdev->dev,
554 "QPLIB: HW channel CREQ allocation failed"); 567 "HW channel CREQ allocation failed\n");
555 goto fail; 568 goto fail;
556 } 569 }
557 rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; 570 rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
560 BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, 573 BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
561 HWQ_TYPE_CTX)) { 574 HWQ_TYPE_CTX)) {
562 dev_err(&rcfw->pdev->dev, 575 dev_err(&rcfw->pdev->dev,
563 "QPLIB: HW channel CMDQ allocation failed"); 576 "HW channel CMDQ allocation failed\n");
564 goto fail; 577 goto fail;
565 } 578 }
566 579
@@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
605 618
606 bnxt_qplib_rcfw_stop_irq(rcfw, true); 619 bnxt_qplib_rcfw_stop_irq(rcfw, true);
607 620
608 if (rcfw->cmdq_bar_reg_iomem) 621 iounmap(rcfw->cmdq_bar_reg_iomem);
609 iounmap(rcfw->cmdq_bar_reg_iomem); 622 iounmap(rcfw->creq_bar_reg_iomem);
610 rcfw->cmdq_bar_reg_iomem = NULL;
611
612 if (rcfw->creq_bar_reg_iomem)
613 iounmap(rcfw->creq_bar_reg_iomem);
614 rcfw->creq_bar_reg_iomem = NULL;
615 623
616 indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size); 624 indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
617 if (indx != rcfw->bmap_size) 625 if (indx != rcfw->bmap_size)
618 dev_err(&rcfw->pdev->dev, 626 dev_err(&rcfw->pdev->dev,
619 "QPLIB: disabling RCFW with pending cmd-bit %lx", indx); 627 "disabling RCFW with pending cmd-bit %lx\n", indx);
620 kfree(rcfw->cmdq_bitmap); 628 kfree(rcfw->cmdq_bitmap);
621 rcfw->bmap_size = 0; 629 rcfw->bmap_size = 0;
622 630
631 rcfw->cmdq_bar_reg_iomem = NULL;
632 rcfw->creq_bar_reg_iomem = NULL;
623 rcfw->aeq_handler = NULL; 633 rcfw->aeq_handler = NULL;
624 rcfw->vector = 0; 634 rcfw->vector = 0;
625} 635}
@@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
681 RCFW_COMM_BASE_OFFSET, 691 RCFW_COMM_BASE_OFFSET,
682 RCFW_COMM_SIZE); 692 RCFW_COMM_SIZE);
683 if (!rcfw->cmdq_bar_reg_iomem) { 693 if (!rcfw->cmdq_bar_reg_iomem) {
684 dev_err(&rcfw->pdev->dev, 694 dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
685 "QPLIB: CMDQ BAR region %d mapping failed",
686 rcfw->cmdq_bar_reg); 695 rcfw->cmdq_bar_reg);
687 return -ENOMEM; 696 return -ENOMEM;
688 } 697 }
@@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
697 res_base = pci_resource_start(pdev, rcfw->creq_bar_reg); 706 res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
698 if (!res_base) 707 if (!res_base)
699 dev_err(&rcfw->pdev->dev, 708 dev_err(&rcfw->pdev->dev,
700 "QPLIB: CREQ BAR region %d resc start is 0!", 709 "CREQ BAR region %d resc start is 0!\n",
701 rcfw->creq_bar_reg); 710 rcfw->creq_bar_reg);
702 rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, 711 rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
703 4); 712 4);
704 if (!rcfw->creq_bar_reg_iomem) { 713 if (!rcfw->creq_bar_reg_iomem) {
705 dev_err(&rcfw->pdev->dev, 714 dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
706 "QPLIB: CREQ BAR region %d mapping failed",
707 rcfw->creq_bar_reg); 715 rcfw->creq_bar_reg);
716 iounmap(rcfw->cmdq_bar_reg_iomem);
717 rcfw->cmdq_bar_reg_iomem = NULL;
708 return -ENOMEM; 718 return -ENOMEM;
709 } 719 }
710 rcfw->creq_qp_event_processed = 0; 720 rcfw->creq_qp_event_processed = 0;
@@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
717 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); 727 rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
718 if (rc) { 728 if (rc) {
719 dev_err(&rcfw->pdev->dev, 729 dev_err(&rcfw->pdev->dev,
720 "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc); 730 "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
721 bnxt_qplib_disable_rcfw_channel(rcfw); 731 bnxt_qplib_disable_rcfw_channel(rcfw);
722 return rc; 732 return rc;
723 } 733 }
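The command-completion hunk above switches cmdq->lock to spin_lock_irqsave_nested() with SINGLE_DEPTH_NESTING because that path always runs with creq->lock already held, and the subclass annotation tells lockdep the nested acquisition is deliberate. A compilable sketch of that locking shape; the two locks and the work done under them are placeholders, not the driver's structures:

/* Outer lock is always taken first; the inner lock is annotated with
 * SINGLE_DEPTH_NESTING so lockdep treats the nesting as intentional.
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer_lock);	/* stands in for creq->lock */
static DEFINE_SPINLOCK(inner_lock);	/* stands in for cmdq->lock */

static void example_reap_completion(void)
{
	unsigned long oflags, iflags;

	spin_lock_irqsave(&outer_lock, oflags);
	/* ... decode the completion under the outer lock ... */
	spin_lock_irqsave_nested(&inner_lock, iflags, SINGLE_DEPTH_NESTING);
	/* ... update the command queue state under the inner lock ... */
	spin_unlock_irqrestore(&inner_lock, iflags);
	spin_unlock_irqrestore(&outer_lock, oflags);
}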
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 46416dfe8830..9a8687dc0a79 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
154 void *qp_handle; /* ptr to qplib_qp */ 154 void *qp_handle; /* ptr to qplib_qp */
155}; 155};
156 156
157#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
158
157/* RCFW Communication Channels */ 159/* RCFW Communication Channels */
158struct bnxt_qplib_rcfw { 160struct bnxt_qplib_rcfw {
159 struct pci_dev *pdev; 161 struct pci_dev *pdev;
@@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
190 struct bnxt_qplib_crsq *crsqe_tbl; 192 struct bnxt_qplib_crsq *crsqe_tbl;
191 int qp_tbl_size; 193 int qp_tbl_size;
192 struct bnxt_qplib_qp_node *qp_tbl; 194 struct bnxt_qplib_qp_node *qp_tbl;
195 u64 oos_prev;
196 u32 init_oos_stats;
193}; 197};
194 198
195void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); 199void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 539a5d44e6db..59eeac55626f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -36,6 +36,8 @@
36 * Description: QPLib resource manager 36 * Description: QPLib resource manager
37 */ 37 */
38 38
39#define dev_fmt(fmt) "QPLIB: " fmt
40
39#include <linux/spinlock.h> 41#include <linux/spinlock.h>
40#include <linux/pci.h> 42#include <linux/pci.h>
41#include <linux/interrupt.h> 43#include <linux/interrupt.h>
@@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
68 pbl->pg_map_arr[i]); 70 pbl->pg_map_arr[i]);
69 else 71 else
70 dev_warn(&pdev->dev, 72 dev_warn(&pdev->dev,
71 "QPLIB: PBL free pg_arr[%d] empty?!", 73 "PBL free pg_arr[%d] empty?!\n", i);
72 i);
73 pbl->pg_arr[i] = NULL; 74 pbl->pg_arr[i] = NULL;
74 } 75 }
75 } 76 }
@@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
537 struct bnxt_qplib_pkey_tbl *pkey_tbl) 538 struct bnxt_qplib_pkey_tbl *pkey_tbl)
538{ 539{
539 if (!pkey_tbl->tbl) 540 if (!pkey_tbl->tbl)
540 dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present"); 541 dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
541 else 542 else
542 kfree(pkey_tbl->tbl); 543 kfree(pkey_tbl->tbl);
543 544
@@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
578 struct bnxt_qplib_pd *pd) 579 struct bnxt_qplib_pd *pd)
579{ 580{
580 if (test_and_set_bit(pd->id, pdt->tbl)) { 581 if (test_and_set_bit(pd->id, pdt->tbl)) {
581 dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d", 582 dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
582 pd->id); 583 pd->id);
583 return -EINVAL; 584 return -EINVAL;
584 } 585 }
@@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
639 struct bnxt_qplib_dpi *dpi) 640 struct bnxt_qplib_dpi *dpi)
640{ 641{
641 if (dpi->dpi >= dpit->max) { 642 if (dpi->dpi >= dpit->max) {
642 dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi); 643 dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
643 return -EINVAL; 644 return -EINVAL;
644 } 645 }
645 if (test_and_set_bit(dpi->dpi, dpit->tbl)) { 646 if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
646 dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d", 647 dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
647 dpi->dpi); 648 dpi->dpi);
648 return -EINVAL; 649 return -EINVAL;
649 } 650 }
@@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
673 u32 dbr_len, bytes; 674 u32 dbr_len, bytes;
674 675
675 if (dpit->dbr_bar_reg_iomem) { 676 if (dpit->dbr_bar_reg_iomem) {
676 dev_err(&res->pdev->dev, 677 dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
677 "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg); 678 dbr_bar_reg);
678 return -EALREADY; 679 return -EALREADY;
679 } 680 }
680 681
681 bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg); 682 bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
682 if (!bar_reg_base) { 683 if (!bar_reg_base) {
683 dev_err(&res->pdev->dev, 684 dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
684 "QPLIB: BAR region %d resc start failed", dbr_bar_reg); 685 dbr_bar_reg);
685 return -ENOMEM; 686 return -ENOMEM;
686 } 687 }
687 688
688 dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset; 689 dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
689 if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) { 690 if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
690 dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d", 691 dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
691 dbr_len);
692 return -ENOMEM; 692 return -ENOMEM;
693 } 693 }
694 694
@@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
696 dbr_len); 696 dbr_len);
697 if (!dpit->dbr_bar_reg_iomem) { 697 if (!dpit->dbr_bar_reg_iomem) {
698 dev_err(&res->pdev->dev, 698 dev_err(&res->pdev->dev,
699 "QPLIB: FP: DBR BAR region %d mapping failed", 699 "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
700 dbr_bar_reg);
701 return -ENOMEM; 700 return -ENOMEM;
702 } 701 }
703 702
@@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
767 stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, 766 stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
768 &stats->dma_map, GFP_KERNEL); 767 &stats->dma_map, GFP_KERNEL);
769 if (!stats->dma) { 768 if (!stats->dma) {
770 dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed"); 769 dev_err(&pdev->dev, "Stats DMA allocation failed\n");
771 return -ENOMEM; 770 return -ENOMEM;
772 } 771 }
773 return 0; 772 return 0;
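
Editor's note: the literal "QPLIB: " strings disappear from the qplib_res.c messages above because the file now defines the kernel's dev_fmt() hook ahead of its includes; every dev_err()/dev_warn()/dev_dbg() in the file then picks up the prefix automatically. A minimal sketch of the mechanism (illustrative function, not driver code):

#define dev_fmt(fmt) "QPLIB: " fmt	/* must precede the dev_printk helpers */

#include <linux/device.h>

static void report(struct device *dev, int rc)
{
	/* logged as "<driver> <device>: QPLIB: request failed rc = -12" */
	dev_err(dev, "request failed rc = %d\n", rc);
}
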
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 4097f3fa25c5..5216b5f844cc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -36,6 +36,8 @@
36 * Description: Slow Path Operators 36 * Description: Slow Path Operators
37 */ 37 */
38 38
39#define dev_fmt(fmt) "QPLIB: " fmt
40
39#include <linux/interrupt.h> 41#include <linux/interrupt.h>
40#include <linux/spinlock.h> 42#include <linux/spinlock.h>
41#include <linux/sched.h> 43#include <linux/sched.h>
@@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
89 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 91 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
90 if (!sbuf) { 92 if (!sbuf) {
91 dev_err(&rcfw->pdev->dev, 93 dev_err(&rcfw->pdev->dev,
92 "QPLIB: SP: QUERY_FUNC alloc side buffer failed"); 94 "SP: QUERY_FUNC alloc side buffer failed\n");
93 return -ENOMEM; 95 return -ENOMEM;
94 } 96 }
95 97
@@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
135 attr->max_srq = le16_to_cpu(sb->max_srq); 137 attr->max_srq = le16_to_cpu(sb->max_srq);
136 attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; 138 attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
137 attr->max_srq_sges = sb->max_srq_sge; 139 attr->max_srq_sges = sb->max_srq_sge;
138 /* Bono only reports 1 PKEY for now, but it can support > 1 */
139 attr->max_pkey = le32_to_cpu(sb->max_pkeys); 140 attr->max_pkey = le32_to_cpu(sb->max_pkeys);
141 /*
 142 * Some versions of FW report more than 0xFFFF.
 143 * Restrict it for now to 0xFFFF to avoid
 144 * reporting a truncated value.
145 */
146 if (attr->max_pkey > 0xFFFF) {
147 /* ib_port_attr::pkey_tbl_len is u16 */
148 attr->max_pkey = 0xFFFF;
149 }
140 150
141 attr->max_inline_data = le32_to_cpu(sb->max_inline_data); 151 attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
142 attr->l2_db_size = (sb->l2_db_space_size + 1) * 152 attr->l2_db_size = (sb->l2_db_space_size + 1) *
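
Editor's note: the clamp added above exists because the firmware field is 32 bits wide while ib_port_attr::pkey_tbl_len, which this value ultimately feeds, is a u16; anything above 0xFFFF would be reported truncated. An equivalent one-line formulation, shown only as a user-space sketch:

#include <stdint.h>

/* Keep a firmware-reported 32-bit P_Key count within a 16-bit table length. */
static uint16_t clamp_pkey_count(uint32_t fw_max_pkeys)
{
	return fw_max_pkeys > 0xFFFF ? 0xFFFF : (uint16_t)fw_max_pkeys;
}
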
@@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
186 (void *)&resp, 196 (void *)&resp,
187 NULL, 0); 197 NULL, 0);
188 if (rc) { 198 if (rc) {
189 dev_err(&res->pdev->dev, 199 dev_err(&res->pdev->dev, "Failed to set function resources\n");
190 "QPLIB: Failed to set function resources");
191 } 200 }
192 return rc; 201 return rc;
193} 202}
@@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
199{ 208{
200 if (index >= sgid_tbl->max) { 209 if (index >= sgid_tbl->max) {
201 dev_err(&res->pdev->dev, 210 dev_err(&res->pdev->dev,
202 "QPLIB: Index %d exceeded SGID table max (%d)", 211 "Index %d exceeded SGID table max (%d)\n",
203 index, sgid_tbl->max); 212 index, sgid_tbl->max);
204 return -EINVAL; 213 return -EINVAL;
205 } 214 }
@@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
217 int index; 226 int index;
218 227
219 if (!sgid_tbl) { 228 if (!sgid_tbl) {
220 dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); 229 dev_err(&res->pdev->dev, "SGID table not allocated\n");
221 return -EINVAL; 230 return -EINVAL;
222 } 231 }
223 /* Do we need a sgid_lock here? */ 232 /* Do we need a sgid_lock here? */
224 if (!sgid_tbl->active) { 233 if (!sgid_tbl->active) {
225 dev_err(&res->pdev->dev, 234 dev_err(&res->pdev->dev, "SGID table has no active entries\n");
226 "QPLIB: SGID table has no active entries");
227 return -ENOMEM; 235 return -ENOMEM;
228 } 236 }
229 for (index = 0; index < sgid_tbl->max; index++) { 237 for (index = 0; index < sgid_tbl->max; index++) {
@@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
231 break; 239 break;
232 } 240 }
233 if (index == sgid_tbl->max) { 241 if (index == sgid_tbl->max) {
234 dev_warn(&res->pdev->dev, "GID not found in the SGID table"); 242 dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
235 return 0; 243 return 0;
236 } 244 }
237 /* Remove GID from the SGID table */ 245 /* Remove GID from the SGID table */
@@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
244 RCFW_CMD_PREP(req, DELETE_GID, cmd_flags); 252 RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
245 if (sgid_tbl->hw_id[index] == 0xFFFF) { 253 if (sgid_tbl->hw_id[index] == 0xFFFF) {
246 dev_err(&res->pdev->dev, 254 dev_err(&res->pdev->dev,
247 "QPLIB: GID entry contains an invalid HW id"); 255 "GID entry contains an invalid HW id\n");
248 return -EINVAL; 256 return -EINVAL;
249 } 257 }
250 req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]); 258 req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
258 sgid_tbl->vlan[index] = 0; 266 sgid_tbl->vlan[index] = 0;
259 sgid_tbl->active--; 267 sgid_tbl->active--;
260 dev_dbg(&res->pdev->dev, 268 dev_dbg(&res->pdev->dev,
261 "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", 269 "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
262 index, sgid_tbl->hw_id[index], sgid_tbl->active); 270 index, sgid_tbl->hw_id[index], sgid_tbl->active);
263 sgid_tbl->hw_id[index] = (u16)-1; 271 sgid_tbl->hw_id[index] = (u16)-1;
264 272
@@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
277 int i, free_idx; 285 int i, free_idx;
278 286
279 if (!sgid_tbl) { 287 if (!sgid_tbl) {
280 dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); 288 dev_err(&res->pdev->dev, "SGID table not allocated\n");
281 return -EINVAL; 289 return -EINVAL;
282 } 290 }
283 /* Do we need a sgid_lock here? */ 291 /* Do we need a sgid_lock here? */
284 if (sgid_tbl->active == sgid_tbl->max) { 292 if (sgid_tbl->active == sgid_tbl->max) {
285 dev_err(&res->pdev->dev, "QPLIB: SGID table is full"); 293 dev_err(&res->pdev->dev, "SGID table is full\n");
286 return -ENOMEM; 294 return -ENOMEM;
287 } 295 }
288 free_idx = sgid_tbl->max; 296 free_idx = sgid_tbl->max;
289 for (i = 0; i < sgid_tbl->max; i++) { 297 for (i = 0; i < sgid_tbl->max; i++) {
290 if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) { 298 if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
291 dev_dbg(&res->pdev->dev, 299 dev_dbg(&res->pdev->dev,
292 "QPLIB: SGID entry already exist in entry %d!", 300 "SGID entry already exist in entry %d!\n", i);
293 i);
294 *index = i; 301 *index = i;
295 return -EALREADY; 302 return -EALREADY;
296 } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero, 303 } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
301 } 308 }
302 if (free_idx == sgid_tbl->max) { 309 if (free_idx == sgid_tbl->max) {
303 dev_err(&res->pdev->dev, 310 dev_err(&res->pdev->dev,
304 "QPLIB: SGID table is FULL but count is not MAX??"); 311 "SGID table is FULL but count is not MAX??\n");
305 return -ENOMEM; 312 return -ENOMEM;
306 } 313 }
307 if (update) { 314 if (update) {
@@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
348 sgid_tbl->vlan[free_idx] = 1; 355 sgid_tbl->vlan[free_idx] = 1;
349 356
350 dev_dbg(&res->pdev->dev, 357 dev_dbg(&res->pdev->dev,
351 "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", 358 "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
352 free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); 359 free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
353 360
354 *index = free_idx; 361 *index = free_idx;
@@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
404 } 411 }
405 if (index >= pkey_tbl->max) { 412 if (index >= pkey_tbl->max) {
406 dev_err(&res->pdev->dev, 413 dev_err(&res->pdev->dev,
407 "QPLIB: Index %d exceeded PKEY table max (%d)", 414 "Index %d exceeded PKEY table max (%d)\n",
408 index, pkey_tbl->max); 415 index, pkey_tbl->max);
409 return -EINVAL; 416 return -EINVAL;
410 } 417 }
@@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
419 int i, rc = 0; 426 int i, rc = 0;
420 427
421 if (!pkey_tbl) { 428 if (!pkey_tbl) {
422 dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); 429 dev_err(&res->pdev->dev, "PKEY table not allocated\n");
423 return -EINVAL; 430 return -EINVAL;
424 } 431 }
425 432
426 /* Do we need a pkey_lock here? */ 433 /* Do we need a pkey_lock here? */
427 if (!pkey_tbl->active) { 434 if (!pkey_tbl->active) {
428 dev_err(&res->pdev->dev, 435 dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
429 "QPLIB: PKEY table has no active entries");
430 return -ENOMEM; 436 return -ENOMEM;
431 } 437 }
432 for (i = 0; i < pkey_tbl->max; i++) { 438 for (i = 0; i < pkey_tbl->max; i++) {
@@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
435 } 441 }
436 if (i == pkey_tbl->max) { 442 if (i == pkey_tbl->max) {
437 dev_err(&res->pdev->dev, 443 dev_err(&res->pdev->dev,
438 "QPLIB: PKEY 0x%04x not found in the pkey table", 444 "PKEY 0x%04x not found in the pkey table\n", *pkey);
439 *pkey);
440 return -ENOMEM; 445 return -ENOMEM;
441 } 446 }
442 memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey)); 447 memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
453 int i, free_idx, rc = 0; 458 int i, free_idx, rc = 0;
454 459
455 if (!pkey_tbl) { 460 if (!pkey_tbl) {
456 dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); 461 dev_err(&res->pdev->dev, "PKEY table not allocated\n");
457 return -EINVAL; 462 return -EINVAL;
458 } 463 }
459 464
460 /* Do we need a pkey_lock here? */ 465 /* Do we need a pkey_lock here? */
461 if (pkey_tbl->active == pkey_tbl->max) { 466 if (pkey_tbl->active == pkey_tbl->max) {
462 dev_err(&res->pdev->dev, "QPLIB: PKEY table is full"); 467 dev_err(&res->pdev->dev, "PKEY table is full\n");
463 return -ENOMEM; 468 return -ENOMEM;
464 } 469 }
465 free_idx = pkey_tbl->max; 470 free_idx = pkey_tbl->max;
@@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
471 } 476 }
472 if (free_idx == pkey_tbl->max) { 477 if (free_idx == pkey_tbl->max) {
473 dev_err(&res->pdev->dev, 478 dev_err(&res->pdev->dev,
474 "QPLIB: PKEY table is FULL but count is not MAX??"); 479 "PKEY table is FULL but count is not MAX??\n");
475 return -ENOMEM; 480 return -ENOMEM;
476 } 481 }
477 /* Add PKEY to the pkey_tbl */ 482 /* Add PKEY to the pkey_tbl */
@@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
555 int rc; 560 int rc;
556 561
557 if (mrw->lkey == 0xFFFFFFFF) { 562 if (mrw->lkey == 0xFFFFFFFF) {
558 dev_info(&res->pdev->dev, 563 dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
559 "QPLIB: SP: Free a reserved lkey MRW");
560 return 0; 564 return 0;
561 } 565 }
562 566
@@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
666 pages++; 670 pages++;
667 671
668 if (pages > MAX_PBL_LVL_1_PGS) { 672 if (pages > MAX_PBL_LVL_1_PGS) {
669 dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
670 dev_err(&res->pdev->dev, 673 dev_err(&res->pdev->dev,
671 "requested (0x%x) exceeded max (0x%x)", 674 "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
672 pages, MAX_PBL_LVL_1_PGS); 675 pages, MAX_PBL_LVL_1_PGS);
673 return -ENOMEM; 676 return -ENOMEM;
674 } 677 }
@@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
684 HWQ_TYPE_CTX); 687 HWQ_TYPE_CTX);
685 if (rc) { 688 if (rc) {
686 dev_err(&res->pdev->dev, 689 dev_err(&res->pdev->dev,
687 "SP: Reg MR memory allocation failed"); 690 "SP: Reg MR memory allocation failed\n");
688 return -ENOMEM; 691 return -ENOMEM;
689 } 692 }
690 /* Write to the hwq */ 693 /* Write to the hwq */
@@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
795 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); 798 sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
796 if (!sbuf) { 799 if (!sbuf) {
797 dev_err(&rcfw->pdev->dev, 800 dev_err(&rcfw->pdev->dev,
798 "QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed"); 801 "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
799 return -ENOMEM; 802 return -ENOMEM;
800 } 803 }
801 804
@@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
845 stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err); 848 stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
846 stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err); 849 stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
847 stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err); 850 stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
851 if (!rcfw->init_oos_stats) {
852 rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
853 rcfw->init_oos_stats = 1;
854 } else {
855 stats->res_oos_drop_count +=
856 (le64_to_cpu(sb->res_oos_drop_count) -
857 rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
858 rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
859 }
860
848bail: 861bail:
849 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); 862 bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
850 return rc; 863 return rc;
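
Editor's note: the res_oos_drop_count handling above accumulates deltas instead of exporting the raw firmware counter. The first query only seeds rcfw->oos_prev; each later query adds the difference masked with BNXT_QPLIB_OOS_COUNT_MASK (32 bits), so the exported 64-bit statistic keeps growing even if the hardware counter wraps. A user-space sketch of the same bookkeeping, with illustrative names:

#include <stdint.h>

#define OOS_COUNT_MASK 0xFFFFFFFFull

struct oos_state {
	uint64_t prev;		/* last raw sample from firmware */
	uint64_t total;		/* accumulated, monotonic value */
	int initialized;
};

static void oos_accumulate(struct oos_state *s, uint64_t fw_count)
{
	if (!s->initialized) {
		s->prev = fw_count;	/* first sample only seeds the baseline */
		s->initialized = 1;
		return;
	}
	s->total += (fw_count - s->prev) & OOS_COUNT_MASK;
	s->prev = fw_count;
}
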
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 9d3e8b994945..8079d7f5a008 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
205 /* res_tx_pci_err is 64 b */ 205 /* res_tx_pci_err is 64 b */
206 u64 res_rx_pci_err; 206 u64 res_rx_pci_err;
207 /* res_rx_pci_err is 64 b */ 207 /* res_rx_pci_err is 64 b */
208 u64 res_oos_drop_count;
209 /* res_oos_drop_count */
210 u64 active_qp_count_p0;
211 /* port 0 active qps */
212 u64 active_qp_count_p1;
213 /* port 1 active qps */
214 u64 active_qp_count_p2;
215 /* port 2 active qps */
216 u64 active_qp_count_p3;
217 /* port 3 active qps */
208}; 218};
209 219
210int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, 220int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 3e5a4f760d0e..8a9ead419ac2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
2929 __le64 res_srq_load_err; 2929 __le64 res_srq_load_err;
2930 __le64 res_tx_pci_err; 2930 __le64 res_tx_pci_err;
2931 __le64 res_rx_pci_err; 2931 __le64 res_rx_pci_err;
2932 __le64 res_oos_drop_count;
2933 __le64 active_qp_count_p0;
2934 __le64 active_qp_count_p1;
2935 __le64 active_qp_count_p2;
2936 __le64 active_qp_count_p3;
2932}; 2937};
2933 2938
2934/* QP error notification event (16 bytes) */ 2939/* QP error notification event (16 bytes) */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 1b9ff21aa1d5..ebbec02cebe0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
1127 return 0; 1127 return 0;
1128} 1128}
1129 1129
1130static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 1130static ssize_t hw_rev_show(struct device *dev,
1131 char *buf) 1131 struct device_attribute *attr, char *buf)
1132{ 1132{
1133 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1133 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
1134 ibdev.dev); 1134 ibdev.dev);
1135 pr_debug("%s dev 0x%p\n", __func__, dev); 1135 pr_debug("%s dev 0x%p\n", __func__, dev);
1136 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); 1136 return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
1137} 1137}
1138static DEVICE_ATTR_RO(hw_rev);
1138 1139
1139static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 1140static ssize_t hca_type_show(struct device *dev,
1140 char *buf) 1141 struct device_attribute *attr, char *buf)
1141{ 1142{
1142 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1143 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
1143 ibdev.dev); 1144 ibdev.dev);
@@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
1148 lldev->ethtool_ops->get_drvinfo(lldev, &info); 1149 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1149 return sprintf(buf, "%s\n", info.driver); 1150 return sprintf(buf, "%s\n", info.driver);
1150} 1151}
1152static DEVICE_ATTR_RO(hca_type);
1151 1153
1152static ssize_t show_board(struct device *dev, struct device_attribute *attr, 1154static ssize_t board_id_show(struct device *dev,
1153 char *buf) 1155 struct device_attribute *attr, char *buf)
1154{ 1156{
1155 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, 1157 struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
1156 ibdev.dev); 1158 ibdev.dev);
@@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
1158 return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, 1160 return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
1159 iwch_dev->rdev.rnic_info.pdev->device); 1161 iwch_dev->rdev.rnic_info.pdev->device);
1160} 1162}
1163static DEVICE_ATTR_RO(board_id);
1161 1164
1162enum counters { 1165enum counters {
1163 IPINRECEIVES, 1166 IPINRECEIVES,
@@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
1274 return stats->num_counters; 1277 return stats->num_counters;
1275} 1278}
1276 1279
1277static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1280static struct attribute *iwch_class_attributes[] = {
1278static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1281 &dev_attr_hw_rev.attr,
1279static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1282 &dev_attr_hca_type.attr,
1283 &dev_attr_board_id.attr,
1284 NULL
1285};
1280 1286
1281static struct device_attribute *iwch_class_attributes[] = { 1287static const struct attribute_group iwch_attr_group = {
1282 &dev_attr_hw_rev, 1288 .attrs = iwch_class_attributes,
1283 &dev_attr_hca_type,
1284 &dev_attr_board_id,
1285}; 1289};
1286 1290
1287static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, 1291static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
1316int iwch_register_device(struct iwch_dev *dev) 1320int iwch_register_device(struct iwch_dev *dev)
1317{ 1321{
1318 int ret; 1322 int ret;
1319 int i;
1320 1323
1321 pr_debug("%s iwch_dev %p\n", __func__, dev); 1324 pr_debug("%s iwch_dev %p\n", __func__, dev);
1322 strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
1323 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); 1325 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
1324 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); 1326 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
1325 dev->ibdev.owner = THIS_MODULE; 1327 dev->ibdev.owner = THIS_MODULE;
@@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
1402 sizeof(dev->ibdev.iwcm->ifname)); 1404 sizeof(dev->ibdev.iwcm->ifname));
1403 1405
1404 dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; 1406 dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
1405 ret = ib_register_device(&dev->ibdev, NULL); 1407 rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
1408 ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
1406 if (ret) 1409 if (ret)
1407 goto bail1; 1410 kfree(dev->ibdev.iwcm);
1408
1409 for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
1410 ret = device_create_file(&dev->ibdev.dev,
1411 iwch_class_attributes[i]);
1412 if (ret) {
1413 goto bail2;
1414 }
1415 }
1416 return 0;
1417bail2:
1418 ib_unregister_device(&dev->ibdev);
1419bail1:
1420 kfree(dev->ibdev.iwcm);
1421 return ret; 1411 return ret;
1422} 1412}
1423 1413
1424void iwch_unregister_device(struct iwch_dev *dev) 1414void iwch_unregister_device(struct iwch_dev *dev)
1425{ 1415{
1426 int i;
1427
1428 pr_debug("%s iwch_dev %p\n", __func__, dev); 1416 pr_debug("%s iwch_dev %p\n", __func__, dev);
1429 for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
1430 device_remove_file(&dev->ibdev.dev,
1431 iwch_class_attributes[i]);
1432 ib_unregister_device(&dev->ibdev); 1417 ib_unregister_device(&dev->ibdev);
1433 kfree(dev->ibdev.iwcm); 1418 kfree(dev->ibdev.iwcm);
1434 return; 1419 return;
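
Editor's note: the cxgb3 conversion above (and the cxgb4 one below) follows the pattern introduced by this series — show_*() callbacks are renamed so DEVICE_ATTR_RO() can generate the attribute, the attributes are collected into an attribute_group, and the group is handed to the core with rdma_set_device_sysfs_group() before ib_register_device(), which now also receives the device name ("cxgb3_%d"). The core then creates and removes the sysfs files with the device, so the device_create_file()/device_remove_file() loops and their error unwinding go away. A minimal sketch of the pattern, with illustrative names:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t hw_rev_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	return sprintf(buf, "%d\n", 1);	/* a real driver reads hardware state here */
}
static DEVICE_ATTR_RO(hw_rev);

static struct attribute *example_attrs[] = {
	&dev_attr_hw_rev.attr,
	NULL
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

/* registration path:
 *	rdma_set_device_sysfs_group(&dev->ibdev, &example_attr_group);
 *	ret = ib_register_device(&dev->ibdev, "example_%d", NULL);
 */
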
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f83cbec33f3..615413bd3e8d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
403 ep->com.local_addr.ss_family); 403 ep->com.local_addr.ss_family);
404 dst_release(ep->dst); 404 dst_release(ep->dst);
405 cxgb4_l2t_release(ep->l2t); 405 cxgb4_l2t_release(ep->l2t);
406 if (ep->mpa_skb) 406 kfree_skb(ep->mpa_skb);
407 kfree_skb(ep->mpa_skb);
408 } 407 }
409 if (!skb_queue_empty(&ep->com.ep_skb_list)) 408 if (!skb_queue_empty(&ep->com.ep_skb_list))
410 skb_queue_purge(&ep->com.ep_skb_list); 409 skb_queue_purge(&ep->com.ep_skb_list);
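
Editor's note: the dropped `if (ep->mpa_skb)` guard above is safe because kfree_skb() already treats a NULL skb as a no-op, so callers can free unconditionally. A small user-space analogue of that convention (hypothetical helper, not kernel code):

#include <stdlib.h>

struct msg_buf {
	void *data;
};

static void msg_buf_free(struct msg_buf *b)
{
	if (!b)
		return;		/* tolerate NULL so callers need no guard */
	free(b->data);
	free(b);
}
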
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 6d3042794094..1fd8798d91a7 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
161 cq->gts = rdev->lldi.gts_reg; 161 cq->gts = rdev->lldi.gts_reg;
162 cq->rdev = rdev; 162 cq->rdev = rdev;
163 163
164 cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, 164 cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
165 &cq->bar2_qid, 165 &cq->bar2_qid,
166 user ? &cq->bar2_pa : NULL); 166 user ? &cq->bar2_pa : NULL);
167 if (user && !cq->bar2_pa) { 167 if (user && !cq->bar2_pa) {
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 4eda6872e617..cbb3c0ddd990 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
373 return 0; 373 return 0;
374} 374}
375 375
376static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 376static ssize_t hw_rev_show(struct device *dev,
377 char *buf) 377 struct device_attribute *attr, char *buf)
378{ 378{
379 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 379 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
380 ibdev.dev); 380 ibdev.dev);
@@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
382 return sprintf(buf, "%d\n", 382 return sprintf(buf, "%d\n",
383 CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); 383 CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
384} 384}
385static DEVICE_ATTR_RO(hw_rev);
385 386
386static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 387static ssize_t hca_type_show(struct device *dev,
387 char *buf) 388 struct device_attribute *attr, char *buf)
388{ 389{
389 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 390 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
390 ibdev.dev); 391 ibdev.dev);
@@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
395 lldev->ethtool_ops->get_drvinfo(lldev, &info); 396 lldev->ethtool_ops->get_drvinfo(lldev, &info);
396 return sprintf(buf, "%s\n", info.driver); 397 return sprintf(buf, "%s\n", info.driver);
397} 398}
399static DEVICE_ATTR_RO(hca_type);
398 400
399static ssize_t show_board(struct device *dev, struct device_attribute *attr, 401static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
400 char *buf) 402 char *buf)
401{ 403{
402 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, 404 struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
403 ibdev.dev); 405 ibdev.dev);
@@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
405 return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, 407 return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
406 c4iw_dev->rdev.lldi.pdev->device); 408 c4iw_dev->rdev.lldi.pdev->device);
407} 409}
410static DEVICE_ATTR_RO(board_id);
408 411
409enum counters { 412enum counters {
410 IP4INSEGS, 413 IP4INSEGS,
@@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
461 return stats->num_counters; 464 return stats->num_counters;
462} 465}
463 466
464static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 467static struct attribute *c4iw_class_attributes[] = {
465static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 468 &dev_attr_hw_rev.attr,
466static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 469 &dev_attr_hca_type.attr,
470 &dev_attr_board_id.attr,
471 NULL
472};
467 473
468static struct device_attribute *c4iw_class_attributes[] = { 474static const struct attribute_group c4iw_attr_group = {
469 &dev_attr_hw_rev, 475 .attrs = c4iw_class_attributes,
470 &dev_attr_hca_type,
471 &dev_attr_board_id,
472}; 476};
473 477
474static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, 478static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
530void c4iw_register_device(struct work_struct *work) 534void c4iw_register_device(struct work_struct *work)
531{ 535{
532 int ret; 536 int ret;
533 int i;
534 struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work); 537 struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
535 struct c4iw_dev *dev = ctx->dev; 538 struct c4iw_dev *dev = ctx->dev;
536 539
537 pr_debug("c4iw_dev %p\n", dev); 540 pr_debug("c4iw_dev %p\n", dev);
538 strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
539 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); 541 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
540 memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); 542 memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
541 dev->ibdev.owner = THIS_MODULE; 543 dev->ibdev.owner = THIS_MODULE;
@@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
626 memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, 628 memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
627 sizeof(dev->ibdev.iwcm->ifname)); 629 sizeof(dev->ibdev.iwcm->ifname));
628 630
631 rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
629 dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; 632 dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
630 ret = ib_register_device(&dev->ibdev, NULL); 633 ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
631 if (ret) 634 if (ret)
632 goto err_kfree_iwcm; 635 goto err_kfree_iwcm;
633
634 for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
635 ret = device_create_file(&dev->ibdev.dev,
636 c4iw_class_attributes[i]);
637 if (ret)
638 goto err_unregister_device;
639 }
640 return; 636 return;
641err_unregister_device: 637
642 ib_unregister_device(&dev->ibdev);
643err_kfree_iwcm: 638err_kfree_iwcm:
644 kfree(dev->ibdev.iwcm); 639 kfree(dev->ibdev.iwcm);
645err_dealloc_ctx: 640err_dealloc_ctx:
@@ -651,12 +646,7 @@ err_dealloc_ctx:
651 646
652void c4iw_unregister_device(struct c4iw_dev *dev) 647void c4iw_unregister_device(struct c4iw_dev *dev)
653{ 648{
654 int i;
655
656 pr_debug("c4iw_dev %p\n", dev); 649 pr_debug("c4iw_dev %p\n", dev);
657 for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
658 device_remove_file(&dev->ibdev.dev,
659 c4iw_class_attributes[i]);
660 ib_unregister_device(&dev->ibdev); 650 ib_unregister_device(&dev->ibdev);
661 kfree(dev->ibdev.iwcm); 651 kfree(dev->ibdev.iwcm);
662 return; 652 return;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 62d6f197ec0b..13478f3b7057 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
279 279
280 wq->db = rdev->lldi.db_reg; 280 wq->db = rdev->lldi.db_reg;
281 281
282 wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, 282 wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
283 CXGB4_BAR2_QTYPE_EGRESS,
283 &wq->sq.bar2_qid, 284 &wq->sq.bar2_qid,
284 user ? &wq->sq.bar2_pa : NULL); 285 user ? &wq->sq.bar2_pa : NULL);
285 if (need_rq) 286 if (need_rq)
286 wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, 287 wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
287 T4_BAR2_QTYPE_EGRESS, 288 CXGB4_BAR2_QTYPE_EGRESS,
288 &wq->rq.bar2_qid, 289 &wq->rq.bar2_qid,
289 user ? &wq->rq.bar2_pa : NULL); 290 user ? &wq->rq.bar2_pa : NULL);
290 291
@@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2572 memset(wq->queue, 0, wq->memsize); 2573 memset(wq->queue, 0, wq->memsize);
2573 dma_unmap_addr_set(wq, mapping, wq->dma_addr); 2574 dma_unmap_addr_set(wq, mapping, wq->dma_addr);
2574 2575
2575 wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, 2576 wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
2576 &wq->bar2_qid, 2577 &wq->bar2_qid,
2577 user ? &wq->bar2_pa : NULL); 2578 user ? &wq->bar2_pa : NULL);
2578 2579
@@ -2813,8 +2814,7 @@ err_free_queue:
2813 free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, 2814 free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2814 srq->wr_waitp); 2815 srq->wr_waitp);
2815err_free_skb: 2816err_free_skb:
2816 if (srq->destroy_skb) 2817 kfree_skb(srq->destroy_skb);
2817 kfree_skb(srq->destroy_skb);
2818err_free_srq_idx: 2818err_free_srq_idx:
2819 c4iw_free_srq_idx(&rhp->rdev, srq->idx); 2819 c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2820err_free_wr_wait: 2820err_free_wr_wait:
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index f451ba912f47..ff790390c91a 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,12 +8,42 @@
8# 8#
9obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o 9obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
10 10
11hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ 11hfi1-y := \
12 eprom.o exp_rcv.o file_ops.o firmware.o \ 12 affinity.o \
13 init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ 13 chip.o \
14 qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ 14 device.o \
15 uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ 15 driver.o \
16 verbs_txreq.o vnic_main.o vnic_sdma.o 16 efivar.o \
17 eprom.o \
18 exp_rcv.o \
19 file_ops.o \
20 firmware.o \
21 init.o \
22 intr.o \
23 iowait.o \
24 mad.o \
25 mmu_rb.o \
26 msix.o \
27 pcie.o \
28 pio.o \
29 pio_copy.o \
30 platform.o \
31 qp.o \
32 qsfp.o \
33 rc.o \
34 ruc.o \
35 sdma.o \
36 sysfs.o \
37 trace.o \
38 uc.o \
39 ud.o \
40 user_exp_rcv.o \
41 user_pages.o \
42 user_sdma.o \
43 verbs.o \
44 verbs_txreq.o \
45 vnic_main.o \
46 vnic_sdma.o
17 47
18ifdef CONFIG_DEBUG_FS 48ifdef CONFIG_DEBUG_FS
19hfi1-y += debugfs.o 49hfi1-y += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index bedd5fba33b0..2baf38cc1e23 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
817 set = &entry->def_intr; 817 set = &entry->def_intr;
818 cpumask_set_cpu(cpu, &set->mask); 818 cpumask_set_cpu(cpu, &set->mask);
819 cpumask_set_cpu(cpu, &set->used); 819 cpumask_set_cpu(cpu, &set->used);
820 for (i = 0; i < dd->num_msix_entries; i++) { 820 for (i = 0; i < dd->msix_info.max_requested; i++) {
821 struct hfi1_msix_entry *other_msix; 821 struct hfi1_msix_entry *other_msix;
822 822
823 other_msix = &dd->msix_entries[i]; 823 other_msix = &dd->msix_info.msix_entries[i];
824 if (other_msix->type != IRQ_SDMA || other_msix == msix) 824 if (other_msix->type != IRQ_SDMA || other_msix == msix)
825 continue; 825 continue;
826 826
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e1668bcc2d13..9b20479dc710 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
67#include "debugfs.h" 67#include "debugfs.h"
68#include "fault.h" 68#include "fault.h"
69 69
70#define NUM_IB_PORTS 1
71
72uint kdeth_qp; 70uint kdeth_qp;
73module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); 71module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
74MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); 72MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
1100 const char *desc; 1098 const char *desc;
1101}; 1099};
1102 1100
1103#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START) 1101#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
1104#define NUM_DC_ERRS (IS_DC_END - IS_DC_START) 1102#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
1105#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START) 1103#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
1106 1104
1107/* 1105/*
1108 * Helpers for building HFI and DC error interrupt table entries. Different 1106 * Helpers for building HFI and DC error interrupt table entries. Different
@@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
8181/** 8179/**
8182 * is_rcv_urgent_int() - User receive context urgent IRQ handler 8180 * is_rcv_urgent_int() - User receive context urgent IRQ handler
8183 * @dd: valid dd 8181 * @dd: valid dd
8184 * @source: logical IRQ source (ofse from IS_RCVURGENT_START) 8182 * @source: logical IRQ source (offset from IS_RCVURGENT_START)
8185 * 8183 *
8186 * RX block receive urgent interrupt. Source is < 160. 8184 * RX block receive urgent interrupt. Source is < 160.
8187 * 8185 *
@@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
8231 is_sdma_eng_err_name, is_sdma_eng_err_int }, 8229 is_sdma_eng_err_name, is_sdma_eng_err_int },
8232{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, 8230{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
8233 is_sendctxt_err_name, is_sendctxt_err_int }, 8231 is_sendctxt_err_name, is_sendctxt_err_int },
8234{ IS_SDMA_START, IS_SDMA_END, 8232{ IS_SDMA_START, IS_SDMA_IDLE_END,
8235 is_sdma_eng_name, is_sdma_eng_int }, 8233 is_sdma_eng_name, is_sdma_eng_int },
8236{ IS_VARIOUS_START, IS_VARIOUS_END, 8234{ IS_VARIOUS_START, IS_VARIOUS_END,
8237 is_various_name, is_various_int }, 8235 is_various_name, is_various_int },
@@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
8257 8255
8258 /* avoids a double compare by walking the table in-order */ 8256 /* avoids a double compare by walking the table in-order */
8259 for (entry = &is_table[0]; entry->is_name; entry++) { 8257 for (entry = &is_table[0]; entry->is_name; entry++) {
8260 if (source < entry->end) { 8258 if (source <= entry->end) {
8261 trace_hfi1_interrupt(dd, entry, source); 8259 trace_hfi1_interrupt(dd, entry, source);
8262 entry->is_int(dd, source - entry->start); 8260 entry->is_int(dd, source - entry->start);
8263 return; 8261 return;
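
Editor's note: the hfi1 interrupt-source table now treats its *_END values as inclusive — the NUM_MISC_ERRS/NUM_DC_ERRS/NUM_VARIOUS counts above gain a "+ 1", the SDMA row ends at IS_SDMA_IDLE_END, and the lookup switches from "source < entry->end" to "source <= entry->end". A standalone sketch of an inclusive-range table lookup:

#include <stdint.h>

struct range_entry {
	uint16_t start;
	uint16_t end;		/* inclusive */
	const char *name;
};

static const struct range_entry *lookup(const struct range_entry *tbl, int n,
					uint16_t src)
{
	for (int i = 0; i < n; i++)
		if (src >= tbl[i].start && src <= tbl[i].end)
			return &tbl[i];
	return NULL;
}
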
@@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
8276 * context DATA IRQs are threaded and are not supported by this handler. 8274 * context DATA IRQs are threaded and are not supported by this handler.
8277 * 8275 *
8278 */ 8276 */
8279static irqreturn_t general_interrupt(int irq, void *data) 8277irqreturn_t general_interrupt(int irq, void *data)
8280{ 8278{
8281 struct hfi1_devdata *dd = data; 8279 struct hfi1_devdata *dd = data;
8282 u64 regs[CCE_NUM_INT_CSRS]; 8280 u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
8309 return handled; 8307 return handled;
8310} 8308}
8311 8309
8312static irqreturn_t sdma_interrupt(int irq, void *data) 8310irqreturn_t sdma_interrupt(int irq, void *data)
8313{ 8311{
8314 struct sdma_engine *sde = data; 8312 struct sdma_engine *sde = data;
8315 struct hfi1_devdata *dd = sde->dd; 8313 struct hfi1_devdata *dd = sde->dd;
@@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
8401 * invoked) is finished. The intent is to avoid extra interrupts while we 8399 * invoked) is finished. The intent is to avoid extra interrupts while we
8402 * are processing packets anyway. 8400 * are processing packets anyway.
8403 */ 8401 */
8404static irqreturn_t receive_context_interrupt(int irq, void *data) 8402irqreturn_t receive_context_interrupt(int irq, void *data)
8405{ 8403{
8406 struct hfi1_ctxtdata *rcd = data; 8404 struct hfi1_ctxtdata *rcd = data;
8407 struct hfi1_devdata *dd = rcd->dd; 8405 struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
8441 * Receive packet thread handler. This expects to be invoked with the 8439 * Receive packet thread handler. This expects to be invoked with the
8442 * receive interrupt still blocked. 8440 * receive interrupt still blocked.
8443 */ 8441 */
8444static irqreturn_t receive_context_thread(int irq, void *data) 8442irqreturn_t receive_context_thread(int irq, void *data)
8445{ 8443{
8446 struct hfi1_ctxtdata *rcd = data; 8444 struct hfi1_ctxtdata *rcd = data;
8447 int present; 8445 int present;
@@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
9651 } 9649 }
9652} 9650}
9653 9651
9654static void init_qsfp_int(struct hfi1_devdata *dd) 9652void init_qsfp_int(struct hfi1_devdata *dd)
9655{ 9653{
9656 struct hfi1_pportdata *ppd = dd->pport; 9654 struct hfi1_pportdata *ppd = dd->pport;
9657 u64 qsfp_mask, cce_int_mask; 9655 u64 qsfp_mask;
9658 const int qsfp1_int_smask = QSFP1_INT % 64;
9659 const int qsfp2_int_smask = QSFP2_INT % 64;
9660
9661 /*
9662 * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
9663 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
9664 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
9665 * the index of the appropriate CSR in the CCEIntMask CSR array
9666 */
9667 cce_int_mask = read_csr(dd, CCE_INT_MASK +
9668 (8 * (QSFP1_INT / 64)));
9669 if (dd->hfi1_id) {
9670 cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
9671 write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
9672 cce_int_mask);
9673 } else {
9674 cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
9675 write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
9676 cce_int_mask);
9677 }
9678 9656
9679 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); 9657 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
9680 /* Clear current status to avoid spurious interrupts */ 9658 /* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
9691 write_csr(dd, 9669 write_csr(dd,
9692 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, 9670 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
9693 qsfp_mask); 9671 qsfp_mask);
9672
9673 /* Enable the appropriate QSFP IRQ source */
9674 if (!dd->hfi1_id)
9675 set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
9676 else
9677 set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
9694} 9678}
9695 9679
9696/* 9680/*
@@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
10577 } 10561 }
10578} 10562}
10579 10563
10580/* 10564/**
10581 * Verify if BCT for data VLs is non-zero. 10565 * data_vls_operational() - Verify if data VL BCT credits and MTU
10566 * are both set.
10567 * @ppd: pointer to hfi1_pportdata structure
10568 *
 10569 * Return: true - Ok, false - otherwise.
10582 */ 10570 */
10583static inline bool data_vls_operational(struct hfi1_pportdata *ppd) 10571static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
10584{ 10572{
10585 return !!ppd->actual_vls_operational; 10573 int i;
10574 u64 reg;
10575
10576 if (!ppd->actual_vls_operational)
10577 return false;
10578
10579 for (i = 0; i < ppd->vls_supported; i++) {
10580 reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
10581 if ((reg && !ppd->dd->vld[i].mtu) ||
10582 (!reg && ppd->dd->vld[i].mtu))
10583 return false;
10584 }
10585
10586 return true;
10586} 10587}
10587 10588
10588/* 10589/*
@@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
10695 10696
10696 if (!data_vls_operational(ppd)) { 10697 if (!data_vls_operational(ppd)) {
10697 dd_dev_err(dd, 10698 dd_dev_err(dd,
10698 "%s: data VLs not operational\n", __func__); 10699 "%s: Invalid data VL credits or mtu\n",
10700 __func__);
10699 ret = -EINVAL; 10701 ret = -EINVAL;
10700 break; 10702 break;
10701 } 10703 }
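
Editor's note: data_vls_operational() above is strengthened from a single flag check to a per-VL consistency test — for each supported VL, the SEND_CM_CREDIT_VL credits and the VL MTU must be either both configured or both zero, otherwise the LinkUp transition is refused with the new "Invalid data VL credits or mtu" error. A sketch of the rule with illustrative types:

#include <stdbool.h>
#include <stdint.h>

struct vl_cfg {
	uint64_t credits;	/* stand-in for the SEND_CM_CREDIT_VL CSR value */
	uint32_t mtu;
};

static bool data_vls_consistent(const struct vl_cfg *vl, int num_vls)
{
	for (int i = 0; i < num_vls; i++)
		if (!!vl[i].credits != !!vl[i].mtu)	/* one set without the other */
			return false;
	return true;
}
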
@@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
11932 11934
11933 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK; 11935 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
11934 } 11936 }
11935 if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) 11937 if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
11938 set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
11939 IS_RCVAVAIL_START + rcd->ctxt, true);
11936 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; 11940 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
11937 if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) 11941 }
11942 if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
11943 set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
11944 IS_RCVAVAIL_START + rcd->ctxt, false);
11938 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; 11945 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
11946 }
11939 if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) 11947 if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
11940 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; 11948 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
11941 if (op & HFI1_RCVCTRL_TAILUPD_DIS) { 11949 if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
11965 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11973 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
11966 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) 11974 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
11967 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; 11975 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
11976 if (op & HFI1_RCVCTRL_URGENT_ENB)
11977 set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
11978 IS_RCVURGENT_START + rcd->ctxt, true);
11979 if (op & HFI1_RCVCTRL_URGENT_DIS)
11980 set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
11981 IS_RCVURGENT_START + rcd->ctxt, false);
11982
11968 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); 11983 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
11969 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); 11984 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
11970 11985
@@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
12963 return ret; 12978 return ret;
12964} 12979}
12965 12980
12981/* ========================================================================= */
12982
12966/** 12983/**
12967 * get_int_mask - get 64 bit int mask 12984 * read_mod_write() - Calculate the IRQ register index and set/clear the bits
12968 * @dd - the devdata 12985 * @dd: valid devdata
12969 * @i - the csr (relative to CCE_INT_MASK) 12986 * @src: IRQ source to determine register index from
12987 * @bits: the bits to set or clear
12988 * @set: true == set the bits, false == clear the bits
12970 * 12989 *
12971 * Returns the mask with the urgent interrupt mask
12972 * bit clear for kernel receive contexts.
12973 */ 12990 */
12974static u64 get_int_mask(struct hfi1_devdata *dd, u32 i) 12991static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
12992 bool set)
12975{ 12993{
12976 u64 mask = U64_MAX; /* default to no change */ 12994 u64 reg;
12977 12995 u16 idx = src / BITS_PER_REGISTER;
12978 if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
12979 int j = (i - (IS_RCVURGENT_START / 64)) * 64;
12980 int k = !j ? IS_RCVURGENT_START % 64 : 0;
12981 12996
12982 if (j) 12997 spin_lock(&dd->irq_src_lock);
12983 j -= IS_RCVURGENT_START % 64; 12998 reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
12984 /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */ 12999 if (set)
12985 for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++) 13000 reg |= bits;
12986 /* convert to bit in mask and clear */ 13001 else
12987 mask &= ~BIT_ULL(k); 13002 reg &= ~bits;
12988 } 13003 write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
12989 return mask; 13004 spin_unlock(&dd->irq_src_lock);
12990} 13005}
12991 13006
12992/* ========================================================================= */ 13007/**
12993 13008 * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
12994/* 13009 * @dd: valid devdata
12995 * Enable/disable chip from delivering interrupts. 13010 * @first: first IRQ source to set/clear
13011 * @last: last IRQ source (inclusive) to set/clear
13012 * @set: true == set the bits, false == clear the bits
13013 *
13014 * If first == last, set the exact source.
12996 */ 13015 */
12997void set_intr_state(struct hfi1_devdata *dd, u32 enable) 13016int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
12998{ 13017{
12999 int i; 13018 u64 bits = 0;
13019 u64 bit;
13020 u16 src;
13000 13021
13001 /* 13022 if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
13002 * In HFI, the mask needs to be 1 to allow interrupts. 13023 return -EINVAL;
13003 */
13004 if (enable) {
13005 /* enable all interrupts but urgent on kernel contexts */
13006 for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
13007 u64 mask = get_int_mask(dd, i);
13008 13024
13009 write_csr(dd, CCE_INT_MASK + (8 * i), mask); 13025 if (last < first)
13010 } 13026 return -ERANGE;
13011 13027
13012 init_qsfp_int(dd); 13028 for (src = first; src <= last; src++) {
13013 } else { 13029 bit = src % BITS_PER_REGISTER;
13014 for (i = 0; i < CCE_NUM_INT_CSRS; i++) 13030 /* wrapped to next register? */
13015 write_csr(dd, CCE_INT_MASK + (8 * i), 0ull); 13031 if (!bit && bits) {
13032 read_mod_write(dd, src - 1, bits, set);
13033 bits = 0;
13034 }
13035 bits |= BIT_ULL(bit);
13016 } 13036 }
13037 read_mod_write(dd, last, bits, set);
13038
13039 return 0;
13017} 13040}
13018 13041
13019/* 13042/*
13020 * Clear all interrupt sources on the chip. 13043 * Clear all interrupt sources on the chip.
13021 */ 13044 */
13022static void clear_all_interrupts(struct hfi1_devdata *dd) 13045void clear_all_interrupts(struct hfi1_devdata *dd)
13023{ 13046{
13024 int i; 13047 int i;
13025 13048
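
Editor's note: the get_int_mask()/set_intr_state() pair is replaced above by set_intr_bits(), which enables or disables any inclusive range of interrupt sources, and read_mod_write(), which performs one locked read-modify-write per 64-bit CCE_INT_MASK register. The walk collects bits until the source index crosses into the next register, flushes them, then flushes the remainder after the loop. A user-space sketch of that grouping (an array stands in for the CSRs and the irq_src_lock is omitted):

#include <stdint.h>

#define BITS_PER_REG 64

static void rmw(uint64_t *regs, uint16_t src, uint64_t bits, int set)
{
	uint16_t idx = src / BITS_PER_REG;	/* which mask register this source lives in */

	if (set)
		regs[idx] |= bits;
	else
		regs[idx] &= ~bits;
}

static void set_bits_range(uint64_t *regs, uint16_t first, uint16_t last, int set)
{
	uint64_t bits = 0;

	for (uint16_t src = first; src <= last; src++) {
		uint64_t bit = src % BITS_PER_REG;

		if (!bit && bits) {		/* wrapped into the next register */
			rmw(regs, src - 1, bits, set);
			bits = 0;
		}
		bits |= 1ull << bit;
	}
	rmw(regs, last, bits, set);		/* flush the final partial register */
}
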
@@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
13043 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); 13066 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
13044} 13067}
13045 13068
13046/**
13047 * hfi1_clean_up_interrupts() - Free all IRQ resources
13048 * @dd: valid device data data structure
13049 *
13050 * Free the MSIx and assoicated PCI resources, if they have been allocated.
13051 */
13052void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
13053{
13054 int i;
13055 struct hfi1_msix_entry *me = dd->msix_entries;
13056
13057 /* remove irqs - must happen before disabling/turning off */
13058 for (i = 0; i < dd->num_msix_entries; i++, me++) {
13059 if (!me->arg) /* => no irq, no affinity */
13060 continue;
13061 hfi1_put_irq_affinity(dd, me);
13062 pci_free_irq(dd->pcidev, i, me->arg);
13063 }
13064
13065 /* clean structures */
13066 kfree(dd->msix_entries);
13067 dd->msix_entries = NULL;
13068 dd->num_msix_entries = 0;
13069
13070 pci_free_irq_vectors(dd->pcidev);
13071}
13072
13073/* 13069/*
13074 * Remap the interrupt source from the general handler to the given MSI-X 13070 * Remap the interrupt source from the general handler to the given MSI-X
13075 * interrupt. 13071 * interrupt.
13076 */ 13072 */
13077static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) 13073void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
13078{ 13074{
13079 u64 reg; 13075 u64 reg;
13080 int m, n; 13076 int m, n;
@@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
13098 write_csr(dd, CCE_INT_MAP + (8 * m), reg); 13094 write_csr(dd, CCE_INT_MAP + (8 * m), reg);
13099} 13095}
13100 13096
13101static void remap_sdma_interrupts(struct hfi1_devdata *dd, 13097void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
13102 int engine, int msix_intr)
13103{ 13098{
13104 /* 13099 /*
13105 * SDMA engine interrupt sources grouped by type, rather than 13100 * SDMA engine interrupt sources grouped by type, rather than
@@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
13108 * SDMAProgress 13103 * SDMAProgress
13109 * SDMAIdle 13104 * SDMAIdle
13110 */ 13105 */
13111 remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, 13106 remap_intr(dd, IS_SDMA_START + engine, msix_intr);
13112 msix_intr); 13107 remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
13113 remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, 13108 remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
13114 msix_intr);
13115 remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
13116 msix_intr);
13117}
13118
13119static int request_msix_irqs(struct hfi1_devdata *dd)
13120{
13121 int first_general, last_general;
13122 int first_sdma, last_sdma;
13123 int first_rx, last_rx;
13124 int i, ret = 0;
13125
13126 /* calculate the ranges we are going to use */
13127 first_general = 0;
13128 last_general = first_general + 1;
13129 first_sdma = last_general;
13130 last_sdma = first_sdma + dd->num_sdma;
13131 first_rx = last_sdma;
13132 last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
13133
13134 /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
13135 dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
13136
13137 /*
13138 * Sanity check - the code expects all SDMA chip source
13139 * interrupts to be in the same CSR, starting at bit 0. Verify
13140 * that this is true by checking the bit location of the start.
13141 */
13142 BUILD_BUG_ON(IS_SDMA_START % 64);
13143
13144 for (i = 0; i < dd->num_msix_entries; i++) {
13145 struct hfi1_msix_entry *me = &dd->msix_entries[i];
13146 const char *err_info;
13147 irq_handler_t handler;
13148 irq_handler_t thread = NULL;
13149 void *arg = NULL;
13150 int idx;
13151 struct hfi1_ctxtdata *rcd = NULL;
13152 struct sdma_engine *sde = NULL;
13153 char name[MAX_NAME_SIZE];
13154
13155 /* obtain the arguments to pci_request_irq */
13156 if (first_general <= i && i < last_general) {
13157 idx = i - first_general;
13158 handler = general_interrupt;
13159 arg = dd;
13160 snprintf(name, sizeof(name),
13161 DRIVER_NAME "_%d", dd->unit);
13162 err_info = "general";
13163 me->type = IRQ_GENERAL;
13164 } else if (first_sdma <= i && i < last_sdma) {
13165 idx = i - first_sdma;
13166 sde = &dd->per_sdma[idx];
13167 handler = sdma_interrupt;
13168 arg = sde;
13169 snprintf(name, sizeof(name),
13170 DRIVER_NAME "_%d sdma%d", dd->unit, idx);
13171 err_info = "sdma";
13172 remap_sdma_interrupts(dd, idx, i);
13173 me->type = IRQ_SDMA;
13174 } else if (first_rx <= i && i < last_rx) {
13175 idx = i - first_rx;
13176 rcd = hfi1_rcd_get_by_index_safe(dd, idx);
13177 if (rcd) {
13178 /*
13179 * Set the interrupt register and mask for this
13180 * context's interrupt.
13181 */
13182 rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
13183 rcd->imask = ((u64)1) <<
13184 ((IS_RCVAVAIL_START + idx) % 64);
13185 handler = receive_context_interrupt;
13186 thread = receive_context_thread;
13187 arg = rcd;
13188 snprintf(name, sizeof(name),
13189 DRIVER_NAME "_%d kctxt%d",
13190 dd->unit, idx);
13191 err_info = "receive context";
13192 remap_intr(dd, IS_RCVAVAIL_START + idx, i);
13193 me->type = IRQ_RCVCTXT;
13194 rcd->msix_intr = i;
13195 hfi1_rcd_put(rcd);
13196 }
13197 } else {
13198 /* not in our expected range - complain, then
13199 * ignore it
13200 */
13201 dd_dev_err(dd,
13202 "Unexpected extra MSI-X interrupt %d\n", i);
13203 continue;
13204 }
13205 /* no argument, no interrupt */
13206 if (!arg)
13207 continue;
13208 /* make sure the name is terminated */
13209 name[sizeof(name) - 1] = 0;
13210 me->irq = pci_irq_vector(dd->pcidev, i);
13211 ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
13212 name);
13213 if (ret) {
13214 dd_dev_err(dd,
13215 "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
13216 err_info, me->irq, idx, ret);
13217 return ret;
13218 }
13219 /*
13220 * assign arg after pci_request_irq call, so it will be
13221 * cleaned up
13222 */
13223 me->arg = arg;
13224
13225 ret = hfi1_get_irq_affinity(dd, me);
13226 if (ret)
13227 dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
13228 }
13229
13230 return ret;
13231}
13232
13233void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
13234{
13235 int i;
13236
13237 for (i = 0; i < dd->vnic.num_ctxt; i++) {
13238 struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
13239 struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
13240
13241 synchronize_irq(me->irq);
13242 }
13243}
13244
13245void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
13246{
13247 struct hfi1_devdata *dd = rcd->dd;
13248 struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
13249
13250 if (!me->arg) /* => no irq, no affinity */
13251 return;
13252
13253 hfi1_put_irq_affinity(dd, me);
13254 pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
13255
13256 me->arg = NULL;
13257}
13258
13259void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
13260{
13261 struct hfi1_devdata *dd = rcd->dd;
13262 struct hfi1_msix_entry *me;
13263 int idx = rcd->ctxt;
13264 void *arg = rcd;
13265 int ret;
13266
13267 rcd->msix_intr = dd->vnic.msix_idx++;
13268 me = &dd->msix_entries[rcd->msix_intr];
13269
13270 /*
13271 * Set the interrupt register and mask for this
13272 * context's interrupt.
13273 */
13274 rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
13275 rcd->imask = ((u64)1) <<
13276 ((IS_RCVAVAIL_START + idx) % 64);
13277 me->type = IRQ_RCVCTXT;
13278 me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
13279 remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
13280
13281 ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
13282 receive_context_interrupt,
13283 receive_context_thread, arg,
13284 DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
13285 if (ret) {
13286 dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
13287 me->irq, idx, ret);
13288 return;
13289 }
13290 /*
13291 * assign arg after pci_request_irq call, so it will be
13292 * cleaned up
13293 */
13294 me->arg = arg;
13295
13296 ret = hfi1_get_irq_affinity(dd, me);
13297 if (ret) {
13298 dd_dev_err(dd,
13299 "unable to pin IRQ %d\n", ret);
13300 pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
13301 }
13302} 13109}
13303 13110
13304/* 13111/*
13305 * Set the general handler to accept all interrupts, remap all 13112 * Set the general handler to accept all interrupts, remap all
13306 * chip interrupts back to MSI-X 0. 13113 * chip interrupts back to MSI-X 0.
13307 */ 13114 */
13308static void reset_interrupts(struct hfi1_devdata *dd) 13115void reset_interrupts(struct hfi1_devdata *dd)
13309{ 13116{
13310 int i; 13117 int i;
13311 13118
@@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
13318 write_csr(dd, CCE_INT_MAP + (8 * i), 0); 13125 write_csr(dd, CCE_INT_MAP + (8 * i), 0);
13319} 13126}
13320 13127
13128/**
13129 * set_up_interrupts() - Initialize the IRQ resources and state
13130 * @dd: valid devdata
13131 *
13132 */
13321static int set_up_interrupts(struct hfi1_devdata *dd) 13133static int set_up_interrupts(struct hfi1_devdata *dd)
13322{ 13134{
13323 u32 total; 13135 int ret;
13324 int ret, request;
13325
13326 /*
13327 * Interrupt count:
13328 * 1 general, "slow path" interrupt (includes the SDMA engines
13329 * slow source, SDMACleanupDone)
13330 * N interrupts - one per used SDMA engine
13331 * M interrupt - one per kernel receive context
13332 * V interrupt - one for each VNIC context
13333 */
13334 total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
13335
13336 /* ask for MSI-X interrupts */
13337 request = request_msix(dd, total);
13338 if (request < 0) {
13339 ret = request;
13340 goto fail;
13341 } else {
13342 dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
13343 GFP_KERNEL);
13344 if (!dd->msix_entries) {
13345 ret = -ENOMEM;
13346 goto fail;
13347 }
13348 /* using MSI-X */
13349 dd->num_msix_entries = total;
13350 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
13351 }
13352 13136
13353 /* mask all interrupts */ 13137 /* mask all interrupts */
13354 set_intr_state(dd, 0); 13138 set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
13139
13355 /* clear all pending interrupts */ 13140 /* clear all pending interrupts */
13356 clear_all_interrupts(dd); 13141 clear_all_interrupts(dd);
13357 13142
13358 /* reset general handler mask, chip MSI-X mappings */ 13143 /* reset general handler mask, chip MSI-X mappings */
13359 reset_interrupts(dd); 13144 reset_interrupts(dd);
13360 13145
13361 ret = request_msix_irqs(dd); 13146 /* ask for MSI-X interrupts */
13147 ret = msix_initialize(dd);
13362 if (ret) 13148 if (ret)
13363 goto fail; 13149 return ret;
13364 13150
13365 return 0; 13151 ret = msix_request_irqs(dd);
13152 if (ret)
13153 msix_clean_up_interrupts(dd);
13366 13154
13367fail:
13368 hfi1_clean_up_interrupts(dd);
13369 return ret; 13155 return ret;
13370} 13156}
13371 13157
@@ -14918,20 +14704,16 @@ err_exit:
14918} 14704}
14919 14705
14920/** 14706/**
14921 * Allocate and initialize the device structure for the hfi. 14707 * hfi1_init_dd() - Initialize most of the dd structure.
14922 * @dev: the pci_dev for hfi1_ib device 14708 * @dev: the pci_dev for hfi1_ib device
14923 * @ent: pci_device_id struct for this dev 14709 * @ent: pci_device_id struct for this dev
14924 * 14710 *
14925 * Also allocates, initializes, and returns the devdata struct for this
14926 * device instance
14927 *
14928 * This is global, and is called directly at init to set up the 14711 * This is global, and is called directly at init to set up the
14929 * chip-specific function pointers for later use. 14712 * chip-specific function pointers for later use.
14930 */ 14713 */
14931struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, 14714int hfi1_init_dd(struct hfi1_devdata *dd)
14932 const struct pci_device_id *ent)
14933{ 14715{
14934 struct hfi1_devdata *dd; 14716 struct pci_dev *pdev = dd->pcidev;
14935 struct hfi1_pportdata *ppd; 14717 struct hfi1_pportdata *ppd;
14936 u64 reg; 14718 u64 reg;
14937 int i, ret; 14719 int i, ret;
@@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
14942 "Functional simulator" 14724 "Functional simulator"
14943 }; 14725 };
14944 struct pci_dev *parent = pdev->bus->self; 14726 struct pci_dev *parent = pdev->bus->self;
14945 u32 sdma_engines; 14727 u32 sdma_engines = chip_sdma_engines(dd);
14946 14728
14947 dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
14948 sizeof(struct hfi1_pportdata));
14949 if (IS_ERR(dd))
14950 goto bail;
14951 sdma_engines = chip_sdma_engines(dd);
14952 ppd = dd->pport; 14729 ppd = dd->pport;
14953 for (i = 0; i < dd->num_pports; i++, ppd++) { 14730 for (i = 0; i < dd->num_pports; i++, ppd++) {
14954 int vl; 14731 int vl;
@@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
15127 if (ret) 14904 if (ret)
15128 goto bail_cleanup; 14905 goto bail_cleanup;
15129 14906
14907 /*
14908 * This should probably occur in hfi1_pcie_init(), but historically
14909 * occurs after the do_pcie_gen3_transition() code.
14910 */
14911 tune_pcie_caps(dd);
14912
15130 /* start setting dd values and adjusting CSRs */ 14913 /* start setting dd values and adjusting CSRs */
15131 init_early_variables(dd); 14914 init_early_variables(dd);
15132 14915
@@ -15239,14 +15022,13 @@ bail_free_cntrs:
15239 free_cntrs(dd); 15022 free_cntrs(dd);
15240bail_clear_intr: 15023bail_clear_intr:
15241 hfi1_comp_vectors_clean_up(dd); 15024 hfi1_comp_vectors_clean_up(dd);
15242 hfi1_clean_up_interrupts(dd); 15025 msix_clean_up_interrupts(dd);
15243bail_cleanup: 15026bail_cleanup:
15244 hfi1_pcie_ddcleanup(dd); 15027 hfi1_pcie_ddcleanup(dd);
15245bail_free: 15028bail_free:
15246 hfi1_free_devdata(dd); 15029 hfi1_free_devdata(dd);
15247 dd = ERR_PTR(ret);
15248bail: 15030bail:
15249 return dd; 15031 return ret;
15250} 15032}
15251 15033
15252static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, 15034static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
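Both the removed chip.c paths above and the new msix.c compute a receive context's interrupt register and mask the same way: the RcvAvail source number is split into a 64-bit CSR index and a bit position within it. A minimal standalone sketch of that arithmetic (illustrative only, not taken from the patch; constants copied from chip.h):

    #include <stdint.h>
    #include <stdio.h>

    #define IS_RCVAVAIL_START 256          /* from chip.h */
    #define BITS_PER_REGISTER 64           /* bits per CCE interrupt CSR */

    int main(void)
    {
            unsigned int ctxt;

            for (ctxt = 0; ctxt < 4; ctxt++) {
                    unsigned int src = IS_RCVAVAIL_START + ctxt;
                    unsigned int ireg = src / BITS_PER_REGISTER;               /* rcd->ireg */
                    uint64_t imask = (uint64_t)1 << (src % BITS_PER_REGISTER); /* rcd->imask */

                    printf("ctxt %u -> source %u -> CSR %u, mask 0x%016llx\n",
                           ctxt, src, ireg, (unsigned long long)imask);
            }
            return 0;
    }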
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 36b04d6300e5..6b9c8f12dff8 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -52,9 +52,7 @@
52 */ 52 */
53 53
54/* sizes */ 54/* sizes */
55#define CCE_NUM_MSIX_VECTORS 256 55#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
56#define CCE_NUM_INT_CSRS 12
57#define CCE_NUM_INT_MAP_CSRS 96
58#define NUM_INTERRUPT_SOURCES 768 56#define NUM_INTERRUPT_SOURCES 768
59#define RXE_NUM_CONTEXTS 160 57#define RXE_NUM_CONTEXTS 160
60#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */ 58#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +159,49 @@
161 (CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \ 159 (CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
162 CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT) 160 CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
163 161
164/* interrupt source numbers */ 162/* Specific IRQ sources */
165#define IS_GENERAL_ERR_START 0 163#define CCE_ERR_INT 0
166#define IS_SDMAENG_ERR_START 16 164#define RXE_ERR_INT 1
167#define IS_SENDCTXT_ERR_START 32 165#define MISC_ERR_INT 2
168#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */ 166#define PIO_ERR_INT 4
167#define SDMA_ERR_INT 5
168#define EGRESS_ERR_INT 6
169#define TXE_ERR_INT 7
170#define PBC_INT 240
171#define GPIO_ASSERT_INT 241
172#define QSFP1_INT 242
173#define QSFP2_INT 243
174#define TCRIT_INT 244
175
176/* interrupt source ranges */
177#define IS_FIRST_SOURCE CCE_ERR_INT
178#define IS_GENERAL_ERR_START 0
179#define IS_SDMAENG_ERR_START 16
180#define IS_SENDCTXT_ERR_START 32
181#define IS_SDMA_START 192
182#define IS_SDMA_PROGRESS_START 208
183#define IS_SDMA_IDLE_START 224
169#define IS_VARIOUS_START 240 184#define IS_VARIOUS_START 240
170#define IS_DC_START 248 185#define IS_DC_START 248
171#define IS_RCVAVAIL_START 256 186#define IS_RCVAVAIL_START 256
172#define IS_RCVURGENT_START 416 187#define IS_RCVURGENT_START 416
173#define IS_SENDCREDIT_START 576 188#define IS_SENDCREDIT_START 576
174#define IS_RESERVED_START 736 189#define IS_RESERVED_START 736
175#define IS_MAX_SOURCES 768 190#define IS_LAST_SOURCE 767
176 191
177/* derived interrupt source values */ 192/* derived interrupt source values */
178#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START 193#define IS_GENERAL_ERR_END 7
179#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START 194#define IS_SDMAENG_ERR_END 31
180#define IS_SENDCTXT_ERR_END IS_SDMA_START 195#define IS_SENDCTXT_ERR_END 191
181#define IS_SDMA_END IS_VARIOUS_START 196#define IS_SDMA_END 207
182#define IS_VARIOUS_END IS_DC_START 197#define IS_SDMA_PROGRESS_END 223
183#define IS_DC_END IS_RCVAVAIL_START 198#define IS_SDMA_IDLE_END 239
184#define IS_RCVAVAIL_END IS_RCVURGENT_START 199#define IS_VARIOUS_END 244
185#define IS_RCVURGENT_END IS_SENDCREDIT_START 200#define IS_DC_END 255
186#define IS_SENDCREDIT_END IS_RESERVED_START 201#define IS_RCVAVAIL_END 415
187#define IS_RESERVED_END IS_MAX_SOURCES 202#define IS_RCVURGENT_END 575
188 203#define IS_SENDCREDIT_END 735
189/* absolute interrupt numbers for QSFP1Int and QSFP2Int */ 204#define IS_RESERVED_END IS_LAST_SOURCE
190#define QSFP1_INT 242
191#define QSFP2_INT 243
192 205
193/* DCC_CFG_PORT_CONFIG logical link states */ 206/* DCC_CFG_PORT_CONFIG logical link states */
194#define LSTATE_DOWN 0x1 207#define LSTATE_DOWN 0x1
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
1416void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); 1429void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
1417void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); 1430void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
1418 1431
1432irqreturn_t general_interrupt(int irq, void *data);
1433irqreturn_t sdma_interrupt(int irq, void *data);
1434irqreturn_t receive_context_interrupt(int irq, void *data);
1435irqreturn_t receive_context_thread(int irq, void *data);
1436
1437int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
1438void init_qsfp_int(struct hfi1_devdata *dd);
1439void clear_all_interrupts(struct hfi1_devdata *dd);
1440void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
1441void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
1442void reset_interrupts(struct hfi1_devdata *dd);
1443
1419/* 1444/*
1420 * Interrupt source table. 1445 * Interrupt source table.
1421 * 1446 *
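The chip.h rework above replaces the open-ended IS_*_END chain with explicit first/last source numbers, and the newly declared set_intr_bits() takes such a [first, last] range. Its implementation is not part of this hunk; the sketch below only illustrates, under that assumption, how a source range decomposes into per-CSR mask bits using the new constants:

    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_REGISTER 64
    #define NUM_INTERRUPT_SOURCES 768
    #define NUM_INT_CSRS (NUM_INTERRUPT_SOURCES / BITS_PER_REGISTER)   /* 12 */

    /* accumulate one bit per source in the CSR that owns it */
    static void range_to_masks(unsigned int first, unsigned int last,
                               uint64_t masks[NUM_INT_CSRS])
    {
            for (unsigned int src = first; src <= last; src++)
                    masks[src / BITS_PER_REGISTER] |=
                            (uint64_t)1 << (src % BITS_PER_REGISTER);
    }

    int main(void)
    {
            uint64_t masks[NUM_INT_CSRS] = { 0 };

            /* e.g. the send-credit range from chip.h: 576..735 */
            range_to_masks(576, 735, masks);
            for (unsigned int i = 0; i < NUM_INT_CSRS; i++)
                    if (masks[i])
                            printf("CSR %u: 0x%016llx\n", i,
                                   (unsigned long long)masks[i]);
            return 0;
    }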
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ee6dca5e2a2f..c6163a347e93 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -878,6 +878,10 @@
878#define SEND_CTRL (TXE + 0x000000000000) 878#define SEND_CTRL (TXE + 0x000000000000)
879#define SEND_CTRL_CM_RESET_SMASK 0x4ull 879#define SEND_CTRL_CM_RESET_SMASK 0x4ull
880#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull 880#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
881#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
882#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
883#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
884 << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
881#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull 885#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
882#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080) 886#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
883#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull 887#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1fc75647e47b..c22ebc774a6a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
681 HFI1_RCVCTRL_TAILUPD_DIS | 681 HFI1_RCVCTRL_TAILUPD_DIS |
682 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 682 HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
683 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 683 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
684 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 684 HFI1_RCVCTRL_NO_EGR_DROP_DIS |
685 HFI1_RCVCTRL_URGENT_DIS, uctxt);
685 /* Clear the context's J_KEY */ 686 /* Clear the context's J_KEY */
686 hfi1_clear_ctxt_jkey(dd, uctxt); 687 hfi1_clear_ctxt_jkey(dd, uctxt);
687 /* 688 /*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
1096 hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey); 1097 hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
1097 1098
1098 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 1099 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
1100 rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
1099 if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP)) 1101 if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
1100 rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB; 1102 rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
1101 /* 1103 /*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d9470317983f..1401b6ea4a28 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -80,6 +80,7 @@
80#include "qsfp.h" 80#include "qsfp.h"
81#include "platform.h" 81#include "platform.h"
82#include "affinity.h" 82#include "affinity.h"
83#include "msix.h"
83 84
84/* bumped 1 from s/w major version of TrueScale */ 85/* bumped 1 from s/w major version of TrueScale */
85#define HFI1_CHIP_VERS_MAJ 3U 86#define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
620#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000 621#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
621#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000 622#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
622#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000 623#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
624#define HFI1_RCVCTRL_URGENT_ENB 0x40000
625#define HFI1_RCVCTRL_URGENT_DIS 0x80000
623 626
624/* partition enforcement flags */ 627/* partition enforcement flags */
625#define HFI1_PART_ENFORCE_IN 0x1 628#define HFI1_PART_ENFORCE_IN 0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
667 struct irq_affinity_notify notify; 670 struct irq_affinity_notify notify;
668}; 671};
669 672
673struct hfi1_msix_info {
674 /* lock to synchronize in_use_msix access */
675 spinlock_t msix_lock;
676 DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
677 struct hfi1_msix_entry *msix_entries;
678 u16 max_requested;
679};
680
670/* per-SL CCA information */ 681/* per-SL CCA information */
671struct cca_timer { 682struct cca_timer {
672 struct hrtimer hrtimer; 683 struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
992 struct idr vesw_idr; 1003 struct idr vesw_idr;
993 u8 rmt_start; 1004 u8 rmt_start;
994 u8 num_ctxt; 1005 u8 num_ctxt;
995 u32 msix_idx;
996}; 1006};
997 1007
998struct hfi1_vnic_vport_info; 1008struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
1205 1215
1206 struct diag_client *diag_client; 1216 struct diag_client *diag_client;
1207 1217
1208 /* MSI-X information */
1209 struct hfi1_msix_entry *msix_entries;
1210 u32 num_msix_entries;
1211 u32 first_dyn_msix_idx;
1212
1213 /* general interrupt: mask of handled interrupts */ 1218 /* general interrupt: mask of handled interrupts */
1214 u64 gi_mask[CCE_NUM_INT_CSRS]; 1219 u64 gi_mask[CCE_NUM_INT_CSRS];
1215 1220
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
1223 */ 1228 */
1224 struct timer_list synth_stats_timer; 1229 struct timer_list synth_stats_timer;
1225 1230
1231 /* MSI-X information */
1232 struct hfi1_msix_info msix_info;
1233
1226 /* 1234 /*
1227 * device counters 1235 * device counters
1228 */ 1236 */
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
1349 1357
1350 /* vnic data */ 1358 /* vnic data */
1351 struct hfi1_vnic_data vnic; 1359 struct hfi1_vnic_data vnic;
1360 /* Lock to protect IRQ SRC register access */
1361 spinlock_t irq_src_lock;
1352}; 1362};
1353 1363
1354static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) 1364static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
1431int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); 1441int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
1432int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); 1442int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
1433void set_all_slowpath(struct hfi1_devdata *dd); 1443void set_all_slowpath(struct hfi1_devdata *dd);
1434void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
1435void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
1436void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
1437 1444
1438extern const struct pci_device_id hfi1_pci_tbl[]; 1445extern const struct pci_device_id hfi1_pci_tbl[];
1439void hfi1_make_ud_req_9B(struct rvt_qp *qp, 1446void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
1887#define HFI1_CTXT_WAITING_URG 4 1894#define HFI1_CTXT_WAITING_URG 4
1888 1895
1889/* free up any allocated data at close */ 1896/* free up any allocated data at close */
1890struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, 1897int hfi1_init_dd(struct hfi1_devdata *dd);
1891 const struct pci_device_id *ent);
1892void hfi1_free_devdata(struct hfi1_devdata *dd); 1898void hfi1_free_devdata(struct hfi1_devdata *dd);
1893struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
1894 1899
1895/* LED beaconing functions */ 1900/* LED beaconing functions */
1896void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, 1901void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
1963 */ 1968 */
1964 1969
1965extern const char ib_hfi1_version[]; 1970extern const char ib_hfi1_version[];
1971extern const struct attribute_group ib_hfi1_attr_group;
1966 1972
1967int hfi1_device_create(struct hfi1_devdata *dd); 1973int hfi1_device_create(struct hfi1_devdata *dd);
1968void hfi1_device_remove(struct hfi1_devdata *dd); 1974void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
1974/* Hook for sysfs read of QSFP */ 1980/* Hook for sysfs read of QSFP */
1975int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); 1981int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
1976 1982
1977int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent); 1983int hfi1_pcie_init(struct hfi1_devdata *dd);
1978void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
1979void hfi1_pcie_cleanup(struct pci_dev *pdev); 1984void hfi1_pcie_cleanup(struct pci_dev *pdev);
1980int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev); 1985int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
1981void hfi1_pcie_ddcleanup(struct hfi1_devdata *); 1986void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
1982int pcie_speeds(struct hfi1_devdata *dd); 1987int pcie_speeds(struct hfi1_devdata *dd);
1983int request_msix(struct hfi1_devdata *dd, u32 msireq);
1984int restore_pci_variables(struct hfi1_devdata *dd); 1988int restore_pci_variables(struct hfi1_devdata *dd);
1985int save_pci_variables(struct hfi1_devdata *dd); 1989int save_pci_variables(struct hfi1_devdata *dd);
1986int do_pcie_gen3_transition(struct hfi1_devdata *dd); 1990int do_pcie_gen3_transition(struct hfi1_devdata *dd);
1991void tune_pcie_caps(struct hfi1_devdata *dd);
1987int parse_platform_config(struct hfi1_devdata *dd); 1992int parse_platform_config(struct hfi1_devdata *dd);
1988int get_platform_config_field(struct hfi1_devdata *dd, 1993int get_platform_config_field(struct hfi1_devdata *dd,
1989 enum platform_config_table_type_encoding 1994 enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
2124 return base_sdma_integrity; 2129 return base_sdma_integrity;
2125} 2130}
2126 2131
2127/*
2128 * hfi1_early_err is used (only!) to print early errors before devdata is
2129 * allocated, or when dd->pcidev may not be valid, and at the tail end of
2130 * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
2131 * the same as dd_dev_err, but is used when the message really needs
2132 * the IB port# to be definitive as to what's happening..
2133 */
2134#define hfi1_early_err(dev, fmt, ...) \
2135 dev_err(dev, fmt, ##__VA_ARGS__)
2136
2137#define hfi1_early_info(dev, fmt, ...) \
2138 dev_info(dev, fmt, ##__VA_ARGS__)
2139
2140#define dd_dev_emerg(dd, fmt, ...) \ 2132#define dd_dev_emerg(dd, fmt, ...) \
2141 dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ 2133 dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
2142 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__) 2134 rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
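struct hfi1_msix_info replaces the flat msix_entries/num_msix_entries/first_dyn_msix_idx fields: vectors are now handed out from the in_use_msix bitmap under msix_lock. A standalone sketch of that allocate/free bookkeeping (plain bit operations stand in for the kernel's find_first_zero_bit()/__set_bit(), locking is omitted, and the 256-vector ceiling mirrors the value the old chip.h gave CCE_NUM_MSIX_VECTORS):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_VECTORS 256                 /* CCE_NUM_MSIX_VECTORS-sized bitmap */

    static uint64_t in_use[MAX_VECTORS / 64];

    static int alloc_vector(unsigned int max_requested)
    {
            for (unsigned int nr = 0; nr < max_requested; nr++) {
                    if (!(in_use[nr / 64] & ((uint64_t)1 << (nr % 64)))) {
                            in_use[nr / 64] |= (uint64_t)1 << (nr % 64);
                            return (int)nr;
                    }
            }
            return -1;                      /* msix.c returns -ENOSPC here */
    }

    static void free_vector(unsigned int nr)
    {
            in_use[nr / 64] &= ~((uint64_t)1 << (nr % 64));
    }

    int main(void)
    {
            int a = alloc_vector(8);        /* 0: msix.c keeps this for the general IRQ */
            int b = alloc_vector(8);        /* 1 */

            printf("allocated %d and %d\n", a, b);
            free_vector(b);
            printf("reallocated %d\n", alloc_vector(8));   /* 1 again */
            return 0;
    }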
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273c32cf..09044905284f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -83,6 +83,8 @@
83#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ 83#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
84#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ 84#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
85 85
86#define NUM_IB_PORTS 1
87
86/* 88/*
87 * Number of user receive contexts we are configured to use (to allow for more 89 * Number of user receive contexts we are configured to use (to allow for more
88 * pio buffers per ctxt, etc.) Zero means use one user context per CPU. 90 * pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
654 ppd->part_enforce |= HFI1_PART_ENFORCE_IN; 656 ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
655 657
656 if (loopback) { 658 if (loopback) {
657 hfi1_early_err(&pdev->dev, 659 dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
658 "Faking data partition 0x8001 in idx %u\n", 660 !default_pkey_idx);
659 !default_pkey_idx);
660 ppd->pkeys[!default_pkey_idx] = 0x8001; 661 ppd->pkeys[!default_pkey_idx] = 0x8001;
661 } 662 }
662 663
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
702 return; 703 return;
703 704
704bail: 705bail:
705 706 dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
706 hfi1_early_err(&pdev->dev,
707 "Congestion Control Agent disabled for port %d\n", port);
708} 707}
709 708
710/* 709/*
@@ -833,6 +832,23 @@ wq_error:
833} 832}
834 833
835/** 834/**
835 * enable_general_intr() - Enable the IRQs that will be handled by the
836 * general interrupt handler.
837 * @dd: valid devdata
838 *
839 */
840static void enable_general_intr(struct hfi1_devdata *dd)
841{
842 set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
843 set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
844 set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
845 set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
846 set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
847 set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
848 set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
849}
850
851/**
836 * hfi1_init - do the actual initialization sequence on the chip 852 * hfi1_init - do the actual initialization sequence on the chip
837 * @dd: the hfi1_ib device 853 * @dd: the hfi1_ib device
838 * @reinit: re-initializing, so don't allocate new memory 854 * @reinit: re-initializing, so don't allocate new memory
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
916 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); 932 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
917 ret = lastfail; 933 ret = lastfail;
918 } 934 }
935 /* enable IRQ */
919 hfi1_rcd_put(rcd); 936 hfi1_rcd_put(rcd);
920 } 937 }
921 938
@@ -954,7 +971,8 @@ done:
954 HFI1_STATUS_INITTED; 971 HFI1_STATUS_INITTED;
955 if (!ret) { 972 if (!ret) {
956 /* enable all interrupts from the chip */ 973 /* enable all interrupts from the chip */
957 set_intr_state(dd, 1); 974 enable_general_intr(dd);
975 init_qsfp_int(dd);
958 976
959 /* chip is OK for user apps; mark it as initialized */ 977 /* chip is OK for user apps; mark it as initialized */
960 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 978 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
1051 } 1069 }
1052 dd->flags &= ~HFI1_INITTED; 1070 dd->flags &= ~HFI1_INITTED;
1053 1071
1054 /* mask and clean up interrupts, but not errors */ 1072 /* mask and clean up interrupts */
1055 set_intr_state(dd, 0); 1073 set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
1056 hfi1_clean_up_interrupts(dd); 1074 msix_clean_up_interrupts(dd);
1057 1075
1058 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1076 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
1059 ppd = dd->pport + pidx; 1077 ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
1246 kobject_put(&dd->kobj); 1264 kobject_put(&dd->kobj);
1247} 1265}
1248 1266
1249/* 1267/**
1250 * Allocate our primary per-unit data structure. Must be done via verbs 1268 * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
1251 * allocator, because the verbs cleanup process both does cleanup and 1269 * @pdev: Valid PCI device
1252 * free of the data structure. 1270 * @extra: How many bytes to alloc past the default
1271 *
1272 * Must be done via verbs allocator, because the verbs cleanup process
1273 * both does cleanup and free of the data structure.
1253 * "extra" is for chip-specific data. 1274 * "extra" is for chip-specific data.
1254 * 1275 *
1255 * Use the idr mechanism to get a unit number for this unit. 1276 * Use the idr mechanism to get a unit number for this unit.
1256 */ 1277 */
1257struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) 1278static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
1279 size_t extra)
1258{ 1280{
1259 unsigned long flags; 1281 unsigned long flags;
1260 struct hfi1_devdata *dd; 1282 struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
1287 idr_preload_end(); 1309 idr_preload_end();
1288 1310
1289 if (ret < 0) { 1311 if (ret < 0) {
1290 hfi1_early_err(&pdev->dev, 1312 dev_err(&pdev->dev,
1291 "Could not allocate unit ID: error %d\n", -ret); 1313 "Could not allocate unit ID: error %d\n", -ret);
1292 goto bail; 1314 goto bail;
1293 } 1315 }
1294 rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); 1316 rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
1309 spin_lock_init(&dd->pio_map_lock); 1331 spin_lock_init(&dd->pio_map_lock);
1310 mutex_init(&dd->dc8051_lock); 1332 mutex_init(&dd->dc8051_lock);
1311 init_waitqueue_head(&dd->event_queue); 1333 init_waitqueue_head(&dd->event_queue);
1334 spin_lock_init(&dd->irq_src_lock);
1312 1335
1313 dd->int_counter = alloc_percpu(u64); 1336 dd->int_counter = alloc_percpu(u64);
1314 if (!dd->int_counter) { 1337 if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
1481 idr_init(&hfi1_unit_table); 1504 idr_init(&hfi1_unit_table);
1482 1505
1483 hfi1_dbg_init(); 1506 hfi1_dbg_init();
1484 ret = hfi1_wss_init();
1485 if (ret < 0)
1486 goto bail_wss;
1487 ret = pci_register_driver(&hfi1_pci_driver); 1507 ret = pci_register_driver(&hfi1_pci_driver);
1488 if (ret < 0) { 1508 if (ret < 0) {
1489 pr_err("Unable to register driver: error %d\n", -ret); 1509 pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
1492 goto bail; /* all OK */ 1512 goto bail; /* all OK */
1493 1513
1494bail_dev: 1514bail_dev:
1495 hfi1_wss_exit();
1496bail_wss:
1497 hfi1_dbg_exit(); 1515 hfi1_dbg_exit();
1498 idr_destroy(&hfi1_unit_table); 1516 idr_destroy(&hfi1_unit_table);
1499 dev_cleanup(); 1517 dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
1510{ 1528{
1511 pci_unregister_driver(&hfi1_pci_driver); 1529 pci_unregister_driver(&hfi1_pci_driver);
1512 node_affinity_destroy_all(); 1530 node_affinity_destroy_all();
1513 hfi1_wss_exit();
1514 hfi1_dbg_exit(); 1531 hfi1_dbg_exit();
1515 1532
1516 idr_destroy(&hfi1_unit_table); 1533 idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
1604 hfi1_free_devdata(dd); 1621 hfi1_free_devdata(dd);
1605} 1622}
1606 1623
1607static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) 1624static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
1608{ 1625{
1609 if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { 1626 if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
1610 hfi1_early_err(dev, "Receive header queue count too small\n"); 1627 dd_dev_err(dd, "Receive header queue count too small\n");
1611 return -EINVAL; 1628 return -EINVAL;
1612 } 1629 }
1613 1630
1614 if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { 1631 if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
1615 hfi1_early_err(dev, 1632 dd_dev_err(dd,
1616 "Receive header queue count cannot be greater than %u\n", 1633 "Receive header queue count cannot be greater than %u\n",
1617 HFI1_MAX_HDRQ_EGRBUF_CNT); 1634 HFI1_MAX_HDRQ_EGRBUF_CNT);
1618 return -EINVAL; 1635 return -EINVAL;
1619 } 1636 }
1620 1637
1621 if (thecnt % HDRQ_INCREMENT) { 1638 if (thecnt % HDRQ_INCREMENT) {
1622 hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", 1639 dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
1623 thecnt, HDRQ_INCREMENT); 1640 thecnt, HDRQ_INCREMENT);
1624 return -EINVAL; 1641 return -EINVAL;
1625 } 1642 }
1626 1643
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1639 /* Validate dev ids */ 1656 /* Validate dev ids */
1640 if (!(ent->device == PCI_DEVICE_ID_INTEL0 || 1657 if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
1641 ent->device == PCI_DEVICE_ID_INTEL1)) { 1658 ent->device == PCI_DEVICE_ID_INTEL1)) {
1642 hfi1_early_err(&pdev->dev, 1659 dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
1643 "Failing on unknown Intel deviceid 0x%x\n", 1660 ent->device);
1644 ent->device);
1645 ret = -ENODEV; 1661 ret = -ENODEV;
1646 goto bail; 1662 goto bail;
1647 } 1663 }
1648 1664
1665 /* Allocate the dd so we can get to work */
1666 dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
1667 sizeof(struct hfi1_pportdata));
1668 if (IS_ERR(dd)) {
1669 ret = PTR_ERR(dd);
1670 goto bail;
1671 }
1672
1649 /* Validate some global module parameters */ 1673 /* Validate some global module parameters */
1650 ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); 1674 ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
1651 if (ret) 1675 if (ret)
1652 goto bail; 1676 goto bail;
1653 1677
1654 /* use the encoding function as a sanitization check */ 1678 /* use the encoding function as a sanitization check */
1655 if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { 1679 if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
1656 hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", 1680 dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
1657 hfi1_hdrq_entsize); 1681 hfi1_hdrq_entsize);
1658 ret = -EINVAL; 1682 ret = -EINVAL;
1659 goto bail; 1683 goto bail;
1660 } 1684 }
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1676 clamp_val(eager_buffer_size, 1700 clamp_val(eager_buffer_size,
1677 MIN_EAGER_BUFFER * 8, 1701 MIN_EAGER_BUFFER * 8,
1678 MAX_EAGER_BUFFER_TOTAL); 1702 MAX_EAGER_BUFFER_TOTAL);
1679 hfi1_early_info(&pdev->dev, "Eager buffer size %u\n", 1703 dd_dev_info(dd, "Eager buffer size %u\n",
1680 eager_buffer_size); 1704 eager_buffer_size);
1681 } else { 1705 } else {
1682 hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n"); 1706 dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
1683 ret = -EINVAL; 1707 ret = -EINVAL;
1684 goto bail; 1708 goto bail;
1685 } 1709 }
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1687 /* restrict value of hfi1_rcvarr_split */ 1711 /* restrict value of hfi1_rcvarr_split */
1688 hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100); 1712 hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
1689 1713
1690 ret = hfi1_pcie_init(pdev, ent); 1714 ret = hfi1_pcie_init(dd);
1691 if (ret) 1715 if (ret)
1692 goto bail; 1716 goto bail;
1693 1717
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1695 * Do device-specific initialization, function table setup, dd 1719 * Do device-specific initialization, function table setup, dd
1696 * allocation, etc. 1720 * allocation, etc.
1697 */ 1721 */
1698 dd = hfi1_init_dd(pdev, ent); 1722 ret = hfi1_init_dd(dd);
1699 1723 if (ret)
1700 if (IS_ERR(dd)) {
1701 ret = PTR_ERR(dd);
1702 goto clean_bail; /* error already printed */ 1724 goto clean_bail; /* error already printed */
1703 }
1704 1725
1705 ret = create_workqueues(dd); 1726 ret = create_workqueues(dd);
1706 if (ret) 1727 if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1731 dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); 1752 dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
1732 1753
1733 if (initfail || ret) { 1754 if (initfail || ret) {
1734 hfi1_clean_up_interrupts(dd); 1755 msix_clean_up_interrupts(dd);
1735 stop_timers(dd); 1756 stop_timers(dd);
1736 flush_workqueue(ib_wq); 1757 flush_workqueue(ib_wq);
1737 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1758 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..582f1ba136ff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,94 @@
1// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2/*
3 * Copyright(c) 2018 Intel Corporation.
4 *
5 */
6#include "iowait.h"
7#include "trace_iowait.h"
8
9void iowait_set_flag(struct iowait *wait, u32 flag)
10{
11 trace_hfi1_iowait_set(wait, flag);
12 set_bit(flag, &wait->flags);
13}
14
15bool iowait_flag_set(struct iowait *wait, u32 flag)
16{
17 return test_bit(flag, &wait->flags);
18}
19
20inline void iowait_clear_flag(struct iowait *wait, u32 flag)
21{
22 trace_hfi1_iowait_clear(wait, flag);
23 clear_bit(flag, &wait->flags);
24}
25
26/**
27 * iowait_init() - initialize wait structure
28 * @wait: wait struct to initialize
29 * @tx_limit: limit for overflow queuing
30 * @func: restart function for workqueue
31 * @sleep: sleep function for no space
32 * @resume: wakeup function for no space
33 *
34 * This function initializes the iowait
35 * structure embedded in the QP or PQ.
36 *
37 */
38void iowait_init(struct iowait *wait, u32 tx_limit,
39 void (*func)(struct work_struct *work),
40 void (*tidfunc)(struct work_struct *work),
41 int (*sleep)(struct sdma_engine *sde,
42 struct iowait_work *wait,
43 struct sdma_txreq *tx,
44 uint seq,
45 bool pkts_sent),
46 void (*wakeup)(struct iowait *wait, int reason),
47 void (*sdma_drained)(struct iowait *wait))
48{
49 int i;
50
51 wait->count = 0;
52 INIT_LIST_HEAD(&wait->list);
53 init_waitqueue_head(&wait->wait_dma);
54 init_waitqueue_head(&wait->wait_pio);
55 atomic_set(&wait->sdma_busy, 0);
56 atomic_set(&wait->pio_busy, 0);
57 wait->tx_limit = tx_limit;
58 wait->sleep = sleep;
59 wait->wakeup = wakeup;
60 wait->sdma_drained = sdma_drained;
61 wait->flags = 0;
62 for (i = 0; i < IOWAIT_SES; i++) {
63 wait->wait[i].iow = wait;
64 INIT_LIST_HEAD(&wait->wait[i].tx_head);
65 if (i == IOWAIT_IB_SE)
66 INIT_WORK(&wait->wait[i].iowork, func);
67 else
68 INIT_WORK(&wait->wait[i].iowork, tidfunc);
69 }
70}
71
72/**
73 * iowait_cancel_work - cancel all work in iowait
74 * @w: the iowait struct
75 */
76void iowait_cancel_work(struct iowait *w)
77{
78 cancel_work_sync(&iowait_get_ib_work(w)->iowork);
79 cancel_work_sync(&iowait_get_tid_work(w)->iowork);
80}
81
82/**
83 * iowait_set_work_flag - set the pending work flag for the given send engine leg
84 * @w: the iowait_work struct
85 */
86int iowait_set_work_flag(struct iowait_work *w)
87{
88 if (w == &w->iow->wait[IOWAIT_IB_SE]) {
89 iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
90 return IOWAIT_IB_SE;
91 }
92 iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
93 return IOWAIT_TID_SE;
94}
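iowait_init() now takes two restart functions, one per send engine. A hedged sketch of how a QP's or PQ's private data might wire both legs; every name below is invented for illustration, only the iowait_init() signature comes from this file:

    static void demo_do_ib_send(struct work_struct *work)  { /* verbs SE restart */ }
    static void demo_do_tid_send(struct work_struct *work) { /* TID SE restart */ }

    static int demo_sleep(struct sdma_engine *sde, struct iowait_work *wait,
                          struct sdma_txreq *tx, uint seq, bool pkts_sent)
    {
            return 0;       /* placeholder: queue the tx and report busy/ok */
    }

    static void demo_wakeup(struct iowait *wait, int reason) { }
    static void demo_sdma_drained(struct iowait *wait) { }

            /* in setup code, with demo_priv->s_iowait embedded in the private data: */
            iowait_init(&demo_priv->s_iowait, demo_tx_limit,
                        demo_do_ib_send, demo_do_tid_send,
                        demo_sleep, demo_wakeup, demo_sdma_drained);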
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c7c340..23a58ac0d47c 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@
1#ifndef _HFI1_IOWAIT_H 1#ifndef _HFI1_IOWAIT_H
2#define _HFI1_IOWAIT_H 2#define _HFI1_IOWAIT_H
3/* 3/*
4 * Copyright(c) 2015, 2016 Intel Corporation. 4 * Copyright(c) 2015 - 2018 Intel Corporation.
5 * 5 *
6 * This file is provided under a dual BSD/GPLv2 license. When using or 6 * This file is provided under a dual BSD/GPLv2 license. When using or
7 * redistributing this file, you may do so under either license. 7 * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
49 49
50#include <linux/list.h> 50#include <linux/list.h>
51#include <linux/workqueue.h> 51#include <linux/workqueue.h>
52#include <linux/wait.h>
52#include <linux/sched.h> 53#include <linux/sched.h>
53 54
54#include "sdma_txreq.h" 55#include "sdma_txreq.h"
@@ -59,16 +60,47 @@
59 */ 60 */
60typedef void (*restart_t)(struct work_struct *work); 61typedef void (*restart_t)(struct work_struct *work);
61 62
63#define IOWAIT_PENDING_IB 0x0
64#define IOWAIT_PENDING_TID 0x1
65
66/*
67 * A QP can have multiple Send Engines (SEs).
68 *
69 * The current use case is for supporting a TID RDMA
70 * packet build/xmit mechanism independent from verbs.
71 */
72#define IOWAIT_SES 2
73#define IOWAIT_IB_SE 0
74#define IOWAIT_TID_SE 1
75
62struct sdma_txreq; 76struct sdma_txreq;
63struct sdma_engine; 77struct sdma_engine;
64/** 78/**
65 * struct iowait - linkage for delayed progress/waiting 79 * @iowork: the work struct
80 * @tx_head: list of prebuilt packets
81 * @iow: the parent iowait structure
82 *
83 * This structure is the work item (process) specific
84 * details associated with each of the two SEs of the
85 * QP.
86 *
87 * The work struct and the queued TXs are unique to each
88 * SE.
89 */
90struct iowait;
91struct iowait_work {
92 struct work_struct iowork;
93 struct list_head tx_head;
94 struct iowait *iow;
95};
96
97/**
66 * @list: used to add/insert into QP/PQ wait lists 98 * @list: used to add/insert into QP/PQ wait lists
67 * @lock: uses to record the list head lock
68 * @tx_head: overflow list of sdma_txreq's 99 * @tx_head: overflow list of sdma_txreq's
69 * @sleep: no space callback 100 * @sleep: no space callback
70 * @wakeup: space callback wakeup 101 * @wakeup: space callback wakeup
71 * @sdma_drained: sdma count drained 102 * @sdma_drained: sdma count drained
103 * @lock: lock protecting the head of the wait queue
72 * @iowork: workqueue overhead 104 * @iowork: workqueue overhead
73 * @wait_dma: wait for sdma_busy == 0 105 * @wait_dma: wait for sdma_busy == 0
74 * @wait_pio: wait for pio_busy == 0 106 * @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
76 * @count: total number of descriptors in tx_head'ed list 108 * @count: total number of descriptors in tx_head'ed list
77 * @tx_limit: limit for overflow queuing 109 * @tx_limit: limit for overflow queuing
78 * @tx_count: number of tx entries in tx_head'ed list 110 * @tx_count: number of tx entries in tx_head'ed list
111 * @flags: wait flags (one per QP)
112 * @wait: SE array
79 * 113 *
80 * This is to be embedded in user's state structure 114 * This is to be embedded in user's state structure
81 * (QP or PQ). 115 * (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
98 * Waiters explicitly know that, but the destroy 132 * Waiters explicitly know that, but the destroy
99 * code that unwaits QPs does not. 133 * code that unwaits QPs does not.
100 */ 134 */
101
102struct iowait { 135struct iowait {
103 struct list_head list; 136 struct list_head list;
104 struct list_head tx_head;
105 int (*sleep)( 137 int (*sleep)(
106 struct sdma_engine *sde, 138 struct sdma_engine *sde,
107 struct iowait *wait, 139 struct iowait_work *wait,
108 struct sdma_txreq *tx, 140 struct sdma_txreq *tx,
109 uint seq, 141 uint seq,
110 bool pkts_sent 142 bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
112 void (*wakeup)(struct iowait *wait, int reason); 144 void (*wakeup)(struct iowait *wait, int reason);
113 void (*sdma_drained)(struct iowait *wait); 145 void (*sdma_drained)(struct iowait *wait);
114 seqlock_t *lock; 146 seqlock_t *lock;
115 struct work_struct iowork;
116 wait_queue_head_t wait_dma; 147 wait_queue_head_t wait_dma;
117 wait_queue_head_t wait_pio; 148 wait_queue_head_t wait_pio;
118 atomic_t sdma_busy; 149 atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
121 u32 tx_limit; 152 u32 tx_limit;
122 u32 tx_count; 153 u32 tx_count;
123 u8 starved_cnt; 154 u8 starved_cnt;
155 unsigned long flags;
156 struct iowait_work wait[IOWAIT_SES];
124}; 157};
125 158
126#define SDMA_AVAIL_REASON 0 159#define SDMA_AVAIL_REASON 0
127 160
128/** 161void iowait_set_flag(struct iowait *wait, u32 flag);
129 * iowait_init() - initialize wait structure 162bool iowait_flag_set(struct iowait *wait, u32 flag);
130 * @wait: wait struct to initialize 163void iowait_clear_flag(struct iowait *wait, u32 flag);
131 * @tx_limit: limit for overflow queuing
132 * @func: restart function for workqueue
133 * @sleep: sleep function for no space
134 * @resume: wakeup function for no space
135 *
136 * This function initializes the iowait
137 * structure embedded in the QP or PQ.
138 *
139 */
140 164
141static inline void iowait_init( 165void iowait_init(struct iowait *wait, u32 tx_limit,
142 struct iowait *wait, 166 void (*func)(struct work_struct *work),
143 u32 tx_limit, 167 void (*tidfunc)(struct work_struct *work),
144 void (*func)(struct work_struct *work), 168 int (*sleep)(struct sdma_engine *sde,
145 int (*sleep)( 169 struct iowait_work *wait,
146 struct sdma_engine *sde, 170 struct sdma_txreq *tx,
147 struct iowait *wait, 171 uint seq,
148 struct sdma_txreq *tx, 172 bool pkts_sent),
149 uint seq, 173 void (*wakeup)(struct iowait *wait, int reason),
150 bool pkts_sent), 174 void (*sdma_drained)(struct iowait *wait));
151 void (*wakeup)(struct iowait *wait, int reason),
152 void (*sdma_drained)(struct iowait *wait))
153{
154 wait->count = 0;
155 wait->lock = NULL;
156 INIT_LIST_HEAD(&wait->list);
157 INIT_LIST_HEAD(&wait->tx_head);
158 INIT_WORK(&wait->iowork, func);
159 init_waitqueue_head(&wait->wait_dma);
160 init_waitqueue_head(&wait->wait_pio);
161 atomic_set(&wait->sdma_busy, 0);
162 atomic_set(&wait->pio_busy, 0);
163 wait->tx_limit = tx_limit;
164 wait->sleep = sleep;
165 wait->wakeup = wakeup;
166 wait->sdma_drained = sdma_drained;
167}
168 175
169/** 176/**
170 * iowait_schedule() - initialize wait structure 177 * iowait_schedule() - schedule the default send engine work
171 * @wait: wait struct to schedule 178 * @wait: wait struct to schedule
172 * @wq: workqueue for schedule 179 * @wq: workqueue for schedule
173 * @cpu: cpu 180 * @cpu: cpu
174 */ 181 */
175static inline void iowait_schedule( 182static inline bool iowait_schedule(struct iowait *wait,
176 struct iowait *wait, 183 struct workqueue_struct *wq, int cpu)
177 struct workqueue_struct *wq,
178 int cpu)
179{ 184{
180 queue_work_on(cpu, wq, &wait->iowork); 185 return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
181} 186}
182 187
183/** 188/**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
228 */ 233 */
229static inline int iowait_sdma_dec(struct iowait *wait) 234static inline int iowait_sdma_dec(struct iowait *wait)
230{ 235{
236 if (!wait)
237 return 0;
231 return atomic_dec_and_test(&wait->sdma_busy); 238 return atomic_dec_and_test(&wait->sdma_busy);
232} 239}
233 240
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
267} 274}
268 275
269/** 276/**
270 * iowait_sdma_dec - note pio complete 277 * iowait_pio_dec - note pio complete
271 * @wait: iowait structure 278 * @wait: iowait structure
272 */ 279 */
273static inline int iowait_pio_dec(struct iowait *wait) 280static inline int iowait_pio_dec(struct iowait *wait)
274{ 281{
282 if (!wait)
283 return 0;
275 return atomic_dec_and_test(&wait->pio_busy); 284 return atomic_dec_and_test(&wait->pio_busy);
276} 285}
277 286
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
293/** 302/**
294 * iowait_get_txhead() - get packet off of iowait list 303 * iowait_get_txhead() - get packet off of iowait list
295 * 304 *
296 * @wait wait structure 305 * @wait iowait_work structure
297 */ 306 */
298static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait) 307static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
299{ 308{
300 struct sdma_txreq *tx = NULL; 309 struct sdma_txreq *tx = NULL;
301 310
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
309 return tx; 318 return tx;
310} 319}
311 320
321static inline u16 iowait_get_desc(struct iowait_work *w)
322{
323 u16 num_desc = 0;
324 struct sdma_txreq *tx = NULL;
325
326 if (!list_empty(&w->tx_head)) {
327 tx = list_first_entry(&w->tx_head, struct sdma_txreq,
328 list);
329 num_desc = tx->num_desc;
330 }
331 return num_desc;
332}
333
334static inline u32 iowait_get_all_desc(struct iowait *w)
335{
336 u32 num_desc = 0;
337
338 num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
339 num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
340 return num_desc;
341}
342
312/** 343/**
313 * iowait_queue - Put the iowait on a wait queue 344 * iowait_queue - Put the iowait on a wait queue
314 * @pkts_sent: have some packets been sent before queuing? 345 * @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
372} 403}
373 404
374/** 405/**
375 * iowait_packet_queued() - determine if a packet is already built 406 * iowait_packet_queued() - determine if a packet is queued
376 * @wait: the wait structure 407 * @wait: the iowait_work structure
377 */ 408 */
378static inline bool iowait_packet_queued(struct iowait *wait) 409static inline bool iowait_packet_queued(struct iowait_work *wait)
379{ 410{
380 return !list_empty(&wait->tx_head); 411 return !list_empty(&wait->tx_head);
381} 412}
382 413
414/**
415 * inc_wait_count - increment wait counts
416 * @w: the log work struct
417 * @n: the count
418 */
419static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
420{
421 if (!w)
422 return;
423 w->iow->tx_count++;
424 w->iow->count += n;
425}
426
427/**
428 * iowait_get_tid_work - return iowait_work for tid SE
429 * @w: the iowait struct
430 */
431static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
432{
433 return &w->wait[IOWAIT_TID_SE];
434}
435
436/**
437 * iowait_get_ib_work - return iowait_work for ib SE
438 * @w: the iowait struct
439 */
440static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
441{
442 return &w->wait[IOWAIT_IB_SE];
443}
444
445/**
446 * iowait_ioww_to_iow - return iowait given iowait_work
447 * @w: the iowait_work struct
448 */
449static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
450{
451 if (likely(w))
452 return w->iow;
453 return NULL;
454}
455
456void iowait_cancel_work(struct iowait *w);
457int iowait_set_work_flag(struct iowait_work *w);
458
383#endif 459#endif
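The iowait now carries an array of two iowait_work legs plus per-QP pending flags, and iowait_set_work_flag() picks the flag by checking which array slot the work item is. A small standalone demo of that dispatch (plain bit operations stand in for set_bit(), and the struct layouts are trimmed to the fields the demo needs):

    #include <stdio.h>

    #define IOWAIT_SES          2
    #define IOWAIT_IB_SE        0
    #define IOWAIT_TID_SE       1
    #define IOWAIT_PENDING_IB   0x0
    #define IOWAIT_PENDING_TID  0x1

    struct iowait;
    struct iowait_work { struct iowait *iow; };
    struct iowait {
            unsigned long flags;
            struct iowait_work wait[IOWAIT_SES];
    };

    /* mirrors iowait_set_work_flag(): identify the leg by its address */
    static int set_work_flag(struct iowait_work *w)
    {
            if (w == &w->iow->wait[IOWAIT_IB_SE]) {
                    w->iow->flags |= 1UL << IOWAIT_PENDING_IB;
                    return IOWAIT_IB_SE;
            }
            w->iow->flags |= 1UL << IOWAIT_PENDING_TID;
            return IOWAIT_TID_SE;
    }

    int main(void)
    {
            struct iowait iow = { .flags = 0 };

            iow.wait[IOWAIT_IB_SE].iow = &iow;
            iow.wait[IOWAIT_TID_SE].iow = &iow;

            printf("leg %d, flags 0x%lx\n",
                   set_work_flag(&iow.wait[IOWAIT_TID_SE]), iow.flags);
            return 0;
    }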
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 0307405491e0..88a0cf930136 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015-2017 Intel Corporation. 2 * Copyright(c) 2015-2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
4836 int ret; 4836 int ret;
4837 int pkey_idx; 4837 int pkey_idx;
4838 int local_mad = 0; 4838 int local_mad = 0;
4839 u32 resp_len = 0; 4839 u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
4840 struct hfi1_ibport *ibp = to_iport(ibdev, port); 4840 struct hfi1_ibport *ibp = to_iport(ibdev, port);
4841 4841
4842 pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY); 4842 pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
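The removed hfi1_set_vnic_msix_info()/hfi1_reset_vnic_msix_info() pair from the chip.c hunk above is roughly replaced by the per-context helpers in the msix.c that follows. A hypothetical sketch (function names invented; whether and where the RcvAvail source is enabled is not shown in this section) of how a dynamically created receive context might obtain and release its vector:

    /* give a newly created (e.g. VNIC) receive context its own MSI-X vector */
    static int example_enable_ctxt_irq(struct hfi1_ctxtdata *rcd)
    {
            int ret;

            ret = msix_request_rcd_irq(rcd);    /* fills rcd->ireg/imask/msix_intr */
            if (ret)
                    return ret;

            /* assumed step: enable this context's RcvAvail source */
            set_intr_bits(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt,
                          IS_RCVAVAIL_START + rcd->ctxt, true);
            return 0;
    }

    static void example_disable_ctxt_irq(struct hfi1_ctxtdata *rcd)
    {
            set_intr_bits(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt,
                          IS_RCVAVAIL_START + rcd->ctxt, false);
            msix_free_irq(rcd->dd, rcd->msix_intr);
    }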
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..d920b165d696
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,363 @@
1// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2/*
3 * Copyright(c) 2018 Intel Corporation.
4 *
5 * This file is provided under a dual BSD/GPLv2 license. When using or
6 * redistributing this file, you may do so under either license.
7 *
8 * GPL LICENSE SUMMARY
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * BSD LICENSE
20 *
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
23 * are met:
24 *
25 * - Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * - Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in
29 * the documentation and/or other materials provided with the
30 * distribution.
31 * - Neither the name of Intel Corporation nor the names of its
32 * contributors may be used to endorse or promote products derived
33 * from this software without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
38 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
39 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
45 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 *
47 */
48
49#include "hfi.h"
50#include "affinity.h"
51#include "sdma.h"
52
53/**
54 * msix_initialize() - Calculate, request and configure MSIx IRQs
55 * @dd: valid hfi1 devdata
56 *
57 */
58int msix_initialize(struct hfi1_devdata *dd)
59{
60 u32 total;
61 int ret;
62 struct hfi1_msix_entry *entries;
63
64 /*
65 * MSIx interrupt count:
66 * one for the general, "slow path" interrupt
67 * one per used SDMA engine
68 * one per kernel receive context
69 * one for each VNIC context
70 * ...any new IRQs should be added here.
71 */
72 total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
73
74 if (total >= CCE_NUM_MSIX_VECTORS)
75 return -EINVAL;
76
77 ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
78 if (ret < 0) {
79 dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
80 return ret;
81 }
82
83 entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
84 GFP_KERNEL);
85 if (!entries) {
86 pci_free_irq_vectors(dd->pcidev);
87 return -ENOMEM;
88 }
89
90 dd->msix_info.msix_entries = entries;
91 spin_lock_init(&dd->msix_info.msix_lock);
92 bitmap_zero(dd->msix_info.in_use_msix, total);
93 dd->msix_info.max_requested = total;
94 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
95
96 return 0;
97}
98
99/**
100 * msix_request_irq() - Allocate a free MSIx IRQ
101 * @dd: valid devdata
102 * @arg: context information for the IRQ
103 * @handler: IRQ handler
104 * @thread: IRQ thread handler (could be NULL)
105 * @idx: zero-based index when multiple IRQs of the same type are needed
106 * @type: affinity IRQ type
107 *
108 * Allocate an MSIx vector if available, and then create the appropriate
109 * metadata needed to keep track of the PCI IRQ request.
110 *
111 * Return:
112 * < 0 Error
113 * >= 0 MSIx vector
114 *
115 */
116static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
117 irq_handler_t handler, irq_handler_t thread,
118 u32 idx, enum irq_type type)
119{
120 unsigned long nr;
121 int irq;
122 int ret;
123 const char *err_info;
124 char name[MAX_NAME_SIZE];
125 struct hfi1_msix_entry *me;
126
127 /* Allocate an MSIx vector */
128 spin_lock(&dd->msix_info.msix_lock);
129 nr = find_first_zero_bit(dd->msix_info.in_use_msix,
130 dd->msix_info.max_requested);
131 if (nr < dd->msix_info.max_requested)
132 __set_bit(nr, dd->msix_info.in_use_msix);
133 spin_unlock(&dd->msix_info.msix_lock);
134
135 if (nr == dd->msix_info.max_requested)
136 return -ENOSPC;
137
138 /* Type-specific verification; determine the IRQ name */
139 switch (type) {
140 case IRQ_GENERAL:
141 /* general interrupt must be MSIx vector 0 */
142 if (nr) {
143 spin_lock(&dd->msix_info.msix_lock);
144 __clear_bit(nr, dd->msix_info.in_use_msix);
145 spin_unlock(&dd->msix_info.msix_lock);
146 dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
147 nr);
148 return -EINVAL;
149 }
150 snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
151 err_info = "general";
152 break;
153 case IRQ_SDMA:
154 snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
155 dd->unit, idx);
156 err_info = "sdma";
157 break;
158 case IRQ_RCVCTXT:
159 snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
160 dd->unit, idx);
161 err_info = "receive context";
162 break;
163 case IRQ_OTHER:
164 default:
165 return -EINVAL;
166 }
167 name[sizeof(name) - 1] = 0;
168
169 irq = pci_irq_vector(dd->pcidev, nr);
170 ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
171 if (ret) {
172 dd_dev_err(dd,
173 "%s: request for IRQ %d failed, MSIx %d, err %d\n",
174 err_info, irq, idx, ret);
175 spin_lock(&dd->msix_info.msix_lock);
176 __clear_bit(nr, dd->msix_info.in_use_msix);
177 spin_unlock(&dd->msix_info.msix_lock);
178 return ret;
179 }
180
181 /*
182 * assign arg after pci_request_irq call, so it will be
183 * cleaned up
184 */
185 me = &dd->msix_info.msix_entries[nr];
186 me->irq = irq;
187 me->arg = arg;
188 me->type = type;
189
190 /* This is a request, so a failure is not fatal */
191 ret = hfi1_get_irq_affinity(dd, me);
192 if (ret)
193 dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
194
195 return nr;
196}
197
198/**
199 * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
200 * @rcd: valid rcd context
201 *
202 */
203int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
204{
205 int nr;
206
207 nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
208 receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
209 if (nr < 0)
210 return nr;
211
212 /*
213 * Set the interrupt register and mask for this
214 * context's interrupt.
215 */
216 rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
217 rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
218 rcd->msix_intr = nr;
219 remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
220
221 return 0;
222}
223
224/**
225 * msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
226 * @sde: valid sdma engine
227 *
228 */
229int msix_request_sdma_irq(struct sdma_engine *sde)
230{
231 int nr;
232
233 nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
234 sde->this_idx, IRQ_SDMA);
235 if (nr < 0)
236 return nr;
237 sde->msix_intr = nr;
238 remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
239
240 return 0;
241}
242
243/**
244 * enable_sdma_srcs() - Helper to enable SDMA IRQ sources
245 * @dd: valid devdata structure
246 * @i: index of SDMA engine
247 */
248static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
249{
250 set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
251 set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
252 IS_SDMA_PROGRESS_START + i, true);
253 set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
254 set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
255 true);
256}
257
258/**
259 * msix_request_irqs() - Allocate all MSIx IRQs
260 * @dd: valid devdata structure
261 *
262 * Helper function to request the used MSIx IRQs.
263 *
264 */
265int msix_request_irqs(struct hfi1_devdata *dd)
266{
267 int i;
268 int ret;
269
270 ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
271 if (ret < 0)
272 return ret;
273
274 for (i = 0; i < dd->num_sdma; i++) {
275 struct sdma_engine *sde = &dd->per_sdma[i];
276
277 ret = msix_request_sdma_irq(sde);
278 if (ret)
279 return ret;
280 enable_sdma_srcs(sde->dd, i);
281 }
282
283 for (i = 0; i < dd->n_krcv_queues; i++) {
284 struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
285
286 if (rcd)
287 ret = msix_request_rcd_irq(rcd);
288 hfi1_rcd_put(rcd);
289 if (ret)
290 return ret;
291 }
292
293 return 0;
294}
295
296/**
297 * msix_free_irq() - Free the specified MSIx resources and IRQ
298 * @dd: valid devdata
299 * @msix_intr: MSIx vector to free.
300 *
301 */
302void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
303{
304 struct hfi1_msix_entry *me;
305
306 if (msix_intr >= dd->msix_info.max_requested)
307 return;
308
309 me = &dd->msix_info.msix_entries[msix_intr];
310
311 if (!me->arg) /* => no irq, no affinity */
312 return;
313
314 hfi1_put_irq_affinity(dd, me);
315 pci_free_irq(dd->pcidev, msix_intr, me->arg);
316
317 me->arg = NULL;
318
319 spin_lock(&dd->msix_info.msix_lock);
320 __clear_bit(msix_intr, dd->msix_info.in_use_msix);
321 spin_unlock(&dd->msix_info.msix_lock);
322}
323
324/**
325 * msix_clean_up_interrupts() - Free all MSIx IRQ resources
326 * @dd: valid device data structure
327 *
328 * Free the MSIx and associated PCI resources, if they have been allocated.
329 */
330void msix_clean_up_interrupts(struct hfi1_devdata *dd)
331{
332 int i;
333 struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
334
335 /* remove irqs - must happen before disabling/turning off */
336 for (i = 0; i < dd->msix_info.max_requested; i++, me++)
337 msix_free_irq(dd, i);
338
339 /* clean structures */
340 kfree(dd->msix_info.msix_entries);
341 dd->msix_info.msix_entries = NULL;
342 dd->msix_info.max_requested = 0;
343
344 pci_free_irq_vectors(dd->pcidev);
345}
346
347/**
348 * msix_vnic_synchronize_irq() - VNIC IRQ synchronize
349 * @dd: valid devdata
350 */
351void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
352{
353 int i;
354
355 for (i = 0; i < dd->vnic.num_ctxt; i++) {
356 struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
357 struct hfi1_msix_entry *me;
358
359 me = &dd->msix_info.msix_entries[rcd->msix_intr];
360
361 synchronize_irq(me->irq);
362 }
363}
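
For reference, the vector bookkeeping in msix_request_irq() and msix_free_irq() above follows a common bitmap-under-spinlock pattern. The helper below is a minimal, hypothetical sketch of the same idea; it is not part of the driver, and only the kernel bitmap, spinlock, and errno primitives are real. msix_free_irq() reverses the claim with __clear_bit() under the same lock.

#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

/* Hypothetical helper: claim the first free slot in a vector bitmap. */
static int example_claim_slot(unsigned long *in_use, spinlock_t *lock,
                              unsigned int max)
{
        unsigned long nr;

        spin_lock(lock);
        nr = find_first_zero_bit(in_use, max);  /* first free slot, or max */
        if (nr < max)
                __set_bit(nr, in_use);          /* claim it while locked */
        spin_unlock(lock);

        return nr < max ? (int)nr : -ENOSPC;
}
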
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
new file mode 100644
index 000000000000..a514881632a4
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -0,0 +1,64 @@
1/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
2/*
3 * Copyright(c) 2018 Intel Corporation.
4 *
5 * This file is provided under a dual BSD/GPLv2 license. When using or
6 * redistributing this file, you may do so under either license.
7 *
8 * GPL LICENSE SUMMARY
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * BSD LICENSE
20 *
21 * Redistribution and use in source and binary forms, with or without
22 * modification, are permitted provided that the following conditions
23 * are met:
24 *
25 * - Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * - Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in
29 * the documentation and/or other materials provided with the
30 * distribution.
31 * - Neither the name of Intel Corporation nor the names of its
32 * contributors may be used to endorse or promote products derived
33 * from this software without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
36 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
38 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
39 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
45 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 *
47 */
48#ifndef _HFI1_MSIX_H
49#define _HFI1_MSIX_H
50
51#include "hfi.h"
52
53/* MSIx interface */
54int msix_initialize(struct hfi1_devdata *dd);
55int msix_request_irqs(struct hfi1_devdata *dd);
56void msix_clean_up_interrupts(struct hfi1_devdata *dd);
57int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
58int msix_request_sdma_irq(struct sdma_engine *sde);
59void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
60
61/* VNIC interface */
62void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
63
64#endif
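
Taken together, the header above defines a small bring-up/tear-down interface. A hypothetical call-order sketch, assuming only the prototypes declared in msix.h and a valid struct hfi1_devdata (the driver's actual call sites are not shown here):

#include "hfi.h"
#include "msix.h"

/* Hypothetical sketch of the expected call order; not driver code. */
static int example_msix_bringup(struct hfi1_devdata *dd)
{
        int ret;

        /* Size the pool: general + SDMA + kernel receive + VNIC contexts. */
        ret = msix_initialize(dd);
        if (ret)
                return ret;

        /* General IRQ first (vector 0), then per-engine/per-context IRQs. */
        ret = msix_request_irqs(dd);
        if (ret)
                /* Frees requested IRQs, the entry array, and PCI vectors. */
                msix_clean_up_interrupts(dd);

        return ret;
}
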
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index cca413eaa74e..c96d193bb236 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015 - 2017 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -61,19 +61,12 @@
61 */ 61 */
62 62
63/* 63/*
64 * Code to adjust PCIe capabilities.
65 */
66static void tune_pcie_caps(struct hfi1_devdata *);
67
68/*
69 * Do all the common PCIe setup and initialization. 64 * Do all the common PCIe setup and initialization.
70 * devdata is not yet allocated, and is not allocated until after this
71 * routine returns success. Therefore dd_dev_err() can't be used for error
72 * printing.
73 */ 65 */
74int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) 66int hfi1_pcie_init(struct hfi1_devdata *dd)
75{ 67{
76 int ret; 68 int ret;
69 struct pci_dev *pdev = dd->pcidev;
77 70
78 ret = pci_enable_device(pdev); 71 ret = pci_enable_device(pdev);
79 if (ret) { 72 if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
89 * about that, it appears. If the original BAR was retained 82 * about that, it appears. If the original BAR was retained
90 * in the kernel data structures, this may be OK. 83 * in the kernel data structures, this may be OK.
91 */ 84 */
92 hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n", 85 dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
93 -ret); 86 return ret;
94 goto done;
95 } 87 }
96 88
97 ret = pci_request_regions(pdev, DRIVER_NAME); 89 ret = pci_request_regions(pdev, DRIVER_NAME);
98 if (ret) { 90 if (ret) {
99 hfi1_early_err(&pdev->dev, 91 dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
100 "pci_request_regions fails: err %d\n", -ret);
101 goto bail; 92 goto bail;
102 } 93 }
103 94
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
110 */ 101 */
111 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 102 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
112 if (ret) { 103 if (ret) {
113 hfi1_early_err(&pdev->dev, 104 dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
114 "Unable to set DMA mask: %d\n", ret);
115 goto bail; 105 goto bail;
116 } 106 }
117 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 107 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
119 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 109 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
120 } 110 }
121 if (ret) { 111 if (ret) {
122 hfi1_early_err(&pdev->dev, 112 dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
123 "Unable to set DMA consistent mask: %d\n", ret);
124 goto bail; 113 goto bail;
125 } 114 }
126 115
127 pci_set_master(pdev); 116 pci_set_master(pdev);
128 (void)pci_enable_pcie_error_reporting(pdev); 117 (void)pci_enable_pcie_error_reporting(pdev);
129 goto done; 118 return 0;
130 119
131bail: 120bail:
132 hfi1_pcie_cleanup(pdev); 121 hfi1_pcie_cleanup(pdev);
133done:
134 return ret; 122 return ret;
135} 123}
136 124
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
206 dd_dev_err(dd, "WC mapping of send buffers failed\n"); 194 dd_dev_err(dd, "WC mapping of send buffers failed\n");
207 goto nomem; 195 goto nomem;
208 } 196 }
209 dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); 197 dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
210 198
211 dd->physaddr = addr; /* used for io_remap, etc. */ 199 dd->physaddr = addr; /* used for io_remap, etc. */
212 200
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
344 return 0; 332 return 0;
345} 333}
346 334
347/*
348 * Returns:
349 * - actual number of interrupts allocated or
350 * - error
351 */
352int request_msix(struct hfi1_devdata *dd, u32 msireq)
353{
354 int nvec;
355
356 nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
357 if (nvec < 0) {
358 dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
359 return nvec;
360 }
361
362 tune_pcie_caps(dd);
363
364 return nvec;
365}
366
367/* restore command and BARs after a reset has wiped them out */ 335/* restore command and BARs after a reset has wiped them out */
368int restore_pci_variables(struct hfi1_devdata *dd) 336int restore_pci_variables(struct hfi1_devdata *dd)
369{ 337{
@@ -479,14 +447,19 @@ error:
479 * Check and optionally adjust them to maximize our throughput. 447 * Check and optionally adjust them to maximize our throughput.
480 */ 448 */
481static int hfi1_pcie_caps; 449static int hfi1_pcie_caps;
482module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO); 450module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
483MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); 451MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
484 452
485uint aspm_mode = ASPM_MODE_DISABLED; 453uint aspm_mode = ASPM_MODE_DISABLED;
486module_param_named(aspm, aspm_mode, uint, S_IRUGO); 454module_param_named(aspm, aspm_mode, uint, 0444);
487MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); 455MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
488 456
489static void tune_pcie_caps(struct hfi1_devdata *dd) 457/**
458 * tune_pcie_caps() - Code to adjust PCIe capabilities.
459 * @dd: Valid device data structure
460 *
461 */
462void tune_pcie_caps(struct hfi1_devdata *dd)
490{ 463{
491 struct pci_dev *parent; 464 struct pci_dev *parent;
492 u16 rc_mpss, rc_mps, ep_mpss, ep_mps; 465 u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
1028 const u8 (*ctle_tunings)[4]; 1001 const u8 (*ctle_tunings)[4];
1029 uint static_ctle_mode; 1002 uint static_ctle_mode;
1030 int return_error = 0; 1003 int return_error = 0;
1004 u32 target_width;
1031 1005
1032 /* PCIe Gen3 is for the ASIC only */ 1006 /* PCIe Gen3 is for the ASIC only */
1033 if (dd->icode != ICODE_RTL_SILICON) 1007 if (dd->icode != ICODE_RTL_SILICON)
@@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
1067 return 0; 1041 return 0;
1068 } 1042 }
1069 1043
1044 /* Previous Gen1/Gen2 bus width */
1045 target_width = dd->lbus_width;
1046
1070 /* 1047 /*
1071 * Do the Gen3 transition. Steps are those of the PCIe Gen3 1048 * Do the Gen3 transition. Steps are those of the PCIe Gen3
1072 * recipe. 1049 * recipe.
@@ -1435,11 +1412,12 @@ retry:
1435 dd_dev_info(dd, "%s: new speed and width: %s\n", __func__, 1412 dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
1436 dd->lbus_info); 1413 dd->lbus_info);
1437 1414
1438 if (dd->lbus_speed != target_speed) { /* not target */ 1415 if (dd->lbus_speed != target_speed ||
1416 dd->lbus_width < target_width) { /* not target */
1439 /* maybe retry */ 1417 /* maybe retry */
1440 do_retry = retry_count < pcie_retry; 1418 do_retry = retry_count < pcie_retry;
1441 dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n", 1419 dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
1442 pcie_target, do_retry ? ", retrying" : ""); 1420 do_retry ? ", retrying" : "");
1443 retry_count++; 1421 retry_count++;
1444 if (do_retry) { 1422 if (do_retry) {
1445 msleep(100); /* allow time to settle */ 1423 msleep(100); /* allow time to settle */
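
The functional change in this hunk is that a Gen3 transition must now also preserve the pre-transition link width, not just reach the target speed. A hypothetical restatement of that widened retry test follows; example_gen3_needs_retry() is illustrative only, while the real check operates on dd->lbus_speed, dd->lbus_width, and the saved target_width.

#include <linux/types.h>

/* Hypothetical helper restating the widened retry condition. */
static bool example_gen3_needs_retry(u32 lbus_speed, u32 target_speed,
                                     u32 lbus_width, u32 target_width)
{
        /* Retry if speed missed the target or the link came back narrower. */
        return lbus_speed != target_speed || lbus_width < target_width;
}
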
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 752057647f09..9ab50d2308dc 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
71 } 71 }
72} 72}
73 73
74/* defined in header release 48 and higher */
75#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
76#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
77#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
78#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
79 << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
80#endif
81
82/* global control of PIO send */ 74/* global control of PIO send */
83void pio_send_control(struct hfi1_devdata *dd, int op) 75void pio_send_control(struct hfi1_devdata *dd, int op)
84{ 76{
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..6f3bc4dab858 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
66static void flush_tx_list(struct rvt_qp *qp); 66static void flush_tx_list(struct rvt_qp *qp);
67static int iowait_sleep( 67static int iowait_sleep(
68 struct sdma_engine *sde, 68 struct sdma_engine *sde,
69 struct iowait *wait, 69 struct iowait_work *wait,
70 struct sdma_txreq *stx, 70 struct sdma_txreq *stx,
71 unsigned int seq, 71 unsigned int seq,
72 bool pkts_sent); 72 bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
134 134
135}; 135};
136 136
137static void flush_tx_list(struct rvt_qp *qp) 137static void flush_list_head(struct list_head *l)
138{ 138{
139 struct hfi1_qp_priv *priv = qp->priv; 139 while (!list_empty(l)) {
140
141 while (!list_empty(&priv->s_iowait.tx_head)) {
142 struct sdma_txreq *tx; 140 struct sdma_txreq *tx;
143 141
144 tx = list_first_entry( 142 tx = list_first_entry(
145 &priv->s_iowait.tx_head, 143 l,
146 struct sdma_txreq, 144 struct sdma_txreq,
147 list); 145 list);
148 list_del_init(&tx->list); 146 list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
151 } 149 }
152} 150}
153 151
152static void flush_tx_list(struct rvt_qp *qp)
153{
154 struct hfi1_qp_priv *priv = qp->priv;
155
156 flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
157 flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
158}
159
154static void flush_iowait(struct rvt_qp *qp) 160static void flush_iowait(struct rvt_qp *qp)
155{ 161{
156 struct hfi1_qp_priv *priv = qp->priv; 162 struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
282} 288}
283 289
284/** 290/**
285 * hfi1_check_send_wqe - validate wqe 291 * hfi1_setup_wqe - set up the wqe
286 * @qp - The qp 292 * @qp - The qp
287 * @wqe - The built wqe 293 * @wqe - The built wqe
294 * @call_send - Determine if the send should be posted or scheduled.
288 * 295 *
289 * validate wqe. This is called 296 * Perform setup of the wqe. This is called
290 * prior to inserting the wqe into 297 * prior to inserting the wqe into the ring but after
291 * the ring but after the wqe has been 298 * the wqe has been setup by RDMAVT. This function
292 * setup. 299 * allows the driver the opportunity to perform
300 * validation and additional setup of the wqe.
293 * 301 *
294 * Returns 0 on success, -EINVAL on failure 302 * Returns 0 on success, -EINVAL on failure
295 * 303 *
296 */ 304 */
297int hfi1_check_send_wqe(struct rvt_qp *qp, 305int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
298 struct rvt_swqe *wqe)
299{ 306{
300 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 307 struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
301 struct rvt_ah *ah; 308 struct rvt_ah *ah;
309 struct hfi1_pportdata *ppd;
310 struct hfi1_devdata *dd;
302 311
303 switch (qp->ibqp.qp_type) { 312 switch (qp->ibqp.qp_type) {
304 case IB_QPT_RC: 313 case IB_QPT_RC:
305 case IB_QPT_UC: 314 case IB_QPT_UC:
306 if (wqe->length > 0x80000000U) 315 if (wqe->length > 0x80000000U)
307 return -EINVAL; 316 return -EINVAL;
317 if (wqe->length > qp->pmtu)
318 *call_send = false;
308 break; 319 break;
309 case IB_QPT_SMI: 320 case IB_QPT_SMI:
310 ah = ibah_to_rvtah(wqe->ud_wr.ah); 321 /*
311 if (wqe->length > (1 << ah->log_pmtu)) 322 * SM packets should exclusively use VL15 and their SL is
323 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
324 * is created, SL is 0 in most cases and as a result some
325 * fields (vl and pmtu) in ah may not be set correctly,
326 * depending on the SL2SC and SC2VL tables at the time.
327 */
328 ppd = ppd_from_ibp(ibp);
329 dd = dd_from_ppd(ppd);
330 if (wqe->length > dd->vld[15].mtu)
312 return -EINVAL; 331 return -EINVAL;
313 break; 332 break;
314 case IB_QPT_GSI: 333 case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
321 default: 340 default:
322 break; 341 break;
323 } 342 }
324 return wqe->length <= piothreshold; 343 return 0;
325} 344}
326 345
327/** 346/**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
333 * It is only used in the post send, which doesn't hold 352 * It is only used in the post send, which doesn't hold
334 * the s_lock. 353 * the s_lock.
335 */ 354 */
336void _hfi1_schedule_send(struct rvt_qp *qp) 355bool _hfi1_schedule_send(struct rvt_qp *qp)
337{ 356{
338 struct hfi1_qp_priv *priv = qp->priv; 357 struct hfi1_qp_priv *priv = qp->priv;
339 struct hfi1_ibport *ibp = 358 struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
341 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 360 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
342 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); 361 struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
343 362
344 iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, 363 return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
345 priv->s_sde ? 364 priv->s_sde ?
346 priv->s_sde->cpu : 365 priv->s_sde->cpu :
347 cpumask_first(cpumask_of_node(dd->node))); 366 cpumask_first(cpumask_of_node(dd->node)));
348} 367}
349 368
350static void qp_pio_drain(struct rvt_qp *qp) 369static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
372 * 391 *
373 * This schedules qp progress and caller should hold 392 * This schedules qp progress and caller should hold
374 * the s_lock. 393 * the s_lock.
394 * @return true if the first leg is scheduled;
395 * false if the first leg is not scheduled.
375 */ 396 */
376void hfi1_schedule_send(struct rvt_qp *qp) 397bool hfi1_schedule_send(struct rvt_qp *qp)
377{ 398{
378 lockdep_assert_held(&qp->s_lock); 399 lockdep_assert_held(&qp->s_lock);
379 if (hfi1_send_ok(qp)) 400 if (hfi1_send_ok(qp)) {
380 _hfi1_schedule_send(qp); 401 _hfi1_schedule_send(qp);
402 return true;
403 }
404 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
405 iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
406 IOWAIT_PENDING_IB);
407 return false;
408}
409
410static void hfi1_qp_schedule(struct rvt_qp *qp)
411{
412 struct hfi1_qp_priv *priv = qp->priv;
413 bool ret;
414
415 if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
416 ret = hfi1_schedule_send(qp);
417 if (ret)
418 iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
419 }
381} 420}
382 421
383void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) 422void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
388 if (qp->s_flags & flag) { 427 if (qp->s_flags & flag) {
389 qp->s_flags &= ~flag; 428 qp->s_flags &= ~flag;
390 trace_hfi1_qpwakeup(qp, flag); 429 trace_hfi1_qpwakeup(qp, flag);
391 hfi1_schedule_send(qp); 430 hfi1_qp_schedule(qp);
392 } 431 }
393 spin_unlock_irqrestore(&qp->s_lock, flags); 432 spin_unlock_irqrestore(&qp->s_lock, flags);
394 /* Notify hfi1_destroy_qp() if it is waiting. */ 433 /* Notify hfi1_destroy_qp() if it is waiting. */
395 rvt_put_qp(qp); 434 rvt_put_qp(qp);
396} 435}
397 436
437void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
438{
439 if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
440 qp->s_flags &= ~RVT_S_BUSY;
441}
442
398static int iowait_sleep( 443static int iowait_sleep(
399 struct sdma_engine *sde, 444 struct sdma_engine *sde,
400 struct iowait *wait, 445 struct iowait_work *wait,
401 struct sdma_txreq *stx, 446 struct sdma_txreq *stx,
402 uint seq, 447 uint seq,
403 bool pkts_sent) 448 bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
438 rvt_get_qp(qp); 483 rvt_get_qp(qp);
439 } 484 }
440 write_sequnlock(&dev->iowait_lock); 485 write_sequnlock(&dev->iowait_lock);
441 qp->s_flags &= ~RVT_S_BUSY; 486 hfi1_qp_unbusy(qp, wait);
442 spin_unlock_irqrestore(&qp->s_lock, flags); 487 spin_unlock_irqrestore(&qp->s_lock, flags);
443 ret = -EBUSY; 488 ret = -EBUSY;
444 } else { 489 } else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
637 &priv->s_iowait, 682 &priv->s_iowait,
638 1, 683 1,
639 _hfi1_do_send, 684 _hfi1_do_send,
685 NULL,
640 iowait_sleep, 686 iowait_sleep,
641 iowait_wakeup, 687 iowait_wakeup,
642 iowait_sdma_drained); 688 iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
686{ 732{
687 struct hfi1_qp_priv *priv = qp->priv; 733 struct hfi1_qp_priv *priv = qp->priv;
688 734
689 cancel_work_sync(&priv->s_iowait.iowork); 735 iowait_cancel_work(&priv->s_iowait);
690} 736}
691 737
692void quiesce_qp(struct rvt_qp *qp) 738void quiesce_qp(struct rvt_qp *qp)
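
For the RC/UC branch of the new hfi1_setup_wqe() above, the effect of @call_send is that sends larger than the path MTU are scheduled on the send engine rather than posted directly. A hypothetical restatement of just that branch (example_setup_rc_uc_wqe() is illustrative only and stands in for the driver's per-QP checks):

#include <linux/errno.h>
#include <linux/types.h>

/* Hypothetical restatement of the RC/UC branch of hfi1_setup_wqe(). */
static int example_setup_rc_uc_wqe(u32 length, u32 pmtu, bool *call_send)
{
        if (length > 0x80000000U)       /* length cap enforced by the driver */
                return -EINVAL;
        if (length > pmtu)              /* large send: schedule, do not post inline */
                *call_send = false;
        return 0;
}
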
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7560b6..7adb6dff6813 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ extern unsigned int hfi1_qp_table_size;
58extern const struct rvt_operation_params hfi1_post_parms[]; 58extern const struct rvt_operation_params hfi1_post_parms[];
59 59
60/* 60/*
61 * Send if not busy or waiting for I/O and either
62 * a RC response is pending or we can process send work requests.
63 */
64static inline int hfi1_send_ok(struct rvt_qp *qp)
65{
66 return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
67 (verbs_txreq_queued(qp) ||
68 (qp->s_flags & RVT_S_RESP_PENDING) ||
69 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
70}
71
72/*
73 * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK 61 * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
74 * 62 *
75 * HFI1_S_AHG_VALID - ahg header valid on chip 63 * HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
90#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) 78#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
91 79
92/* 80/*
81 * Send if not busy or waiting for I/O and either
82 * a RC response is pending or we can process send work requests.
83 */
84static inline int hfi1_send_ok(struct rvt_qp *qp)
85{
86 struct hfi1_qp_priv *priv = qp->priv;
87
88 return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
89 (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
90 (qp->s_flags & RVT_S_RESP_PENDING) ||
91 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
92}
93
94/*
93 * free_ahg - clear ahg from QP 95 * free_ahg - clear ahg from QP
94 */ 96 */
95static inline void clear_ahg(struct rvt_qp *qp) 97static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
129 131
130void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); 132void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
131 133
132void _hfi1_schedule_send(struct rvt_qp *qp); 134bool _hfi1_schedule_send(struct rvt_qp *qp);
133void hfi1_schedule_send(struct rvt_qp *qp); 135bool hfi1_schedule_send(struct rvt_qp *qp);
134 136
135void hfi1_migrate_qp(struct rvt_qp *qp); 137void hfi1_migrate_qp(struct rvt_qp *qp);
136 138
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
150u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); 152u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
151int mtu_to_path_mtu(u32 mtu); 153int mtu_to_path_mtu(u32 mtu);
152void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl); 154void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
155void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
153#endif /* _QP_H */ 156#endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9bd63abb2dfe..188aa4f686a0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
309 } 309 }
310 clear_ahg(qp); 310 clear_ahg(qp);
311 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 311 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
312 hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 312 rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
313 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 313 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
314 /* will get called again */ 314 /* will get called again */
315 goto done_free_tx; 315 goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
378 wqe->wr.ex.invalidate_rkey); 378 wqe->wr.ex.invalidate_rkey);
379 local_ops = 1; 379 local_ops = 1;
380 } 380 }
381 hfi1_send_complete(qp, wqe, 381 rvt_send_complete(qp, wqe,
382 err ? IB_WC_LOC_PROT_ERR 382 err ? IB_WC_LOC_PROT_ERR
383 : IB_WC_SUCCESS); 383 : IB_WC_SUCCESS);
384 if (local_ops) 384 if (local_ops)
385 atomic_dec(&qp->local_ops_pending); 385 atomic_dec(&qp->local_ops_pending);
386 goto done_free_tx; 386 goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
1043 hfi1_migrate_qp(qp); 1043 hfi1_migrate_qp(qp);
1044 qp->s_retry = qp->s_retry_cnt; 1044 qp->s_retry = qp->s_retry_cnt;
1045 } else if (qp->s_last == qp->s_acked) { 1045 } else if (qp->s_last == qp->s_acked) {
1046 hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 1046 rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
1047 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1047 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1048 return; 1048 return;
1049 } else { /* need to handle delayed completion */ 1049 } else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
1468 ibp->rvp.n_other_naks++; 1468 ibp->rvp.n_other_naks++;
1469class_b: 1469class_b:
1470 if (qp->s_last == qp->s_acked) { 1470 if (qp->s_last == qp->s_acked) {
1471 hfi1_send_complete(qp, wqe, status); 1471 rvt_send_complete(qp, wqe, status);
1472 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1472 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1473 } 1473 }
1474 break; 1474 break;
@@ -1644,7 +1644,8 @@ read_middle:
1644 qp->s_rdma_read_len -= pmtu; 1644 qp->s_rdma_read_len -= pmtu;
1645 update_last_psn(qp, psn); 1645 update_last_psn(qp, psn);
1646 spin_unlock_irqrestore(&qp->s_lock, flags); 1646 spin_unlock_irqrestore(&qp->s_lock, flags);
1647 hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false); 1647 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1648 data, pmtu, false, false);
1648 goto bail; 1649 goto bail;
1649 1650
1650 case OP(RDMA_READ_RESPONSE_ONLY): 1651 case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
1684 if (unlikely(tlen != qp->s_rdma_read_len)) 1685 if (unlikely(tlen != qp->s_rdma_read_len))
1685 goto ack_len_err; 1686 goto ack_len_err;
1686 aeth = be32_to_cpu(ohdr->u.aeth); 1687 aeth = be32_to_cpu(ohdr->u.aeth);
1687 hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false); 1688 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1689 data, tlen, false, false);
1688 WARN_ON(qp->s_rdma_read_sge.num_sge); 1690 WARN_ON(qp->s_rdma_read_sge.num_sge);
1689 (void)do_rc_ack(qp, aeth, psn, 1691 (void)do_rc_ack(qp, aeth, psn,
1690 OP(RDMA_READ_RESPONSE_LAST), 0, rcd); 1692 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
1704 status = IB_WC_LOC_LEN_ERR; 1706 status = IB_WC_LOC_LEN_ERR;
1705ack_err: 1707ack_err:
1706 if (qp->s_last == qp->s_acked) { 1708 if (qp->s_last == qp->s_acked) {
1707 hfi1_send_complete(qp, wqe, status); 1709 rvt_send_complete(qp, wqe, status);
1708 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1710 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1709 } 1711 }
1710ack_done: 1712ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
2144 qp->r_rcv_len += pmtu; 2146 qp->r_rcv_len += pmtu;
2145 if (unlikely(qp->r_rcv_len > qp->r_len)) 2147 if (unlikely(qp->r_rcv_len > qp->r_len))
2146 goto nack_inv; 2148 goto nack_inv;
2147 hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); 2149 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
2148 break; 2150 break;
2149 2151
2150 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 2152 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
2200 wc.byte_len = tlen + qp->r_rcv_len; 2202 wc.byte_len = tlen + qp->r_rcv_len;
2201 if (unlikely(wc.byte_len > qp->r_len)) 2203 if (unlikely(wc.byte_len > qp->r_len))
2202 goto nack_inv; 2204 goto nack_inv;
2203 hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last); 2205 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
2204 rvt_put_ss(&qp->r_sge); 2206 rvt_put_ss(&qp->r_sge);
2205 qp->r_msn++; 2207 qp->r_msn++;
2206 if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 2208 if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c1b4c4..7fb317c711df 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -156,333 +156,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
156} 156}
157 157
158/** 158/**
159 * ruc_loopback - handle UC and RC loopback requests
160 * @sqp: the sending QP
161 *
162 * This is called from hfi1_do_send() to
163 * forward a WQE addressed to the same HFI.
164 * Note that although we are single threaded due to the send engine, we still
165 * have to protect against post_send(). We don't have to worry about
166 * receive interrupts since this is a connected protocol and all packets
167 * will pass through here.
168 */
169static void ruc_loopback(struct rvt_qp *sqp)
170{
171 struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
172 struct rvt_qp *qp;
173 struct rvt_swqe *wqe;
174 struct rvt_sge *sge;
175 unsigned long flags;
176 struct ib_wc wc;
177 u64 sdata;
178 atomic64_t *maddr;
179 enum ib_wc_status send_status;
180 bool release;
181 int ret;
182 bool copy_last = false;
183 int local_ops = 0;
184
185 rcu_read_lock();
186
187 /*
188 * Note that we check the responder QP state after
189 * checking the requester's state.
190 */
191 qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
192 sqp->remote_qpn);
193
194 spin_lock_irqsave(&sqp->s_lock, flags);
195
196 /* Return if we are already busy processing a work request. */
197 if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
198 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
199 goto unlock;
200
201 sqp->s_flags |= RVT_S_BUSY;
202
203again:
204 if (sqp->s_last == READ_ONCE(sqp->s_head))
205 goto clr_busy;
206 wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
207
208 /* Return if it is not OK to start a new work request. */
209 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
210 if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
211 goto clr_busy;
212 /* We are in the error state, flush the work request. */
213 send_status = IB_WC_WR_FLUSH_ERR;
214 goto flush_send;
215 }
216
217 /*
218 * We can rely on the entry not changing without the s_lock
219 * being held until we update s_last.
220 * We increment s_cur to indicate s_last is in progress.
221 */
222 if (sqp->s_last == sqp->s_cur) {
223 if (++sqp->s_cur >= sqp->s_size)
224 sqp->s_cur = 0;
225 }
226 spin_unlock_irqrestore(&sqp->s_lock, flags);
227
228 if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
229 qp->ibqp.qp_type != sqp->ibqp.qp_type) {
230 ibp->rvp.n_pkt_drops++;
231 /*
232 * For RC, the requester would timeout and retry so
233 * shortcut the timeouts and just signal too many retries.
234 */
235 if (sqp->ibqp.qp_type == IB_QPT_RC)
236 send_status = IB_WC_RETRY_EXC_ERR;
237 else
238 send_status = IB_WC_SUCCESS;
239 goto serr;
240 }
241
242 memset(&wc, 0, sizeof(wc));
243 send_status = IB_WC_SUCCESS;
244
245 release = true;
246 sqp->s_sge.sge = wqe->sg_list[0];
247 sqp->s_sge.sg_list = wqe->sg_list + 1;
248 sqp->s_sge.num_sge = wqe->wr.num_sge;
249 sqp->s_len = wqe->length;
250 switch (wqe->wr.opcode) {
251 case IB_WR_REG_MR:
252 goto send_comp;
253
254 case IB_WR_LOCAL_INV:
255 if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
256 if (rvt_invalidate_rkey(sqp,
257 wqe->wr.ex.invalidate_rkey))
258 send_status = IB_WC_LOC_PROT_ERR;
259 local_ops = 1;
260 }
261 goto send_comp;
262
263 case IB_WR_SEND_WITH_INV:
264 if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
265 wc.wc_flags = IB_WC_WITH_INVALIDATE;
266 wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
267 }
268 goto send;
269
270 case IB_WR_SEND_WITH_IMM:
271 wc.wc_flags = IB_WC_WITH_IMM;
272 wc.ex.imm_data = wqe->wr.ex.imm_data;
273 /* FALLTHROUGH */
274 case IB_WR_SEND:
275send:
276 ret = rvt_get_rwqe(qp, false);
277 if (ret < 0)
278 goto op_err;
279 if (!ret)
280 goto rnr_nak;
281 break;
282
283 case IB_WR_RDMA_WRITE_WITH_IMM:
284 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
285 goto inv_err;
286 wc.wc_flags = IB_WC_WITH_IMM;
287 wc.ex.imm_data = wqe->wr.ex.imm_data;
288 ret = rvt_get_rwqe(qp, true);
289 if (ret < 0)
290 goto op_err;
291 if (!ret)
292 goto rnr_nak;
293 /* skip copy_last set and qp_access_flags recheck */
294 goto do_write;
295 case IB_WR_RDMA_WRITE:
296 copy_last = rvt_is_user_qp(qp);
297 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
298 goto inv_err;
299do_write:
300 if (wqe->length == 0)
301 break;
302 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
303 wqe->rdma_wr.remote_addr,
304 wqe->rdma_wr.rkey,
305 IB_ACCESS_REMOTE_WRITE)))
306 goto acc_err;
307 qp->r_sge.sg_list = NULL;
308 qp->r_sge.num_sge = 1;
309 qp->r_sge.total_len = wqe->length;
310 break;
311
312 case IB_WR_RDMA_READ:
313 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
314 goto inv_err;
315 if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
316 wqe->rdma_wr.remote_addr,
317 wqe->rdma_wr.rkey,
318 IB_ACCESS_REMOTE_READ)))
319 goto acc_err;
320 release = false;
321 sqp->s_sge.sg_list = NULL;
322 sqp->s_sge.num_sge = 1;
323 qp->r_sge.sge = wqe->sg_list[0];
324 qp->r_sge.sg_list = wqe->sg_list + 1;
325 qp->r_sge.num_sge = wqe->wr.num_sge;
326 qp->r_sge.total_len = wqe->length;
327 break;
328
329 case IB_WR_ATOMIC_CMP_AND_SWP:
330 case IB_WR_ATOMIC_FETCH_AND_ADD:
331 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
332 goto inv_err;
333 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
334 wqe->atomic_wr.remote_addr,
335 wqe->atomic_wr.rkey,
336 IB_ACCESS_REMOTE_ATOMIC)))
337 goto acc_err;
338 /* Perform atomic OP and save result. */
339 maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
340 sdata = wqe->atomic_wr.compare_add;
341 *(u64 *)sqp->s_sge.sge.vaddr =
342 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
343 (u64)atomic64_add_return(sdata, maddr) - sdata :
344 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
345 sdata, wqe->atomic_wr.swap);
346 rvt_put_mr(qp->r_sge.sge.mr);
347 qp->r_sge.num_sge = 0;
348 goto send_comp;
349
350 default:
351 send_status = IB_WC_LOC_QP_OP_ERR;
352 goto serr;
353 }
354
355 sge = &sqp->s_sge.sge;
356 while (sqp->s_len) {
357 u32 len = sqp->s_len;
358
359 if (len > sge->length)
360 len = sge->length;
361 if (len > sge->sge_length)
362 len = sge->sge_length;
363 WARN_ON_ONCE(len == 0);
364 hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
365 sge->vaddr += len;
366 sge->length -= len;
367 sge->sge_length -= len;
368 if (sge->sge_length == 0) {
369 if (!release)
370 rvt_put_mr(sge->mr);
371 if (--sqp->s_sge.num_sge)
372 *sge = *sqp->s_sge.sg_list++;
373 } else if (sge->length == 0 && sge->mr->lkey) {
374 if (++sge->n >= RVT_SEGSZ) {
375 if (++sge->m >= sge->mr->mapsz)
376 break;
377 sge->n = 0;
378 }
379 sge->vaddr =
380 sge->mr->map[sge->m]->segs[sge->n].vaddr;
381 sge->length =
382 sge->mr->map[sge->m]->segs[sge->n].length;
383 }
384 sqp->s_len -= len;
385 }
386 if (release)
387 rvt_put_ss(&qp->r_sge);
388
389 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
390 goto send_comp;
391
392 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
393 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
394 else
395 wc.opcode = IB_WC_RECV;
396 wc.wr_id = qp->r_wr_id;
397 wc.status = IB_WC_SUCCESS;
398 wc.byte_len = wqe->length;
399 wc.qp = &qp->ibqp;
400 wc.src_qp = qp->remote_qpn;
401 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
402 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
403 wc.port_num = 1;
404 /* Signal completion event if the solicited bit is set. */
405 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
406 wqe->wr.send_flags & IB_SEND_SOLICITED);
407
408send_comp:
409 spin_lock_irqsave(&sqp->s_lock, flags);
410 ibp->rvp.n_loop_pkts++;
411flush_send:
412 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
413 hfi1_send_complete(sqp, wqe, send_status);
414 if (local_ops) {
415 atomic_dec(&sqp->local_ops_pending);
416 local_ops = 0;
417 }
418 goto again;
419
420rnr_nak:
421 /* Handle RNR NAK */
422 if (qp->ibqp.qp_type == IB_QPT_UC)
423 goto send_comp;
424 ibp->rvp.n_rnr_naks++;
425 /*
426 * Note: we don't need the s_lock held since the BUSY flag
427 * makes this single threaded.
428 */
429 if (sqp->s_rnr_retry == 0) {
430 send_status = IB_WC_RNR_RETRY_EXC_ERR;
431 goto serr;
432 }
433 if (sqp->s_rnr_retry_cnt < 7)
434 sqp->s_rnr_retry--;
435 spin_lock_irqsave(&sqp->s_lock, flags);
436 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
437 goto clr_busy;
438 rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
439 IB_AETH_CREDIT_SHIFT);
440 goto clr_busy;
441
442op_err:
443 send_status = IB_WC_REM_OP_ERR;
444 wc.status = IB_WC_LOC_QP_OP_ERR;
445 goto err;
446
447inv_err:
448 send_status = IB_WC_REM_INV_REQ_ERR;
449 wc.status = IB_WC_LOC_QP_OP_ERR;
450 goto err;
451
452acc_err:
453 send_status = IB_WC_REM_ACCESS_ERR;
454 wc.status = IB_WC_LOC_PROT_ERR;
455err:
456 /* responder goes to error state */
457 rvt_rc_error(qp, wc.status);
458
459serr:
460 spin_lock_irqsave(&sqp->s_lock, flags);
461 hfi1_send_complete(sqp, wqe, send_status);
462 if (sqp->ibqp.qp_type == IB_QPT_RC) {
463 int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
464
465 sqp->s_flags &= ~RVT_S_BUSY;
466 spin_unlock_irqrestore(&sqp->s_lock, flags);
467 if (lastwqe) {
468 struct ib_event ev;
469
470 ev.device = sqp->ibqp.device;
471 ev.element.qp = &sqp->ibqp;
472 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
473 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
474 }
475 goto done;
476 }
477clr_busy:
478 sqp->s_flags &= ~RVT_S_BUSY;
479unlock:
480 spin_unlock_irqrestore(&sqp->s_lock, flags);
481done:
482 rcu_read_unlock();
483}
484
485/**
486 * hfi1_make_grh - construct a GRH header 159 * hfi1_make_grh - construct a GRH header
487 * @ibp: a pointer to the IB port 160 * @ibp: a pointer to the IB port
488 * @hdr: a pointer to the GRH header being constructed 161 * @hdr: a pointer to the GRH header being constructed
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
825 498
826void _hfi1_do_send(struct work_struct *work) 499void _hfi1_do_send(struct work_struct *work)
827{ 500{
828 struct iowait *wait = container_of(work, struct iowait, iowork); 501 struct iowait_work *w = container_of(work, struct iowait_work, iowork);
829 struct rvt_qp *qp = iowait_to_qp(wait); 502 struct rvt_qp *qp = iowait_to_qp(w->iow);
830 503
831 hfi1_do_send(qp, true); 504 hfi1_do_send(qp, true);
832} 505}
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
850 ps.ibp = to_iport(qp->ibqp.device, qp->port_num); 523 ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
851 ps.ppd = ppd_from_ibp(ps.ibp); 524 ps.ppd = ppd_from_ibp(ps.ibp);
852 ps.in_thread = in_thread; 525 ps.in_thread = in_thread;
526 ps.wait = iowait_get_ib_work(&priv->s_iowait);
853 527
854 trace_hfi1_rc_do_send(qp, in_thread); 528 trace_hfi1_rc_do_send(qp, in_thread);
855 529
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
858 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 532 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
859 ~((1 << ps.ppd->lmc) - 1)) == 533 ~((1 << ps.ppd->lmc) - 1)) ==
860 ps.ppd->lid)) { 534 ps.ppd->lid)) {
861 ruc_loopback(qp); 535 rvt_ruc_loopback(qp);
862 return; 536 return;
863 } 537 }
864 make_req = hfi1_make_rc_req; 538 make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
868 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 542 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
869 ~((1 << ps.ppd->lmc) - 1)) == 543 ~((1 << ps.ppd->lmc) - 1)) ==
870 ps.ppd->lid)) { 544 ps.ppd->lid)) {
871 ruc_loopback(qp); 545 rvt_ruc_loopback(qp);
872 return; 546 return;
873 } 547 }
874 make_req = hfi1_make_uc_req; 548 make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
883 557
884 /* Return if we are already busy processing a work request. */ 558 /* Return if we are already busy processing a work request. */
885 if (!hfi1_send_ok(qp)) { 559 if (!hfi1_send_ok(qp)) {
560 if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
561 iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
886 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 562 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
887 return; 563 return;
888 } 564 }
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
896 ps.pkts_sent = false; 572 ps.pkts_sent = false;
897 573
898 /* insure a pre-built packet is handled */ 574 /* insure a pre-built packet is handled */
899 ps.s_txreq = get_waiting_verbs_txreq(qp); 575 ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
900 do { 576 do {
901 /* Check for a constructed packet to be sent. */ 577 /* Check for a constructed packet to be sent. */
902 if (ps.s_txreq) { 578 if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
907 */ 583 */
908 if (hfi1_verbs_send(qp, &ps)) 584 if (hfi1_verbs_send(qp, &ps))
909 return; 585 return;
586
910 /* allow other tasks to run */ 587 /* allow other tasks to run */
911 if (schedule_send_yield(qp, &ps)) 588 if (schedule_send_yield(qp, &ps))
912 return; 589 return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
917 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); 594 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
918 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 595 spin_unlock_irqrestore(&qp->s_lock, ps.flags);
919} 596}
920
921/*
922 * This should be called with s_lock held.
923 */
924void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
925 enum ib_wc_status status)
926{
927 u32 old_last, last;
928
929 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
930 return;
931
932 last = qp->s_last;
933 old_last = last;
934 trace_hfi1_qp_send_completion(qp, wqe, last);
935 if (++last >= qp->s_size)
936 last = 0;
937 trace_hfi1_qp_send_completion(qp, wqe, last);
938 qp->s_last = last;
939 /* See post_send() */
940 barrier();
941 rvt_put_swqe(wqe);
942 if (qp->ibqp.qp_type == IB_QPT_UD ||
943 qp->ibqp.qp_type == IB_QPT_SMI ||
944 qp->ibqp.qp_type == IB_QPT_GSI)
945 atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
946
947 rvt_qp_swqe_complete(qp,
948 wqe,
949 ib_hfi1_wc_opcode[wqe->wr.opcode],
950 status);
951
952 if (qp->s_acked == old_last)
953 qp->s_acked = last;
954 if (qp->s_cur == old_last)
955 qp->s_cur = last;
956 if (qp->s_tail == old_last)
957 qp->s_tail = last;
958 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
959 qp->s_draining = 0;
960}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d6cc49..891d2386d1ca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
378 __sdma_txclean(sde->dd, tx); 378 __sdma_txclean(sde->dd, tx);
379 if (complete) 379 if (complete)
380 (*complete)(tx, res); 380 (*complete)(tx, res);
381 if (wait && iowait_sdma_dec(wait)) 381 if (iowait_sdma_dec(wait))
382 iowait_drain_wakeup(wait); 382 iowait_drain_wakeup(wait);
383} 383}
384 384
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
1758 struct iowait *wait, *nw; 1758 struct iowait *wait, *nw;
1759 struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; 1759 struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
1760 uint i, n = 0, seq, max_idx = 0; 1760 uint i, n = 0, seq, max_idx = 0;
1761 struct sdma_txreq *stx;
1762 struct hfi1_ibdev *dev = &sde->dd->verbs_dev; 1761 struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
1763 u8 max_starved_cnt = 0; 1762 u8 max_starved_cnt = 0;
1764 1763
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
1779 nw, 1778 nw,
1780 &sde->dmawait, 1779 &sde->dmawait,
1781 list) { 1780 list) {
1782 u16 num_desc = 0; 1781 u32 num_desc;
1783 1782
1784 if (!wait->wakeup) 1783 if (!wait->wakeup)
1785 continue; 1784 continue;
1786 if (n == ARRAY_SIZE(waits)) 1785 if (n == ARRAY_SIZE(waits))
1787 break; 1786 break;
1788 if (!list_empty(&wait->tx_head)) { 1787 num_desc = iowait_get_all_desc(wait);
1789 stx = list_first_entry(
1790 &wait->tx_head,
1791 struct sdma_txreq,
1792 list);
1793 num_desc = stx->num_desc;
1794 }
1795 if (num_desc > avail) 1788 if (num_desc > avail)
1796 break; 1789 break;
1797 avail -= num_desc; 1790 avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
2346 */ 2339 */
2347static int sdma_check_progress( 2340static int sdma_check_progress(
2348 struct sdma_engine *sde, 2341 struct sdma_engine *sde,
2349 struct iowait *wait, 2342 struct iowait_work *wait,
2350 struct sdma_txreq *tx, 2343 struct sdma_txreq *tx,
2351 bool pkts_sent) 2344 bool pkts_sent)
2352{ 2345{
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
2356 if (tx->num_desc <= sde->desc_avail) 2349 if (tx->num_desc <= sde->desc_avail)
2357 return -EAGAIN; 2350 return -EAGAIN;
2358 /* pulse the head_lock */ 2351 /* pulse the head_lock */
2359 if (wait && wait->sleep) { 2352 if (wait && iowait_ioww_to_iow(wait)->sleep) {
2360 unsigned seq; 2353 unsigned seq;
2361 2354
2362 seq = raw_seqcount_begin( 2355 seq = raw_seqcount_begin(
2363 (const seqcount_t *)&sde->head_lock.seqcount); 2356 (const seqcount_t *)&sde->head_lock.seqcount);
2364 ret = wait->sleep(sde, wait, tx, seq, pkts_sent); 2357 ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
2365 if (ret == -EAGAIN) 2358 if (ret == -EAGAIN)
2366 sde->desc_avail = sdma_descq_freecnt(sde); 2359 sde->desc_avail = sdma_descq_freecnt(sde);
2367 } else { 2360 } else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
2373/** 2366/**
2374 * sdma_send_txreq() - submit a tx req to ring 2367 * sdma_send_txreq() - submit a tx req to ring
2375 * @sde: sdma engine to use 2368 * @sde: sdma engine to use
2376 * @wait: wait structure to use when full (may be NULL) 2369 * @wait: SE wait structure to use when full (may be NULL)
2377 * @tx: sdma_txreq to submit 2370 * @tx: sdma_txreq to submit
2378 * @pkts_sent: has any packet been sent yet? 2371 * @pkts_sent: has any packet been sent yet?
2379 * 2372 *
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
2386 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state 2379 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
2387 */ 2380 */
2388int sdma_send_txreq(struct sdma_engine *sde, 2381int sdma_send_txreq(struct sdma_engine *sde,
2389 struct iowait *wait, 2382 struct iowait_work *wait,
2390 struct sdma_txreq *tx, 2383 struct sdma_txreq *tx,
2391 bool pkts_sent) 2384 bool pkts_sent)
2392{ 2385{
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
2397 /* user should have supplied entire packet */ 2390 /* user should have supplied entire packet */
2398 if (unlikely(tx->tlen)) 2391 if (unlikely(tx->tlen))
2399 return -EINVAL; 2392 return -EINVAL;
2400 tx->wait = wait; 2393 tx->wait = iowait_ioww_to_iow(wait);
2401 spin_lock_irqsave(&sde->tail_lock, flags); 2394 spin_lock_irqsave(&sde->tail_lock, flags);
2402retry: 2395retry:
2403 if (unlikely(!__sdma_running(sde))) 2396 if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
2406 goto nodesc; 2399 goto nodesc;
2407 tail = submit_tx(sde, tx); 2400 tail = submit_tx(sde, tx);
2408 if (wait) 2401 if (wait)
2409 iowait_sdma_inc(wait); 2402 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2410 sdma_update_tail(sde, tail); 2403 sdma_update_tail(sde, tail);
2411unlock: 2404unlock:
2412 spin_unlock_irqrestore(&sde->tail_lock, flags); 2405 spin_unlock_irqrestore(&sde->tail_lock, flags);
2413 return ret; 2406 return ret;
2414unlock_noconn: 2407unlock_noconn:
2415 if (wait) 2408 if (wait)
2416 iowait_sdma_inc(wait); 2409 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2417 tx->next_descq_idx = 0; 2410 tx->next_descq_idx = 0;
2418#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER 2411#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2419 tx->sn = sde->tail_sn++; 2412 tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
2422 spin_lock(&sde->flushlist_lock); 2415 spin_lock(&sde->flushlist_lock);
2423 list_add_tail(&tx->list, &sde->flushlist); 2416 list_add_tail(&tx->list, &sde->flushlist);
2424 spin_unlock(&sde->flushlist_lock); 2417 spin_unlock(&sde->flushlist_lock);
2425 if (wait) { 2418 iowait_inc_wait_count(wait, tx->num_desc);
2426 wait->tx_count++;
2427 wait->count += tx->num_desc;
2428 }
2429 schedule_work(&sde->flush_worker); 2419 schedule_work(&sde->flush_worker);
2430 ret = -ECOMM; 2420 ret = -ECOMM;
2431 goto unlock; 2421 goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
2442/** 2432/**
2443 * sdma_send_txlist() - submit a list of tx req to ring 2433 * sdma_send_txlist() - submit a list of tx req to ring
2444 * @sde: sdma engine to use 2434 * @sde: sdma engine to use
2445 * @wait: wait structure to use when full (may be NULL) 2435 * @wait: SE wait structure to use when full (may be NULL)
2446 * @tx_list: list of sdma_txreqs to submit 2436 * @tx_list: list of sdma_txreqs to submit
2447 * @count: pointer to a u32 which, after return will contain the total number of 2437 * @count: pointer to a u16 which, after return will contain the total number of
2448 * sdma_txreqs removed from the tx_list. This will include sdma_txreqs 2438 * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
2449 * whose SDMA descriptors are submitted to the ring and the sdma_txreqs 2439 * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
2450 * which are added to SDMA engine flush list if the SDMA engine state is 2440 * which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
2467 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) 2457 * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
2468 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state 2458 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
2469 */ 2459 */
2470int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, 2460int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
2471 struct list_head *tx_list, u32 *count_out) 2461 struct list_head *tx_list, u16 *count_out)
2472{ 2462{
2473 struct sdma_txreq *tx, *tx_next; 2463 struct sdma_txreq *tx, *tx_next;
2474 int ret = 0; 2464 int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
2479 spin_lock_irqsave(&sde->tail_lock, flags); 2469 spin_lock_irqsave(&sde->tail_lock, flags);
2480retry: 2470retry:
2481 list_for_each_entry_safe(tx, tx_next, tx_list, list) { 2471 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2482 tx->wait = wait; 2472 tx->wait = iowait_ioww_to_iow(wait);
2483 if (unlikely(!__sdma_running(sde))) 2473 if (unlikely(!__sdma_running(sde)))
2484 goto unlock_noconn; 2474 goto unlock_noconn;
2485 if (unlikely(tx->num_desc > sde->desc_avail)) 2475 if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
2500update_tail: 2490update_tail:
2501 total_count = submit_count + flush_count; 2491 total_count = submit_count + flush_count;
2502 if (wait) { 2492 if (wait) {
2503 iowait_sdma_add(wait, total_count); 2493 iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
2504 iowait_starve_clear(submit_count > 0, wait); 2494 iowait_starve_clear(submit_count > 0,
2495 iowait_ioww_to_iow(wait));
2505 } 2496 }
2506 if (tail != INVALID_TAIL) 2497 if (tail != INVALID_TAIL)
2507 sdma_update_tail(sde, tail); 2498 sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
2511unlock_noconn: 2502unlock_noconn:
2512 spin_lock(&sde->flushlist_lock); 2503 spin_lock(&sde->flushlist_lock);
2513 list_for_each_entry_safe(tx, tx_next, tx_list, list) { 2504 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2514 tx->wait = wait; 2505 tx->wait = iowait_ioww_to_iow(wait);
2515 list_del_init(&tx->list); 2506 list_del_init(&tx->list);
2516 tx->next_descq_idx = 0; 2507 tx->next_descq_idx = 0;
2517#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER 2508#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
2520#endif 2511#endif
2521 list_add_tail(&tx->list, &sde->flushlist); 2512 list_add_tail(&tx->list, &sde->flushlist);
2522 flush_count++; 2513 flush_count++;
2523 if (wait) { 2514 iowait_inc_wait_count(wait, tx->num_desc);
2524 wait->tx_count++;
2525 wait->count += tx->num_desc;
2526 }
2527 } 2515 }
2528 spin_unlock(&sde->flushlist_lock); 2516 spin_unlock(&sde->flushlist_lock);
2529 schedule_work(&sde->flush_worker); 2517 schedule_work(&sde->flush_worker);
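The open-coded flush accounting that used to bump wait->tx_count and wait->count is folded into iowait_inc_wait_count(), and the submit paths now take a struct iowait_work instead of the iowait itself. A minimal sketch of what such a helper has to do, assuming iowait_work keeps an iow back-pointer to its owning iowait (as the user_sdma.c hunk further down implies); this is an illustration, not the actual iowait.h definition:

/* Illustrative only: mirrors the removed wait->tx_count/wait->count updates. */
static inline void iowait_inc_wait_count_sketch(struct iowait_work *w, u16 n)
{
	if (!w)			/* callers may legitimately pass no wait */
		return;
	w->iow->tx_count++;	/* one more txreq parked for the flush worker */
	w->iow->count += n;	/* account its descriptors */
}
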
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 46c775f255d1..6dc63d7c5685 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@
1#ifndef _HFI1_SDMA_H 1#ifndef _HFI1_SDMA_H
2#define _HFI1_SDMA_H 2#define _HFI1_SDMA_H
3/* 3/*
4 * Copyright(c) 2015, 2016 Intel Corporation. 4 * Copyright(c) 2015 - 2018 Intel Corporation.
5 * 5 *
6 * This file is provided under a dual BSD/GPLv2 license. When using or 6 * This file is provided under a dual BSD/GPLv2 license. When using or
7 * redistributing this file, you may do so under either license. 7 * redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
62/* Hardware limit for SDMA packet size */ 62/* Hardware limit for SDMA packet size */
63#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1) 63#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
64 64
65#define SDMA_TXREQ_S_OK 0
66#define SDMA_TXREQ_S_SENDERROR 1
67#define SDMA_TXREQ_S_ABORTED 2
68#define SDMA_TXREQ_S_SHUTDOWN 3
69
70/* flags bits */
71#define SDMA_TXREQ_F_URGENT 0x0001
72#define SDMA_TXREQ_F_AHG_COPY 0x0002
73#define SDMA_TXREQ_F_USE_AHG 0x0004
74
75#define SDMA_MAP_NONE 0 65#define SDMA_MAP_NONE 0
76#define SDMA_MAP_SINGLE 1 66#define SDMA_MAP_SINGLE 1
77#define SDMA_MAP_PAGE 2 67#define SDMA_MAP_PAGE 2
@@ -415,6 +405,7 @@ struct sdma_engine {
415 struct list_head flushlist; 405 struct list_head flushlist;
416 struct cpumask cpu_mask; 406 struct cpumask cpu_mask;
417 struct kobject kobj; 407 struct kobject kobj;
408 u32 msix_intr;
418}; 409};
419 410
420int sdma_init(struct hfi1_devdata *dd, u8 port); 411int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
849 dd, SDMA_MAP_SINGLE, tx, addr, len); 840 dd, SDMA_MAP_SINGLE, tx, addr, len);
850} 841}
851 842
852struct iowait; 843struct iowait_work;
853 844
854int sdma_send_txreq(struct sdma_engine *sde, 845int sdma_send_txreq(struct sdma_engine *sde,
855 struct iowait *wait, 846 struct iowait_work *wait,
856 struct sdma_txreq *tx, 847 struct sdma_txreq *tx,
857 bool pkts_sent); 848 bool pkts_sent);
858int sdma_send_txlist(struct sdma_engine *sde, 849int sdma_send_txlist(struct sdma_engine *sde,
859 struct iowait *wait, 850 struct iowait_work *wait,
860 struct list_head *tx_list, 851 struct list_head *tx_list,
861 u32 *count); 852 u16 *count_out);
862 853
863int sdma_ahg_alloc(struct sdma_engine *sde); 854int sdma_ahg_alloc(struct sdma_engine *sde);
864void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); 855void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
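sdma_send_txreq() and sdma_send_txlist() now accept a struct iowait_work, and the sdma.c hunks above recover the owning iowait through iowait_ioww_to_iow(). A plausible shape for that pairing, inferred from the wait->iow and ps->wait->tx_head uses elsewhere in this series; treat it as a sketch, not the real iowait.h layout:

/* Sketch only: submit-side work item with a back-pointer to its iowait. */
struct iowait;

struct iowait_work {
	struct iowait *iow;		/* owning wait structure */
	struct list_head tx_head;	/* txreqs queued behind this work item */
};

static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *ww)
{
	return ww ? ww->iow : NULL;	/* tolerate a NULL wait, as sdma does */
}
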
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 25e867393463..2be513d4c9da 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
494 * Start of per-unit (or driver, in some cases, but replicated 494 * Start of per-unit (or driver, in some cases, but replicated
495 * per unit) functions (these get a device *) 495 * per unit) functions (these get a device *)
496 */ 496 */
497static ssize_t show_rev(struct device *device, struct device_attribute *attr, 497static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
498 char *buf) 498 char *buf)
499{ 499{
500 struct hfi1_ibdev *dev = 500 struct hfi1_ibdev *dev =
501 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); 501 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
502 502
503 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 503 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
504} 504}
505static DEVICE_ATTR_RO(hw_rev);
505 506
506static ssize_t show_hfi(struct device *device, struct device_attribute *attr, 507static ssize_t board_id_show(struct device *device,
507 char *buf) 508 struct device_attribute *attr, char *buf)
508{ 509{
509 struct hfi1_ibdev *dev = 510 struct hfi1_ibdev *dev =
510 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); 511 container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
517 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 518 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
518 return ret; 519 return ret;
519} 520}
521static DEVICE_ATTR_RO(board_id);
520 522
521static ssize_t show_boardversion(struct device *device, 523static ssize_t boardversion_show(struct device *device,
522 struct device_attribute *attr, char *buf) 524 struct device_attribute *attr, char *buf)
523{ 525{
524 struct hfi1_ibdev *dev = 526 struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
528 /* The string printed here is already newline-terminated. */ 530 /* The string printed here is already newline-terminated. */
529 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 531 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
530} 532}
533static DEVICE_ATTR_RO(boardversion);
531 534
532static ssize_t show_nctxts(struct device *device, 535static ssize_t nctxts_show(struct device *device,
533 struct device_attribute *attr, char *buf) 536 struct device_attribute *attr, char *buf)
534{ 537{
535 struct hfi1_ibdev *dev = 538 struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
546 min(dd->num_user_contexts, 549 min(dd->num_user_contexts,
547 (u32)dd->sc_sizes[SC_USER].count)); 550 (u32)dd->sc_sizes[SC_USER].count));
548} 551}
552static DEVICE_ATTR_RO(nctxts);
549 553
550static ssize_t show_nfreectxts(struct device *device, 554static ssize_t nfreectxts_show(struct device *device,
551 struct device_attribute *attr, char *buf) 555 struct device_attribute *attr, char *buf)
552{ 556{
553 struct hfi1_ibdev *dev = 557 struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
557 /* Return the number of free user ports (contexts) available. */ 561 /* Return the number of free user ports (contexts) available. */
558 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 562 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
559} 563}
564static DEVICE_ATTR_RO(nfreectxts);
560 565
561static ssize_t show_serial(struct device *device, 566static ssize_t serial_show(struct device *device,
562 struct device_attribute *attr, char *buf) 567 struct device_attribute *attr, char *buf)
563{ 568{
564 struct hfi1_ibdev *dev = 569 struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
567 572
568 return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); 573 return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
569} 574}
575static DEVICE_ATTR_RO(serial);
570 576
571static ssize_t store_chip_reset(struct device *device, 577static ssize_t chip_reset_store(struct device *device,
572 struct device_attribute *attr, const char *buf, 578 struct device_attribute *attr, const char *buf,
573 size_t count) 579 size_t count)
574{ 580{
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
586bail: 592bail:
587 return ret < 0 ? ret : count; 593 return ret < 0 ? ret : count;
588} 594}
595static DEVICE_ATTR_WO(chip_reset);
589 596
590/* 597/*
591 * Convert the reported temperature from an integer (reported in 598 * Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
598/* 605/*
599 * Dump tempsense values, in decimal, to ease shell-scripts. 606 * Dump tempsense values, in decimal, to ease shell-scripts.
600 */ 607 */
601static ssize_t show_tempsense(struct device *device, 608static ssize_t tempsense_show(struct device *device,
602 struct device_attribute *attr, char *buf) 609 struct device_attribute *attr, char *buf)
603{ 610{
604 struct hfi1_ibdev *dev = 611 struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
622 } 629 }
623 return ret; 630 return ret;
624} 631}
632static DEVICE_ATTR_RO(tempsense);
625 633
626/* 634/*
627 * end of per-unit (or driver, in some cases, but replicated 635 * end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
629 */ 637 */
630 638
631/* start of per-unit file structures and support code */ 639/* start of per-unit file structures and support code */
632static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 640static struct attribute *hfi1_attributes[] = {
633static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL); 641 &dev_attr_hw_rev.attr,
634static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); 642 &dev_attr_board_id.attr,
635static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); 643 &dev_attr_nctxts.attr,
636static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); 644 &dev_attr_nfreectxts.attr,
637static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 645 &dev_attr_serial.attr,
638static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); 646 &dev_attr_boardversion.attr,
639static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); 647 &dev_attr_tempsense.attr,
640 648 &dev_attr_chip_reset.attr,
641static struct device_attribute *hfi1_attributes[] = { 649 NULL,
642 &dev_attr_hw_rev, 650};
643 &dev_attr_board_id, 651
644 &dev_attr_nctxts, 652const struct attribute_group ib_hfi1_attr_group = {
645 &dev_attr_nfreectxts, 653 .attrs = hfi1_attributes,
646 &dev_attr_serial,
647 &dev_attr_boardversion,
648 &dev_attr_tempsense,
649 &dev_attr_chip_reset,
650}; 654};
651 655
652int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, 656int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
832 struct device *class_dev = &dev->dev; 836 struct device *class_dev = &dev->dev;
833 int i, j, ret; 837 int i, j, ret;
834 838
835 for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
836 ret = device_create_file(&dev->dev, hfi1_attributes[i]);
837 if (ret)
838 goto bail;
839 }
840
841 for (i = 0; i < dd->num_sdma; i++) { 839 for (i = 0; i < dd->num_sdma; i++) {
842 ret = kobject_init_and_add(&dd->per_sdma[i].kobj, 840 ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
843 &sde_ktype, &class_dev->kobj, 841 &sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
855 853
856 return 0; 854 return 0;
857bail: 855bail:
858 for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
859 device_remove_file(&dev->dev, hfi1_attributes[i]);
860
861 for (i = 0; i < dd->num_sdma; i++) 856 for (i = 0; i < dd->num_sdma; i++)
862 kobject_del(&dd->per_sdma[i].kobj); 857 kobject_del(&dd->per_sdma[i].kobj);
863 858
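The driver drops its device_create_file()/device_remove_file() loops in favor of one attribute group that the RDMA core publishes through rdma_set_device_sysfs_group() (see the verbs.c hunk below). For reference, a self-contained sketch of the DEVICE_ATTR_RO()/attribute_group idiom, with a made-up attribute name:

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>

/* example_show() + DEVICE_ATTR_RO(example) produce dev_attr_example. */
static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);

static struct attribute *example_attrs[] = {
	&dev_attr_example.attr,
	NULL,				/* array must be NULL-terminated */
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attrs,
};

/*
 * Registering the group once (here via rdma_set_device_sysfs_group()) lets
 * the core create and remove every file together, so the driver no longer
 * has to unwind partial device_create_file() failures by hand.
 */
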
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 8540463ef3f7..84458f1325e1 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015 - 2017 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype, \
62#include "trace_rx.h" 62#include "trace_rx.h"
63#include "trace_tx.h" 63#include "trace_tx.h"
64#include "trace_mmu.h" 64#include "trace_mmu.h"
65#include "trace_iowait.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
1/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
2/*
3 * Copyright(c) 2018 Intel Corporation.
4 *
5 */
6#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
7#define __HFI1_TRACE_IOWAIT_H
8
9#include <linux/tracepoint.h>
10#include "iowait.h"
11#include "verbs.h"
12
13#undef TRACE_SYSTEM
14#define TRACE_SYSTEM hfi1_iowait
15
16DECLARE_EVENT_CLASS(hfi1_iowait_template,
17 TP_PROTO(struct iowait *wait, u32 flag),
18 TP_ARGS(wait, flag),
19 TP_STRUCT__entry(/* entry */
20 __field(unsigned long, addr)
21 __field(unsigned long, flags)
22 __field(u32, flag)
23 __field(u32, qpn)
24 ),
25 TP_fast_assign(/* assign */
26 __entry->addr = (unsigned long)wait;
27 __entry->flags = wait->flags;
28 __entry->flag = (1 << flag);
29 __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
30 ),
31 TP_printk(/* print */
32 "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
33 __entry->addr,
34 __entry->qpn,
35 __entry->flags,
36 __entry->flag
37 )
38 );
39
40DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
41 TP_PROTO(struct iowait *wait, u32 flag),
42 TP_ARGS(wait, flag));
43
44DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
45 TP_PROTO(struct iowait *wait, u32 flag),
46 TP_ARGS(wait, flag));
47
48#endif /* __HFI1_TRACE_IOWAIT_H */
49
50#undef TRACE_INCLUDE_PATH
51#undef TRACE_INCLUDE_FILE
52#define TRACE_INCLUDE_PATH .
53#define TRACE_INCLUDE_FILE trace_iowait
54#include <trace/define_trace.h>
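The new header is a standard DECLARE_EVENT_CLASS/DEFINE_EVENT pair, so the driver fires these events as trace_hfi1_iowait_set() and trace_hfi1_iowait_clear() at its call sites (which sit outside this hunk). A hedged usage sketch, with the flag bit chosen purely for illustration:

/* Illustrative only: the template records BIT(flag) alongside wait->flags. */
static void example_mark_busy(struct hfi1_qp_priv *priv, u32 flag_bit)
{
	trace_hfi1_iowait_set(&priv->s_iowait, flag_bit);
}

static void example_mark_idle(struct hfi1_qp_priv *priv, u32 flag_bit)
{
	trace_hfi1_iowait_clear(&priv->s_iowait, flag_bit);
}
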
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index e254dcec6f64..6aca0c5a7f97 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
88 } 88 }
89 clear_ahg(qp); 89 clear_ahg(qp);
90 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 90 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
91 hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 91 rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
92 goto done_free_tx; 92 goto done_free_tx;
93 } 93 }
94 94
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
140 qp, wqe->wr.ex.invalidate_rkey); 140 qp, wqe->wr.ex.invalidate_rkey);
141 local_ops = 1; 141 local_ops = 1;
142 } 142 }
143 hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR 143 rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
144 : IB_WC_SUCCESS); 144 : IB_WC_SUCCESS);
145 if (local_ops) 145 if (local_ops)
146 atomic_dec(&qp->local_ops_pending); 146 atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
426 qp->r_rcv_len += pmtu; 426 qp->r_rcv_len += pmtu;
427 if (unlikely(qp->r_rcv_len > qp->r_len)) 427 if (unlikely(qp->r_rcv_len > qp->r_len))
428 goto rewind; 428 goto rewind;
429 hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false); 429 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
430 break; 430 break;
431 431
432 case OP(SEND_LAST_WITH_IMMEDIATE): 432 case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
449 if (unlikely(wc.byte_len > qp->r_len)) 449 if (unlikely(wc.byte_len > qp->r_len))
450 goto rewind; 450 goto rewind;
451 wc.opcode = IB_WC_RECV; 451 wc.opcode = IB_WC_RECV;
452 hfi1_copy_sge(&qp->r_sge, data, tlen, false, false); 452 rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
453 rvt_put_ss(&qp->s_rdma_read_sge); 453 rvt_put_ss(&qp->s_rdma_read_sge);
454last_imm: 454last_imm:
455 wc.wr_id = qp->r_wr_id; 455 wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
523 qp->r_rcv_len += pmtu; 523 qp->r_rcv_len += pmtu;
524 if (unlikely(qp->r_rcv_len > qp->r_len)) 524 if (unlikely(qp->r_rcv_len > qp->r_len))
525 goto drop; 525 goto drop;
526 hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); 526 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
527 break; 527 break;
528 528
529 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 529 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
550 } 550 }
551 wc.byte_len = qp->r_len; 551 wc.byte_len = qp->r_len;
552 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 552 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
553 hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); 553 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
554 rvt_put_ss(&qp->r_sge); 554 rvt_put_ss(&qp->r_sge);
555 goto last_imm; 555 goto last_imm;
556 556
@@ -564,7 +564,7 @@ rdma_last:
564 tlen -= (hdrsize + extra_bytes); 564 tlen -= (hdrsize + extra_bytes);
565 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 565 if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
566 goto drop; 566 goto drop;
567 hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); 567 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
568 rvt_put_ss(&qp->r_sge); 568 rvt_put_ss(&qp->r_sge);
569 break; 569 break;
570 570
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 70d39fc450a1..4baa8f4d49de 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
210 } 210 }
211 211
212 hfi1_make_grh(ibp, &grh, &grd, 0, 0); 212 hfi1_make_grh(ibp, &grh, &grd, 0, 0);
213 hfi1_copy_sge(&qp->r_sge, &grh, 213 rvt_copy_sge(qp, &qp->r_sge, &grh,
214 sizeof(grh), true, false); 214 sizeof(grh), true, false);
215 wc.wc_flags |= IB_WC_GRH; 215 wc.wc_flags |= IB_WC_GRH;
216 } else { 216 } else {
217 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 217 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
228 if (len > sge->sge_length) 228 if (len > sge->sge_length)
229 len = sge->sge_length; 229 len = sge->sge_length;
230 WARN_ON_ONCE(len == 0); 230 WARN_ON_ONCE(len == 0);
231 hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false); 231 rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
232 sge->vaddr += len; 232 sge->vaddr += len;
233 sge->length -= len; 233 sge->length -= len;
234 sge->sge_length -= len; 234 sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
518 goto bail; 518 goto bail;
519 } 519 }
520 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 520 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
521 hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 521 rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
522 goto done_free_tx; 522 goto done_free_tx;
523 } 523 }
524 524
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
560 ud_loopback(qp, wqe); 560 ud_loopback(qp, wqe);
561 spin_lock_irqsave(&qp->s_lock, tflags); 561 spin_lock_irqsave(&qp->s_lock, tflags);
562 ps->flags = tflags; 562 ps->flags = tflags;
563 hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); 563 rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
564 goto done_free_tx; 564 goto done_free_tx;
565 } 565 }
566 } 566 }
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
1019 goto drop; 1019 goto drop;
1020 } 1020 }
1021 if (packet->grh) { 1021 if (packet->grh) {
1022 hfi1_copy_sge(&qp->r_sge, packet->grh, 1022 rvt_copy_sge(qp, &qp->r_sge, packet->grh,
1023 sizeof(struct ib_grh), true, false); 1023 sizeof(struct ib_grh), true, false);
1024 wc.wc_flags |= IB_WC_GRH; 1024 wc.wc_flags |= IB_WC_GRH;
1025 } else if (packet->etype == RHF_RCV_TYPE_BYPASS) { 1025 } else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
1026 struct ib_grh grh; 1026 struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
1030 * out when creating 16B, add back the GRH here. 1030 * out when creating 16B, add back the GRH here.
1031 */ 1031 */
1032 hfi1_make_ext_grh(packet, &grh, slid, dlid); 1032 hfi1_make_ext_grh(packet, &grh, slid, dlid);
1033 hfi1_copy_sge(&qp->r_sge, &grh, 1033 rvt_copy_sge(qp, &qp->r_sge, &grh,
1034 sizeof(struct ib_grh), true, false); 1034 sizeof(struct ib_grh), true, false);
1035 wc.wc_flags |= IB_WC_GRH; 1035 wc.wc_flags |= IB_WC_GRH;
1036 } else { 1036 } else {
1037 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 1037 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
1038 } 1038 }
1039 hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1039 rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
1040 true, false); 1040 true, false);
1041 rvt_put_ss(&qp->r_sge); 1041 rvt_put_ss(&qp->r_sge);
1042 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 1042 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
1043 return; 1043 return;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706121c1..3f0aadccd9f6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2015 - 2017 Intel Corporation. 2 * Copyright(c) 2015 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
76 76
77static unsigned initial_pkt_count = 8; 77static unsigned initial_pkt_count = 8;
78 78
79static int user_sdma_send_pkts(struct user_sdma_request *req, 79static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
80 unsigned maxpkts);
81static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); 80static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
82static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); 81static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
83static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); 82static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
101 100
102static int defer_packet_queue( 101static int defer_packet_queue(
103 struct sdma_engine *sde, 102 struct sdma_engine *sde,
104 struct iowait *wait, 103 struct iowait_work *wait,
105 struct sdma_txreq *txreq, 104 struct sdma_txreq *txreq,
106 uint seq, 105 uint seq,
107 bool pkts_sent); 106 bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
124 123
125static int defer_packet_queue( 124static int defer_packet_queue(
126 struct sdma_engine *sde, 125 struct sdma_engine *sde,
127 struct iowait *wait, 126 struct iowait_work *wait,
128 struct sdma_txreq *txreq, 127 struct sdma_txreq *txreq,
129 uint seq, 128 uint seq,
130 bool pkts_sent) 129 bool pkts_sent)
131{ 130{
132 struct hfi1_user_sdma_pkt_q *pq = 131 struct hfi1_user_sdma_pkt_q *pq =
133 container_of(wait, struct hfi1_user_sdma_pkt_q, busy); 132 container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
134 struct hfi1_ibdev *dev = &pq->dd->verbs_dev; 133 struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
135 struct user_sdma_txreq *tx = 134 struct user_sdma_txreq *tx =
136 container_of(txreq, struct user_sdma_txreq, txreq); 135 container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
187 pq->ctxt = uctxt->ctxt; 186 pq->ctxt = uctxt->ctxt;
188 pq->subctxt = fd->subctxt; 187 pq->subctxt = fd->subctxt;
189 pq->n_max_reqs = hfi1_sdma_comp_ring_size; 188 pq->n_max_reqs = hfi1_sdma_comp_ring_size;
190 pq->state = SDMA_PKT_Q_INACTIVE;
191 atomic_set(&pq->n_reqs, 0); 189 atomic_set(&pq->n_reqs, 0);
192 init_waitqueue_head(&pq->wait); 190 init_waitqueue_head(&pq->wait);
193 atomic_set(&pq->n_locked, 0); 191 atomic_set(&pq->n_locked, 0);
194 pq->mm = fd->mm; 192 pq->mm = fd->mm;
195 193
196 iowait_init(&pq->busy, 0, NULL, defer_packet_queue, 194 iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
197 activate_packet_queue, NULL); 195 activate_packet_queue, NULL);
198 pq->reqidx = 0; 196 pq->reqidx = 0;
199 197
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
276 /* Wait until all requests have been freed. */ 274 /* Wait until all requests have been freed. */
277 wait_event_interruptible( 275 wait_event_interruptible(
278 pq->wait, 276 pq->wait,
279 (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); 277 !atomic_read(&pq->n_reqs));
280 kfree(pq->reqs); 278 kfree(pq->reqs);
281 kfree(pq->req_in_use); 279 kfree(pq->req_in_use);
282 kmem_cache_destroy(pq->txreq_cache); 280 kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
312 return mapping[hash]; 310 return mapping[hash];
313} 311}
314 312
313/**
314 * hfi1_user_sdma_process_request() - Process and start a user sdma request
315 * @fd: valid file descriptor
316 * @iovec: array of io vectors to process
317 * @dim: overall iovec array size
318 * @count: number of io vector array entries processed
319 */
315int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, 320int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
316 struct iovec *iovec, unsigned long dim, 321 struct iovec *iovec, unsigned long dim,
317 unsigned long *count) 322 unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
328 u8 opcode, sc, vl; 333 u8 opcode, sc, vl;
329 u16 pkey; 334 u16 pkey;
330 u32 slid; 335 u32 slid;
331 int req_queued = 0;
332 u16 dlid; 336 u16 dlid;
333 u32 selector; 337 u32 selector;
334 338
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
392 req->data_len = 0; 396 req->data_len = 0;
393 req->pq = pq; 397 req->pq = pq;
394 req->cq = cq; 398 req->cq = cq;
395 req->status = -1;
396 req->ahg_idx = -1; 399 req->ahg_idx = -1;
397 req->iov_idx = 0; 400 req->iov_idx = 0;
398 req->sent = 0; 401 req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
400 req->seqcomp = 0; 403 req->seqcomp = 0;
401 req->seqsubmitted = 0; 404 req->seqsubmitted = 0;
402 req->tids = NULL; 405 req->tids = NULL;
403 req->done = 0;
404 req->has_error = 0; 406 req->has_error = 0;
405 INIT_LIST_HEAD(&req->txps); 407 INIT_LIST_HEAD(&req->txps);
406 408
407 memcpy(&req->info, &info, sizeof(info)); 409 memcpy(&req->info, &info, sizeof(info));
408 410
411 /* The request is initialized, count it */
412 atomic_inc(&pq->n_reqs);
413
409 if (req_opcode(info.ctrl) == EXPECTED) { 414 if (req_opcode(info.ctrl) == EXPECTED) {
410 /* expected must have a TID info and at least one data vector */ 415 /* expected must have a TID info and at least one data vector */
411 if (req->data_iovs < 2) { 416 if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
500 ret = pin_vector_pages(req, &req->iovs[i]); 505 ret = pin_vector_pages(req, &req->iovs[i]);
501 if (ret) { 506 if (ret) {
502 req->data_iovs = i; 507 req->data_iovs = i;
503 req->status = ret;
504 goto free_req; 508 goto free_req;
505 } 509 }
506 req->data_len += req->iovs[i].iov.iov_len; 510 req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
561 req->ahg_idx = sdma_ahg_alloc(req->sde); 565 req->ahg_idx = sdma_ahg_alloc(req->sde);
562 566
563 set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); 567 set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
564 atomic_inc(&pq->n_reqs); 568 pq->state = SDMA_PKT_Q_ACTIVE;
565 req_queued = 1;
566 /* Send the first N packets in the request to buy us some time */ 569 /* Send the first N packets in the request to buy us some time */
567 ret = user_sdma_send_pkts(req, pcount); 570 ret = user_sdma_send_pkts(req, pcount);
568 if (unlikely(ret < 0 && ret != -EBUSY)) { 571 if (unlikely(ret < 0 && ret != -EBUSY))
569 req->status = ret;
570 goto free_req; 572 goto free_req;
571 }
572
573 /*
574 * It is possible that the SDMA engine would have processed all the
575 * submitted packets by the time we get here. Therefore, only set
576 * packet queue state to ACTIVE if there are still uncompleted
577 * requests.
578 */
579 if (atomic_read(&pq->n_reqs))
580 xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
581 573
582 /* 574 /*
583 * This is a somewhat blocking send implementation. 575 * This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
588 while (req->seqsubmitted != req->info.npkts) { 580 while (req->seqsubmitted != req->info.npkts) {
589 ret = user_sdma_send_pkts(req, pcount); 581 ret = user_sdma_send_pkts(req, pcount);
590 if (ret < 0) { 582 if (ret < 0) {
591 if (ret != -EBUSY) { 583 if (ret != -EBUSY)
592 req->status = ret; 584 goto free_req;
593 WRITE_ONCE(req->has_error, 1);
594 if (READ_ONCE(req->seqcomp) ==
595 req->seqsubmitted - 1)
596 goto free_req;
597 return ret;
598 }
599 wait_event_interruptible_timeout( 585 wait_event_interruptible_timeout(
600 pq->busy.wait_dma, 586 pq->busy.wait_dma,
601 (pq->state == SDMA_PKT_Q_ACTIVE), 587 (pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
606 *count += idx; 592 *count += idx;
607 return 0; 593 return 0;
608free_req: 594free_req:
609 user_sdma_free_request(req, true); 595 /*
610 if (req_queued) 596 * If the submitted seqsubmitted == npkts, the completion routine
 597 * controls the final state. If seqsubmitted < npkts, wait for any
598 * outstanding packets to finish before cleaning up.
599 */
600 if (req->seqsubmitted < req->info.npkts) {
601 if (req->seqsubmitted)
602 wait_event(pq->busy.wait_dma,
603 (req->seqcomp == req->seqsubmitted - 1));
604 user_sdma_free_request(req, true);
611 pq_update(pq); 605 pq_update(pq);
612 set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); 606 set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
607 }
613 return ret; 608 return ret;
614} 609}
615 610
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
760 return ret; 755 return ret;
761} 756}
762 757
763static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) 758static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
764{ 759{
765 int ret = 0, count; 760 int ret = 0;
761 u16 count;
766 unsigned npkts = 0; 762 unsigned npkts = 0;
767 struct user_sdma_txreq *tx = NULL; 763 struct user_sdma_txreq *tx = NULL;
768 struct hfi1_user_sdma_pkt_q *pq = NULL; 764 struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
864 860
865 changes = set_txreq_header_ahg(req, tx, 861 changes = set_txreq_header_ahg(req, tx,
866 datalen); 862 datalen);
867 if (changes < 0) 863 if (changes < 0) {
864 ret = changes;
868 goto free_tx; 865 goto free_tx;
866 }
869 } 867 }
870 } else { 868 } else {
871 ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + 869 ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
914 npkts++; 912 npkts++;
915 } 913 }
916dosend: 914dosend:
917 ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); 915 ret = sdma_send_txlist(req->sde,
916 iowait_get_ib_work(&pq->busy),
917 &req->txps, &count);
918 req->seqsubmitted += count; 918 req->seqsubmitted += count;
919 if (req->seqsubmitted == req->info.npkts) { 919 if (req->seqsubmitted == req->info.npkts) {
920 WRITE_ONCE(req->done, 1);
921 /* 920 /*
922 * The txreq has already been submitted to the HW queue 921 * The txreq has already been submitted to the HW queue
923 * so we can free the AHG entry now. Corruption will not 922 * so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
1365 return idx; 1364 return idx;
1366} 1365}
1367 1366
1368/* 1367/**
1369 * SDMA tx request completion callback. Called when the SDMA progress 1368 * user_sdma_txreq_cb() - SDMA tx request completion callback.
1370 * state machine gets notification that the SDMA descriptors for this 1369 * @txreq: valid sdma tx request
1371 * tx request have been processed by the DMA engine. Called in 1370 * @status: success/failure of request
1372 * interrupt context. 1371 *
1372 * Called when the SDMA progress state machine gets notification that
1373 * the SDMA descriptors for this tx request have been processed by the
1374 * DMA engine. Called in interrupt context.
1375 * Only do work on completed sequences.
1373 */ 1376 */
1374static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) 1377static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
1375{ 1378{
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
1378 struct user_sdma_request *req; 1381 struct user_sdma_request *req;
1379 struct hfi1_user_sdma_pkt_q *pq; 1382 struct hfi1_user_sdma_pkt_q *pq;
1380 struct hfi1_user_sdma_comp_q *cq; 1383 struct hfi1_user_sdma_comp_q *cq;
1381 u16 idx; 1384 enum hfi1_sdma_comp_state state = COMPLETE;
1382 1385
1383 if (!tx->req) 1386 if (!tx->req)
1384 return; 1387 return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
1391 SDMA_DBG(req, "SDMA completion with error %d", 1394 SDMA_DBG(req, "SDMA completion with error %d",
1392 status); 1395 status);
1393 WRITE_ONCE(req->has_error, 1); 1396 WRITE_ONCE(req->has_error, 1);
1397 state = ERROR;
1394 } 1398 }
1395 1399
1396 req->seqcomp = tx->seqnum; 1400 req->seqcomp = tx->seqnum;
1397 kmem_cache_free(pq->txreq_cache, tx); 1401 kmem_cache_free(pq->txreq_cache, tx);
1398 tx = NULL; 1402
1399 1403 /* sequence isn't complete? We are done */
1400 idx = req->info.comp_idx; 1404 if (req->seqcomp != req->info.npkts - 1)
1401 if (req->status == -1 && status == SDMA_TXREQ_S_OK) { 1405 return;
1402 if (req->seqcomp == req->info.npkts - 1) { 1406
1403 req->status = 0; 1407 user_sdma_free_request(req, false);
1404 user_sdma_free_request(req, false); 1408 set_comp_state(pq, cq, req->info.comp_idx, state, status);
1405 pq_update(pq); 1409 pq_update(pq);
1406 set_comp_state(pq, cq, idx, COMPLETE, 0);
1407 }
1408 } else {
1409 if (status != SDMA_TXREQ_S_OK)
1410 req->status = status;
1411 if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
1412 (READ_ONCE(req->done) ||
1413 READ_ONCE(req->has_error))) {
1414 user_sdma_free_request(req, false);
1415 pq_update(pq);
1416 set_comp_state(pq, cq, idx, ERROR, req->status);
1417 }
1418 }
1419} 1410}
1420 1411
1421static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) 1412static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
1422{ 1413{
1423 if (atomic_dec_and_test(&pq->n_reqs)) { 1414 if (atomic_dec_and_test(&pq->n_reqs))
1424 xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
1425 wake_up(&pq->wait); 1415 wake_up(&pq->wait);
1426 }
1427} 1416}
1428 1417
1429static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) 1418static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
1448 if (!node) 1437 if (!node)
1449 continue; 1438 continue;
1450 1439
1440 req->iovs[i].node = NULL;
1441
1451 if (unpin) 1442 if (unpin)
1452 hfi1_mmu_rb_remove(req->pq->handler, 1443 hfi1_mmu_rb_remove(req->pq->handler,
1453 &node->rb); 1444 &node->rb);
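Taken together, these hunks hang request lifetime entirely off pq->n_reqs: the count is taken as soon as a request is initialized, the completion path drops it, and queue teardown just waits for zero instead of tracking a separate SDMA_PKT_Q_INACTIVE state. Condensed from the hunks above into a sketch (field names as in the driver, surrounding control flow elided):

/* Submit side: count the request once it is initialized. */
atomic_inc(&pq->n_reqs);

/* Completion side (pq_update()): the last completion wakes any waiter. */
if (atomic_dec_and_test(&pq->n_reqs))
	wake_up(&pq->wait);

/* Teardown (hfi1_user_sdma_free_queues()): wait for the queue to drain. */
wait_event_interruptible(pq->wait, !atomic_read(&pq->n_reqs));
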
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f75253..14dfd757dafd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
105#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ 105#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
106#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ 106#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
107 107
108#define SDMA_PKT_Q_INACTIVE BIT(0) 108enum pkt_q_sdma_state {
109#define SDMA_PKT_Q_ACTIVE BIT(1) 109 SDMA_PKT_Q_ACTIVE,
110#define SDMA_PKT_Q_DEFERRED BIT(2) 110 SDMA_PKT_Q_DEFERRED,
111};
111 112
112/* 113/*
113 * Maximum retry attempts to submit a TX request 114 * Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
133 struct user_sdma_request *reqs; 134 struct user_sdma_request *reqs;
134 unsigned long *req_in_use; 135 unsigned long *req_in_use;
135 struct iowait busy; 136 struct iowait busy;
136 unsigned state; 137 enum pkt_q_sdma_state state;
137 wait_queue_head_t wait; 138 wait_queue_head_t wait;
138 unsigned long unpinned; 139 unsigned long unpinned;
139 struct mmu_rb_handler *handler; 140 struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
203 s8 ahg_idx; 204 s8 ahg_idx;
204 205
205 /* Writeable fields shared with interrupt */ 206 /* Writeable fields shared with interrupt */
206 u64 seqcomp ____cacheline_aligned_in_smp; 207 u16 seqcomp ____cacheline_aligned_in_smp;
207 u64 seqsubmitted; 208 u16 seqsubmitted;
208 /* status of the last txreq completed */
209 int status;
210 209
211 /* Send side fields */ 210 /* Send side fields */
212 struct list_head txps ____cacheline_aligned_in_smp; 211 struct list_head txps ____cacheline_aligned_in_smp;
213 u64 seqnum; 212 u16 seqnum;
214 /* 213 /*
215 * KDETH.OFFSET (TID) field 214 * KDETH.OFFSET (TID) field
216 * The offset can cover multiple packets, depending on the 215 * The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
228 u16 tididx; 227 u16 tididx;
229 /* progress index moving along the iovs array */ 228 /* progress index moving along the iovs array */
230 u8 iov_idx; 229 u8 iov_idx;
231 u8 done;
232 u8 has_error; 230 u8 has_error;
233 231
234 struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; 232 struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
248 struct user_sdma_request *req; 246 struct user_sdma_request *req;
249 u16 flags; 247 u16 flags;
250 unsigned int busycount; 248 unsigned int busycount;
251 u64 seqnum; 249 u16 seqnum;
252}; 250};
253 251
254int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, 252int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a7c586a5589d..48e11e510358 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
129module_param(piothreshold, ushort, S_IRUGO); 129module_param(piothreshold, ushort, S_IRUGO);
130MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); 130MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
131 131
132#define COPY_CACHELESS 1
133#define COPY_ADAPTIVE 2
134static unsigned int sge_copy_mode; 132static unsigned int sge_copy_mode;
135module_param(sge_copy_mode, uint, S_IRUGO); 133module_param(sge_copy_mode, uint, S_IRUGO);
136MODULE_PARM_DESC(sge_copy_mode, 134MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
151/* 16B trailing buffer */ 149/* 16B trailing buffer */
152static const u8 trail_buf[MAX_16B_PADDING]; 150static const u8 trail_buf[MAX_16B_PADDING];
153 151
154static uint wss_threshold; 152static uint wss_threshold = 80;
155module_param(wss_threshold, uint, S_IRUGO); 153module_param(wss_threshold, uint, S_IRUGO);
156MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); 154MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
157static uint wss_clean_period = 256; 155static uint wss_clean_period = 256;
158module_param(wss_clean_period, uint, S_IRUGO); 156module_param(wss_clean_period, uint, S_IRUGO);
159MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); 157MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
160 158
161/* memory working set size */
162struct hfi1_wss {
163 unsigned long *entries;
164 atomic_t total_count;
165 atomic_t clean_counter;
166 atomic_t clean_entry;
167
168 int threshold;
169 int num_entries;
170 long pages_mask;
171};
172
173static struct hfi1_wss wss;
174
175int hfi1_wss_init(void)
176{
177 long llc_size;
178 long llc_bits;
179 long table_size;
180 long table_bits;
181
182 /* check for a valid percent range - default to 80 if none or invalid */
183 if (wss_threshold < 1 || wss_threshold > 100)
184 wss_threshold = 80;
185 /* reject a wildly large period */
186 if (wss_clean_period > 1000000)
187 wss_clean_period = 256;
188 /* reject a zero period */
189 if (wss_clean_period == 0)
190 wss_clean_period = 1;
191
192 /*
193 * Calculate the table size - the next power of 2 larger than the
194 * LLC size. LLC size is in KiB.
195 */
196 llc_size = wss_llc_size() * 1024;
197 table_size = roundup_pow_of_two(llc_size);
198
199 /* one bit per page in rounded up table */
200 llc_bits = llc_size / PAGE_SIZE;
201 table_bits = table_size / PAGE_SIZE;
202 wss.pages_mask = table_bits - 1;
203 wss.num_entries = table_bits / BITS_PER_LONG;
204
205 wss.threshold = (llc_bits * wss_threshold) / 100;
206 if (wss.threshold == 0)
207 wss.threshold = 1;
208
209 atomic_set(&wss.clean_counter, wss_clean_period);
210
211 wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
212 GFP_KERNEL);
213 if (!wss.entries) {
214 hfi1_wss_exit();
215 return -ENOMEM;
216 }
217
218 return 0;
219}
220
221void hfi1_wss_exit(void)
222{
223 /* coded to handle partially initialized and repeat callers */
224 kfree(wss.entries);
225 wss.entries = NULL;
226}
227
228/*
229 * Advance the clean counter. When the clean period has expired,
230 * clean an entry.
231 *
232 * This is implemented in atomics to avoid locking. Because multiple
233 * variables are involved, it can be racy which can lead to slightly
234 * inaccurate information. Since this is only a heuristic, this is
235 * OK. Any innaccuracies will clean themselves out as the counter
236 * advances. That said, it is unlikely the entry clean operation will
237 * race - the next possible racer will not start until the next clean
238 * period.
239 *
240 * The clean counter is implemented as a decrement to zero. When zero
241 * is reached an entry is cleaned.
242 */
243static void wss_advance_clean_counter(void)
244{
245 int entry;
246 int weight;
247 unsigned long bits;
248
249 /* become the cleaner if we decrement the counter to zero */
250 if (atomic_dec_and_test(&wss.clean_counter)) {
251 /*
252 * Set, not add, the clean period. This avoids an issue
253 * where the counter could decrement below the clean period.
254 * Doing a set can result in lost decrements, slowing the
255 * clean advance. Since this a heuristic, this possible
256 * slowdown is OK.
257 *
258 * An alternative is to loop, advancing the counter by a
259 * clean period until the result is > 0. However, this could
260 * lead to several threads keeping another in the clean loop.
261 * This could be mitigated by limiting the number of times
262 * we stay in the loop.
263 */
264 atomic_set(&wss.clean_counter, wss_clean_period);
265
266 /*
267 * Uniquely grab the entry to clean and move to next.
268 * The current entry is always the lower bits of
269 * wss.clean_entry. The table size, wss.num_entries,
270 * is always a power-of-2.
271 */
272 entry = (atomic_inc_return(&wss.clean_entry) - 1)
273 & (wss.num_entries - 1);
274
275 /* clear the entry and count the bits */
276 bits = xchg(&wss.entries[entry], 0);
277 weight = hweight64((u64)bits);
278 /* only adjust the contended total count if needed */
279 if (weight)
280 atomic_sub(weight, &wss.total_count);
281 }
282}
283
284/*
285 * Insert the given address into the working set array.
286 */
287static void wss_insert(void *address)
288{
289 u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
290 u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
291 u32 nr = page & (BITS_PER_LONG - 1);
292
293 if (!test_and_set_bit(nr, &wss.entries[entry]))
294 atomic_inc(&wss.total_count);
295
296 wss_advance_clean_counter();
297}
298
299/*
300 * Is the working set larger than the threshold?
301 */
302static inline bool wss_exceeds_threshold(void)
303{
304 return atomic_read(&wss.total_count) >= wss.threshold;
305}
306
307/* 159/*
308 * Translate ib_wr_opcode into ib_wc_opcode. 160 * Translate ib_wr_opcode into ib_wc_opcode.
309 */ 161 */
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
438 */ 290 */
439__be64 ib_hfi1_sys_image_guid; 291__be64 ib_hfi1_sys_image_guid;
440 292
441/**
442 * hfi1_copy_sge - copy data to SGE memory
443 * @ss: the SGE state
444 * @data: the data to copy
445 * @length: the length of the data
446 * @release: boolean to release MR
447 * @copy_last: do a separate copy of the last 8 bytes
448 */
449void hfi1_copy_sge(
450 struct rvt_sge_state *ss,
451 void *data, u32 length,
452 bool release,
453 bool copy_last)
454{
455 struct rvt_sge *sge = &ss->sge;
456 int i;
457 bool in_last = false;
458 bool cacheless_copy = false;
459
460 if (sge_copy_mode == COPY_CACHELESS) {
461 cacheless_copy = length >= PAGE_SIZE;
462 } else if (sge_copy_mode == COPY_ADAPTIVE) {
463 if (length >= PAGE_SIZE) {
464 /*
465 * NOTE: this *assumes*:
466 * o The first vaddr is the dest.
467 * o If multiple pages, then vaddr is sequential.
468 */
469 wss_insert(sge->vaddr);
470 if (length >= (2 * PAGE_SIZE))
471 wss_insert(sge->vaddr + PAGE_SIZE);
472
473 cacheless_copy = wss_exceeds_threshold();
474 } else {
475 wss_advance_clean_counter();
476 }
477 }
478 if (copy_last) {
479 if (length > 8) {
480 length -= 8;
481 } else {
482 copy_last = false;
483 in_last = true;
484 }
485 }
486
487again:
488 while (length) {
489 u32 len = rvt_get_sge_length(sge, length);
490
491 WARN_ON_ONCE(len == 0);
492 if (unlikely(in_last)) {
493 /* enforce byte transfer ordering */
494 for (i = 0; i < len; i++)
495 ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
496 } else if (cacheless_copy) {
497 cacheless_memcpy(sge->vaddr, data, len);
498 } else {
499 memcpy(sge->vaddr, data, len);
500 }
501 rvt_update_sge(ss, len, release);
502 data += len;
503 length -= len;
504 }
505
506 if (copy_last) {
507 copy_last = false;
508 in_last = true;
509 length = 8;
510 goto again;
511 }
512}
513
514/* 293/*
515 * Make sure the QP is ready and able to accept the given opcode. 294 * Make sure the QP is ready and able to accept the given opcode.
516 */ 295 */
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
713 492
714 spin_lock(&qp->s_lock); 493 spin_lock(&qp->s_lock);
715 if (tx->wqe) { 494 if (tx->wqe) {
716 hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 495 rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
717 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 496 } else if (qp->ibqp.qp_type == IB_QPT_RC) {
718 struct hfi1_opa_header *hdr; 497 struct hfi1_opa_header *hdr;
719 498
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
737 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 516 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
738 write_seqlock(&dev->iowait_lock); 517 write_seqlock(&dev->iowait_lock);
739 list_add_tail(&ps->s_txreq->txreq.list, 518 list_add_tail(&ps->s_txreq->txreq.list,
740 &priv->s_iowait.tx_head); 519 &ps->wait->tx_head);
741 if (list_empty(&priv->s_iowait.list)) { 520 if (list_empty(&priv->s_iowait.list)) {
742 if (list_empty(&dev->memwait)) 521 if (list_empty(&dev->memwait))
743 mod_timer(&dev->mem_timer, jiffies + 1); 522 mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
748 rvt_get_qp(qp); 527 rvt_get_qp(qp);
749 } 528 }
750 write_sequnlock(&dev->iowait_lock); 529 write_sequnlock(&dev->iowait_lock);
751 qp->s_flags &= ~RVT_S_BUSY; 530 hfi1_qp_unbusy(qp, ps->wait);
752 ret = -EBUSY; 531 ret = -EBUSY;
753 } 532 }
754 spin_unlock_irqrestore(&qp->s_lock, flags); 533 spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
950 if (unlikely(ret)) 729 if (unlikely(ret))
951 goto bail_build; 730 goto bail_build;
952 } 731 }
953 ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, 732 ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
954 ps->pkts_sent);
955 if (unlikely(ret < 0)) { 733 if (unlikely(ret < 0)) {
956 if (ret == -ECOMM) 734 if (ret == -ECOMM)
957 goto bail_ecomm; 735 goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
1001 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { 779 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
1002 write_seqlock(&dev->iowait_lock); 780 write_seqlock(&dev->iowait_lock);
1003 list_add_tail(&ps->s_txreq->txreq.list, 781 list_add_tail(&ps->s_txreq->txreq.list,
1004 &priv->s_iowait.tx_head); 782 &ps->wait->tx_head);
1005 if (list_empty(&priv->s_iowait.list)) { 783 if (list_empty(&priv->s_iowait.list)) {
1006 struct hfi1_ibdev *dev = &dd->verbs_dev; 784 struct hfi1_ibdev *dev = &dd->verbs_dev;
1007 int was_empty; 785 int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
1020 hfi1_sc_wantpiobuf_intr(sc, 1); 798 hfi1_sc_wantpiobuf_intr(sc, 1);
1021 } 799 }
1022 write_sequnlock(&dev->iowait_lock); 800 write_sequnlock(&dev->iowait_lock);
1023 qp->s_flags &= ~RVT_S_BUSY; 801 hfi1_qp_unbusy(qp, ps->wait);
1024 ret = -EBUSY; 802 ret = -EBUSY;
1025 } 803 }
1026 spin_unlock_irqrestore(&qp->s_lock, flags); 804 spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
1160pio_bail: 938pio_bail:
1161 if (qp->s_wqe) { 939 if (qp->s_wqe) {
1162 spin_lock_irqsave(&qp->s_lock, flags); 940 spin_lock_irqsave(&qp->s_lock, flags);
1163 hfi1_send_complete(qp, qp->s_wqe, wc_status); 941 rvt_send_complete(qp, qp->s_wqe, wc_status);
1164 spin_unlock_irqrestore(&qp->s_lock, flags); 942 spin_unlock_irqrestore(&qp->s_lock, flags);
1165 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 943 } else if (qp->ibqp.qp_type == IB_QPT_RC) {
1166 spin_lock_irqsave(&qp->s_lock, flags); 944 spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
1367 hfi1_cdbg(PIO, "%s() Failed. Completing with err", 1145 hfi1_cdbg(PIO, "%s() Failed. Completing with err",
1368 __func__); 1146 __func__);
1369 spin_lock_irqsave(&qp->s_lock, flags); 1147 spin_lock_irqsave(&qp->s_lock, flags);
1370 hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 1148 rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
1371 spin_unlock_irqrestore(&qp->s_lock, flags); 1149 spin_unlock_irqrestore(&qp->s_lock, flags);
1372 } 1150 }
1373 return -EINVAL; 1151 return -EINVAL;
@@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1943 dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; 1721 dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
1944 dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; 1722 dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
1945 dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; 1723 dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
1946 dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; 1724 dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
1947 dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = 1725 dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
1948 hfi1_comp_vect_mappings_lookup; 1726 hfi1_comp_vect_mappings_lookup;
1949 1727
@@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1956 dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; 1734 dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
1957 dd->verbs_dev.rdi.dparms.nports = dd->num_pports; 1735 dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
1958 dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); 1736 dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
1737 dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
1738 dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
1739 dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
1959 1740
1960 /* post send table */ 1741 /* post send table */
1961 dd->verbs_dev.rdi.post_parms = hfi1_post_parms; 1742 dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
1962 1743
1744 /* opcode translation table */
1745 dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
1746
1963 ppd = dd->pport; 1747 ppd = dd->pport;
1964 for (i = 0; i < dd->num_pports; i++, ppd++) 1748 for (i = 0; i < dd->num_pports; i++, ppd++)
1965 rvt_init_port(&dd->verbs_dev.rdi, 1749 rvt_init_port(&dd->verbs_dev.rdi,
@@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
1967 i, 1751 i,
1968 ppd->pkeys); 1752 ppd->pkeys);
1969 1753
1754 rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
1755 &ib_hfi1_attr_group);
1756
1970 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); 1757 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
1971 if (ret) 1758 if (ret)
1972 goto err_verbs_txreq; 1759 goto err_verbs_txreq;
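The working-set heuristic deleted above now lives in rdmavt; hfi1 only passes sge_copy_mode, wss_threshold and wss_clean_period through dparms, and wss_threshold now defaults to 80 at declaration instead of being patched up in hfi1_wss_init(). For a sense of scale, the removed init sized the threshold from the last-level cache as llc_bits = llc_size / PAGE_SIZE and threshold = llc_bits * wss_threshold / 100: a hypothetical 32 MiB LLC with 4 KiB pages gives 8192 page bits and, at the default 80%, a threshold of 6553 pages (about 25.6 MiB of touched pages) before copies switch to the cacheless path.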
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..64c9054db5f3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
166 * This structure is used to hold commonly looked up and computed values during 166 * This structure is used to hold commonly looked up and computed values during
167 * the send engine progress. 167 * the send engine progress.
168 */ 168 */
169struct iowait_work;
169struct hfi1_pkt_state { 170struct hfi1_pkt_state {
170 struct hfi1_ibdev *dev; 171 struct hfi1_ibdev *dev;
171 struct hfi1_ibport *ibp; 172 struct hfi1_ibport *ibp;
172 struct hfi1_pportdata *ppd; 173 struct hfi1_pportdata *ppd;
173 struct verbs_txreq *s_txreq; 174 struct verbs_txreq *s_txreq;
175 struct iowait_work *wait;
174 unsigned long flags; 176 unsigned long flags;
175 unsigned long timeout; 177 unsigned long timeout;
176 unsigned long timeout_int; 178 unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
247 return container_of(rdi, struct hfi1_ibdev, rdi); 249 return container_of(rdi, struct hfi1_ibdev, rdi);
248} 250}
249 251
250static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) 252static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
251{ 253{
252 struct hfi1_qp_priv *priv; 254 struct hfi1_qp_priv *priv;
253 255
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
313 315
314int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 316int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
315 317
316void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
317 bool release, bool copy_last);
318
319void hfi1_cnp_rcv(struct hfi1_packet *packet); 318void hfi1_cnp_rcv(struct hfi1_packet *packet);
320 319
321void hfi1_uc_rcv(struct hfi1_packet *packet); 320void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
343void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, 342void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
344 int attr_mask, struct ib_udata *udata); 343 int attr_mask, struct ib_udata *udata);
345void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); 344void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
346int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); 345int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
346 bool *call_send);
347 347
348extern const u32 rc_only_opcode; 348extern const u32 rc_only_opcode;
349extern const u32 uc_only_opcode; 349extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
363 363
364void hfi1_do_send(struct rvt_qp *qp, bool in_thread); 364void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
365 365
366void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
367 enum ib_wc_status status);
368
369void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn); 366void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
370 367
371int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 368int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
390int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, 387int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
391 u64 pbc); 388 u64 pbc);
392 389
393int hfi1_wss_init(void);
394void hfi1_wss_exit(void);
395
396/* platform specific: return the lowest level cache (llc) size, in KiB */
397static inline int wss_llc_size(void)
398{
399 /* assume that the boot CPU value is universal for all CPUs */
400 return boot_cpu_data.x86_cache_size;
401}
402
403/* platform specific: cacheless copy */
404static inline void cacheless_memcpy(void *dst, void *src, size_t n)
405{
406 /*
407 * Use the only available X64 cacheless copy. Add a __user cast
 408 * to quiet sparse. The src argument is already in the kernel so
409 * there are no security issues. The extra fault recovery machinery
410 * is not invoked.
411 */
412 __copy_user_nocache(dst, (void __user *)src, n, 0);
413}
414
415static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr) 390static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
416{ 391{
417 return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ); 392 return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc764b2..2a77af26a231 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
102 return &tx->txreq; 102 return &tx->txreq;
103} 103}
104 104
105static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) 105static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
106{ 106{
107 struct sdma_txreq *stx; 107 struct sdma_txreq *stx;
108 struct hfi1_qp_priv *priv = qp->priv;
109 108
110 stx = iowait_get_txhead(&priv->s_iowait); 109 stx = iowait_get_txhead(w);
111 if (stx) 110 if (stx)
112 return container_of(stx, struct verbs_txreq, txreq); 111 return container_of(stx, struct verbs_txreq, txreq);
113 return NULL; 112 return NULL;
114} 113}
115 114
116static inline bool verbs_txreq_queued(struct rvt_qp *qp) 115static inline bool verbs_txreq_queued(struct iowait_work *w)
117{ 116{
118 struct hfi1_qp_priv *priv = qp->priv; 117 return iowait_packet_queued(w);
119
120 return iowait_packet_queued(&priv->s_iowait);
121} 118}
122 119
123void hfi1_put_txreq(struct verbs_txreq *tx); 120void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c643d80c5a53..c9876d9e3cb9 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
120 uctxt->seq_cnt = 1; 120 uctxt->seq_cnt = 1;
121 uctxt->is_vnic = true; 121 uctxt->is_vnic = true;
122 122
123 hfi1_set_vnic_msix_info(uctxt); 123 msix_request_rcd_irq(uctxt);
124 124
125 hfi1_stats.sps_ctxts++; 125 hfi1_stats.sps_ctxts++;
126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
136 flush_wc(); 136 flush_wc();
137 137
138 hfi1_reset_vnic_msix_info(uctxt);
139
140 /* 138 /*
141 * Disable receive context and interrupt available, reset all 139 * Disable receive context and interrupt available, reset all
142 * RcvCtxtCtrl bits to default values. 140 * RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
148 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 146 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
149 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 147 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
150 148
149 /* msix_intr will always be > 0, only clean up if this is true */
150 if (uctxt->msix_intr)
151 msix_free_irq(dd, uctxt->msix_intr);
152
151 uctxt->event_flags = 0; 153 uctxt->event_flags = 0;
152 154
153 hfi1_clear_tids(uctxt); 155 hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
626 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 628 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
627 629
628 /* ensure irqs see the change */ 630 /* ensure irqs see the change */
629 hfi1_vnic_synchronize_irq(dd); 631 msix_vnic_synchronize_irq(dd);
630 632
631 /* remove unread skbs */ 633 /* remove unread skbs */
632 for (i = 0; i < vinfo->num_rx_q; i++) { 634 for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
690 rc = hfi1_vnic_txreq_init(dd); 692 rc = hfi1_vnic_txreq_init(dd);
691 if (rc) 693 if (rc)
692 goto txreq_fail; 694 goto txreq_fail;
693
694 dd->vnic.msix_idx = dd->first_dyn_msix_idx;
695 } 695 }
696 696
697 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { 697 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5869ed..97bd940a056a 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright(c) 2017 Intel Corporation. 2 * Copyright(c) 2017 - 2018 Intel Corporation.
3 * 3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or 4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license. 5 * redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
198 goto free_desc; 198 goto free_desc;
199 tx->retry_count = 0; 199 tx->retry_count = 0;
200 200
201 ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, 201 ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
202 vnic_sdma->pkts_sent); 202 &tx->txreq, vnic_sdma->pkts_sent);
203 /* When -ECOMM, sdma callback will be called with ABORT status */ 203 /* When -ECOMM, sdma callback will be called with ABORT status */
204 if (unlikely(ret && unlikely(ret != -ECOMM))) 204 if (unlikely(ret && unlikely(ret != -ECOMM)))
205 goto free_desc; 205 goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
230 * become available. 230 * become available.
231 */ 231 */
232static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, 232static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
233 struct iowait *wait, 233 struct iowait_work *wait,
234 struct sdma_txreq *txreq, 234 struct sdma_txreq *txreq,
235 uint seq, 235 uint seq,
236 bool pkts_sent) 236 bool pkts_sent)
237{ 237{
238 struct hfi1_vnic_sdma *vnic_sdma = 238 struct hfi1_vnic_sdma *vnic_sdma =
239 container_of(wait, struct hfi1_vnic_sdma, wait); 239 container_of(wait->iow, struct hfi1_vnic_sdma, wait);
240 struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; 240 struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
241 struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); 241 struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
242 242
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
247 vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; 247 vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
248 write_seqlock(&dev->iowait_lock); 248 write_seqlock(&dev->iowait_lock);
249 if (list_empty(&vnic_sdma->wait.list)) 249 if (list_empty(&vnic_sdma->wait.list))
250 iowait_queue(pkts_sent, wait, &sde->dmawait); 250 iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
251 write_sequnlock(&dev->iowait_lock); 251 write_sequnlock(&dev->iowait_lock);
252 return -EBUSY; 252 return -EBUSY;
253} 253}
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
285 for (i = 0; i < vinfo->num_tx_q; i++) { 285 for (i = 0; i < vinfo->num_tx_q; i++) {
286 struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; 286 struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
287 287
288 iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, 288 iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
289 hfi1_vnic_sdma_sleep,
289 hfi1_vnic_sdma_wakeup, NULL); 290 hfi1_vnic_sdma_wakeup, NULL);
290 vnic_sdma->sde = &vinfo->dd->per_sdma[i]; 291 vnic_sdma->sde = &vinfo->dd->per_sdma[i];
291 vnic_sdma->dd = vinfo->dd; 292 vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
295 296
296 /* Add a free descriptor watermark for wakeups */ 297 /* Add a free descriptor watermark for wakeups */
297 if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { 298 if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
299 struct iowait_work *work;
300
298 INIT_LIST_HEAD(&vnic_sdma->stx.list); 301 INIT_LIST_HEAD(&vnic_sdma->stx.list);
299 vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; 302 vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
300 list_add_tail(&vnic_sdma->stx.list, 303 work = iowait_get_ib_work(&vnic_sdma->wait);
301 &vnic_sdma->wait.tx_head); 304 list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
302 } 305 }
303 } 306 }
304} 307}
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index fddb5fdf92de..21c2100b2ea9 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,6 +1,7 @@
1config INFINIBAND_HNS 1config INFINIBAND_HNS
2 tristate "HNS RoCE Driver" 2 tristate "HNS RoCE Driver"
3 depends on NET_VENDOR_HISILICON 3 depends on NET_VENDOR_HISILICON
4 depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
4 depends on ARM64 || (COMPILE_TEST && 64BIT) 5 depends on ARM64 || (COMPILE_TEST && 64BIT)
5 ---help--- 6 ---help---
6 This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine 7 This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0d96c5bb38cd..9990dc9eb96a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
49 struct hns_roce_ah *ah; 49 struct hns_roce_ah *ah;
50 u16 vlan_tag = 0xffff; 50 u16 vlan_tag = 0xffff;
51 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); 51 const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
52 bool vlan_en = false;
52 53
53 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 54 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
54 if (!ah) 55 if (!ah)
@@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
58 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); 59 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
59 60
60 gid_attr = ah_attr->grh.sgid_attr; 61 gid_attr = ah_attr->grh.sgid_attr;
61 if (is_vlan_dev(gid_attr->ndev)) 62 if (is_vlan_dev(gid_attr->ndev)) {
62 vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); 63 vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
64 vlan_en = true;
65 }
63 66
64 if (vlan_tag < 0x1000) 67 if (vlan_tag < 0x1000)
65 vlan_tag |= (rdma_ah_get_sl(ah_attr) & 68 vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
71 HNS_ROCE_PORT_NUM_SHIFT)); 74 HNS_ROCE_PORT_NUM_SHIFT));
72 ah->av.gid_index = grh->sgid_index; 75 ah->av.gid_index = grh->sgid_index;
73 ah->av.vlan = cpu_to_le16(vlan_tag); 76 ah->av.vlan = cpu_to_le16(vlan_tag);
77 ah->av.vlan_en = vlan_en;
74 dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, 78 dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
75 ah->av.vlan); 79 ah->av.vlan);
76 80
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 9a24fd0ee3e7..d39bdfdb5de9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -88,8 +88,11 @@
88#define BITMAP_RR 1 88#define BITMAP_RR 1
89 89
90#define MR_TYPE_MR 0x00 90#define MR_TYPE_MR 0x00
91#define MR_TYPE_FRMR 0x01
91#define MR_TYPE_DMA 0x03 92#define MR_TYPE_DMA 0x03
92 93
94#define HNS_ROCE_FRMR_MAX_PA 512
95
93#define PKEY_ID 0xffff 96#define PKEY_ID 0xffff
94#define GUID_LEN 8 97#define GUID_LEN 8
95#define NODE_DESC_SIZE 64 98#define NODE_DESC_SIZE 64
@@ -193,6 +196,9 @@ enum {
193 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), 196 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
194 HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), 197 HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
195 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), 198 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
199 HNS_ROCE_CAP_FLAG_MW = BIT(7),
200 HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
201 HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
196}; 202};
197 203
198enum hns_roce_mtt_type { 204enum hns_roce_mtt_type {
@@ -219,19 +225,11 @@ struct hns_roce_uar {
219 unsigned long logic_idx; 225 unsigned long logic_idx;
220}; 226};
221 227
222struct hns_roce_vma_data {
223 struct list_head list;
224 struct vm_area_struct *vma;
225 struct mutex *vma_list_mutex;
226};
227
228struct hns_roce_ucontext { 228struct hns_roce_ucontext {
229 struct ib_ucontext ibucontext; 229 struct ib_ucontext ibucontext;
230 struct hns_roce_uar uar; 230 struct hns_roce_uar uar;
231 struct list_head page_list; 231 struct list_head page_list;
232 struct mutex page_mutex; 232 struct mutex page_mutex;
233 struct list_head vma_list;
234 struct mutex vma_list_mutex;
235}; 233};
236 234
237struct hns_roce_pd { 235struct hns_roce_pd {
@@ -293,6 +291,16 @@ struct hns_roce_mtt {
293 enum hns_roce_mtt_type mtt_type; 291 enum hns_roce_mtt_type mtt_type;
294}; 292};
295 293
294struct hns_roce_mw {
295 struct ib_mw ibmw;
296 u32 pdn;
297 u32 rkey;
298 int enabled; /* MW's active status */
299 u32 pbl_hop_num;
300 u32 pbl_ba_pg_sz;
301 u32 pbl_buf_pg_sz;
302};
303
296/* Only support 4K page size for mr register */ 304/* Only support 4K page size for mr register */
297#define MR_SIZE_4K 0 305#define MR_SIZE_4K 0
298 306
@@ -304,6 +312,7 @@ struct hns_roce_mr {
304 u32 key; /* Key of MR */ 312 u32 key; /* Key of MR */
305 u32 pd; /* PD num of MR */ 313 u32 pd; /* PD num of MR */
306 u32 access;/* Access permission of MR */ 314 u32 access;/* Access permission of MR */
315 u32 npages;
307 int enabled; /* MR's active status */ 316 int enabled; /* MR's active status */
308 int type; /* MR's register type */ 317 int type; /* MR's register type */
309 u64 *pbl_buf;/* MR's PBL space */ 318 u64 *pbl_buf;/* MR's PBL space */
@@ -457,6 +466,7 @@ struct hns_roce_av {
457 u8 dgid[HNS_ROCE_GID_SIZE]; 466 u8 dgid[HNS_ROCE_GID_SIZE];
458 u8 mac[6]; 467 u8 mac[6];
459 __le16 vlan; 468 __le16 vlan;
469 bool vlan_en;
460}; 470};
461 471
462struct hns_roce_ah { 472struct hns_roce_ah {
@@ -656,6 +666,7 @@ struct hns_roce_eq_table {
656}; 666};
657 667
658struct hns_roce_caps { 668struct hns_roce_caps {
669 u64 fw_ver;
659 u8 num_ports; 670 u8 num_ports;
660 int gid_table_len[HNS_ROCE_MAX_PORTS]; 671 int gid_table_len[HNS_ROCE_MAX_PORTS];
661 int pkey_table_len[HNS_ROCE_MAX_PORTS]; 672 int pkey_table_len[HNS_ROCE_MAX_PORTS];
@@ -665,7 +676,9 @@ struct hns_roce_caps {
665 u32 max_sq_sg; /* 2 */ 676 u32 max_sq_sg; /* 2 */
666 u32 max_sq_inline; /* 32 */ 677 u32 max_sq_inline; /* 32 */
667 u32 max_rq_sg; /* 2 */ 678 u32 max_rq_sg; /* 2 */
679 u32 max_extend_sg;
668 int num_qps; /* 256k */ 680 int num_qps; /* 256k */
681 int reserved_qps;
669 u32 max_wqes; /* 16k */ 682 u32 max_wqes; /* 16k */
670 u32 max_sq_desc_sz; /* 64 */ 683 u32 max_sq_desc_sz; /* 64 */
671 u32 max_rq_desc_sz; /* 64 */ 684 u32 max_rq_desc_sz; /* 64 */
@@ -738,6 +751,7 @@ struct hns_roce_work {
738 struct hns_roce_dev *hr_dev; 751 struct hns_roce_dev *hr_dev;
739 struct work_struct work; 752 struct work_struct work;
740 u32 qpn; 753 u32 qpn;
754 u32 cqn;
741 int event_type; 755 int event_type;
742 int sub_type; 756 int sub_type;
743}; 757};
@@ -764,6 +778,8 @@ struct hns_roce_hw {
764 struct hns_roce_mr *mr, int flags, u32 pdn, 778 struct hns_roce_mr *mr, int flags, u32 pdn,
765 int mr_access_flags, u64 iova, u64 size, 779 int mr_access_flags, u64 iova, u64 size,
766 void *mb_buf); 780 void *mb_buf);
781 int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
782 int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
767 void (*write_cqc)(struct hns_roce_dev *hr_dev, 783 void (*write_cqc)(struct hns_roce_dev *hr_dev,
768 struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, 784 struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
769 dma_addr_t dma_handle, int nent, u32 vector); 785 dma_addr_t dma_handle, int nent, u32 vector);
@@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
863 return container_of(ibmr, struct hns_roce_mr, ibmr); 879 return container_of(ibmr, struct hns_roce_mr, ibmr);
864} 880}
865 881
882static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
883{
884 return container_of(ibmw, struct hns_roce_mw, ibmw);
885}
886
866static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) 887static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
867{ 888{
868 return container_of(ibqp, struct hns_roce_qp, ibqp); 889 return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
968int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, 989int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
969 u64 virt_addr, int mr_access_flags, struct ib_pd *pd, 990 u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
970 struct ib_udata *udata); 991 struct ib_udata *udata);
992struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
993 u32 max_num_sg);
994int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
995 unsigned int *sg_offset);
971int hns_roce_dereg_mr(struct ib_mr *ibmr); 996int hns_roce_dereg_mr(struct ib_mr *ibmr);
972int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, 997int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
973 struct hns_roce_cmd_mailbox *mailbox, 998 struct hns_roce_cmd_mailbox *mailbox,
974 unsigned long mpt_index); 999 unsigned long mpt_index);
975unsigned long key_to_hw_index(u32 key); 1000unsigned long key_to_hw_index(u32 key);
976 1001
1002struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
1003 struct ib_udata *udata);
1004int hns_roce_dealloc_mw(struct ib_mw *ibmw);
1005
977void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, 1006void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
978 struct hns_roce_buf *buf); 1007 struct hns_roce_buf *buf);
979int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, 1008int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 081aa91fc162..ca05810c92dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
731 cq_init_attr.comp_vector = 0; 731 cq_init_attr.comp_vector = 0;
732 cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); 732 cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
733 if (IS_ERR(cq)) { 733 if (IS_ERR(cq)) {
734 dev_err(dev, "Create cq for reseved loop qp failed!"); 734 dev_err(dev, "Create cq for reserved loop qp failed!");
735 return -ENOMEM; 735 return -ENOMEM;
736 } 736 }
737 free_mr->mr_free_cq = to_hr_cq(cq); 737 free_mr->mr_free_cq = to_hr_cq(cq);
@@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
744 744
745 pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); 745 pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
746 if (IS_ERR(pd)) { 746 if (IS_ERR(pd)) {
747 dev_err(dev, "Create pd for reseved loop qp failed!"); 747 dev_err(dev, "Create pd for reserved loop qp failed!");
748 ret = -ENOMEM; 748 ret = -ENOMEM;
749 goto alloc_pd_failed; 749 goto alloc_pd_failed;
750 } 750 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 0218c0f8c2a7..a4c62ae23a9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
54 dseg->len = cpu_to_le32(sg->length); 54 dseg->len = cpu_to_le32(sg->length);
55} 55}
56 56
57static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
58 struct hns_roce_wqe_frmr_seg *fseg,
59 const struct ib_reg_wr *wr)
60{
61 struct hns_roce_mr *mr = to_hr_mr(wr->mr);
62
63 /* use ib_access_flags */
64 roce_set_bit(rc_sq_wqe->byte_4,
65 V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
66 wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
67 roce_set_bit(rc_sq_wqe->byte_4,
68 V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
69 wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
70 roce_set_bit(rc_sq_wqe->byte_4,
71 V2_RC_FRMR_WQE_BYTE_4_RR_S,
72 wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
73 roce_set_bit(rc_sq_wqe->byte_4,
74 V2_RC_FRMR_WQE_BYTE_4_RW_S,
75 wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
76 roce_set_bit(rc_sq_wqe->byte_4,
77 V2_RC_FRMR_WQE_BYTE_4_LW_S,
78 wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
79
80 /* Data structure reuse may lead to confusion */
81 rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
82 rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
83
84 rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
85 rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
86 rc_sq_wqe->rkey = cpu_to_le32(wr->key);
87 rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
88
89 fseg->pbl_size = cpu_to_le32(mr->pbl_size);
90 roce_set_field(fseg->mode_buf_pg_sz,
91 V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
92 V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
93 mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
94 roce_set_bit(fseg->mode_buf_pg_sz,
95 V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
96}
97
98static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
99 const struct ib_atomic_wr *wr)
100{
101 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
102 aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
103 aseg->cmp_data = cpu_to_le64(wr->compare_add);
104 } else {
105 aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
106 aseg->cmp_data = 0;
107 }
108}
109
57static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, 110static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
58 unsigned int *sge_ind) 111 unsigned int *sge_ind)
59{ 112{
@@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
121 } 174 }
122 175
123 if (wr->opcode == IB_WR_RDMA_READ) { 176 if (wr->opcode == IB_WR_RDMA_READ) {
177 *bad_wr = wr;
124 dev_err(hr_dev->dev, "Not support inline data!\n"); 178 dev_err(hr_dev->dev, "Not support inline data!\n");
125 return -EINVAL; 179 return -EINVAL;
126 } 180 }
@@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
179 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; 233 struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
180 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; 234 struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
181 struct hns_roce_qp *qp = to_hr_qp(ibqp); 235 struct hns_roce_qp *qp = to_hr_qp(ibqp);
236 struct hns_roce_wqe_frmr_seg *fseg;
182 struct device *dev = hr_dev->dev; 237 struct device *dev = hr_dev->dev;
183 struct hns_roce_v2_db sq_db; 238 struct hns_roce_v2_db sq_db;
184 struct ib_qp_attr attr; 239 struct ib_qp_attr attr;
@@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
191 int attr_mask; 246 int attr_mask;
192 u32 tmp_len; 247 u32 tmp_len;
193 int ret = 0; 248 int ret = 0;
249 u32 hr_op;
194 u8 *smac; 250 u8 *smac;
195 int nreq; 251 int nreq;
196 int i; 252 int i;
@@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
356 V2_UD_SEND_WQE_BYTE_40_PORTN_S, 412 V2_UD_SEND_WQE_BYTE_40_PORTN_S,
357 qp->port); 413 qp->port);
358 414
415 roce_set_bit(ud_sq_wqe->byte_40,
416 V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
417 ah->av.vlan_en ? 1 : 0);
359 roce_set_field(ud_sq_wqe->byte_48, 418 roce_set_field(ud_sq_wqe->byte_48,
360 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, 419 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
361 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, 420 V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
406 roce_set_bit(rc_sq_wqe->byte_4, 465 roce_set_bit(rc_sq_wqe->byte_4,
407 V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); 466 V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
408 467
468 wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
409 switch (wr->opcode) { 469 switch (wr->opcode) {
410 case IB_WR_RDMA_READ: 470 case IB_WR_RDMA_READ:
411 roce_set_field(rc_sq_wqe->byte_4, 471 hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
412 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
413 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
414 HNS_ROCE_V2_WQE_OP_RDMA_READ);
415 rc_sq_wqe->rkey = 472 rc_sq_wqe->rkey =
416 cpu_to_le32(rdma_wr(wr)->rkey); 473 cpu_to_le32(rdma_wr(wr)->rkey);
417 rc_sq_wqe->va = 474 rc_sq_wqe->va =
418 cpu_to_le64(rdma_wr(wr)->remote_addr); 475 cpu_to_le64(rdma_wr(wr)->remote_addr);
419 break; 476 break;
420 case IB_WR_RDMA_WRITE: 477 case IB_WR_RDMA_WRITE:
421 roce_set_field(rc_sq_wqe->byte_4, 478 hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
422 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
423 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
424 HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
425 rc_sq_wqe->rkey = 479 rc_sq_wqe->rkey =
426 cpu_to_le32(rdma_wr(wr)->rkey); 480 cpu_to_le32(rdma_wr(wr)->rkey);
427 rc_sq_wqe->va = 481 rc_sq_wqe->va =
428 cpu_to_le64(rdma_wr(wr)->remote_addr); 482 cpu_to_le64(rdma_wr(wr)->remote_addr);
429 break; 483 break;
430 case IB_WR_RDMA_WRITE_WITH_IMM: 484 case IB_WR_RDMA_WRITE_WITH_IMM:
431 roce_set_field(rc_sq_wqe->byte_4, 485 hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
432 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
433 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
434 HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
435 rc_sq_wqe->rkey = 486 rc_sq_wqe->rkey =
436 cpu_to_le32(rdma_wr(wr)->rkey); 487 cpu_to_le32(rdma_wr(wr)->rkey);
437 rc_sq_wqe->va = 488 rc_sq_wqe->va =
438 cpu_to_le64(rdma_wr(wr)->remote_addr); 489 cpu_to_le64(rdma_wr(wr)->remote_addr);
439 break; 490 break;
440 case IB_WR_SEND: 491 case IB_WR_SEND:
441 roce_set_field(rc_sq_wqe->byte_4, 492 hr_op = HNS_ROCE_V2_WQE_OP_SEND;
442 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
443 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
444 HNS_ROCE_V2_WQE_OP_SEND);
445 break; 493 break;
446 case IB_WR_SEND_WITH_INV: 494 case IB_WR_SEND_WITH_INV:
447 roce_set_field(rc_sq_wqe->byte_4, 495 hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
448 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
449 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
450 HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
451 break; 496 break;
452 case IB_WR_SEND_WITH_IMM: 497 case IB_WR_SEND_WITH_IMM:
453 roce_set_field(rc_sq_wqe->byte_4, 498 hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
454 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
455 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
456 HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
457 break; 499 break;
458 case IB_WR_LOCAL_INV: 500 case IB_WR_LOCAL_INV:
459 roce_set_field(rc_sq_wqe->byte_4, 501 hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
460 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 502 roce_set_bit(rc_sq_wqe->byte_4,
461 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 503 V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
462 HNS_ROCE_V2_WQE_OP_LOCAL_INV); 504 rc_sq_wqe->inv_key =
505 cpu_to_le32(wr->ex.invalidate_rkey);
506 break;
507 case IB_WR_REG_MR:
508 hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
509 fseg = wqe;
510 set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
463 break; 511 break;
464 case IB_WR_ATOMIC_CMP_AND_SWP: 512 case IB_WR_ATOMIC_CMP_AND_SWP:
465 roce_set_field(rc_sq_wqe->byte_4, 513 hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
466 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 514 rc_sq_wqe->rkey =
467 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 515 cpu_to_le32(atomic_wr(wr)->rkey);
468 HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP); 516 rc_sq_wqe->va =
517 cpu_to_le64(atomic_wr(wr)->remote_addr);
469 break; 518 break;
470 case IB_WR_ATOMIC_FETCH_AND_ADD: 519 case IB_WR_ATOMIC_FETCH_AND_ADD:
471 roce_set_field(rc_sq_wqe->byte_4, 520 hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
472 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 521 rc_sq_wqe->rkey =
473 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, 522 cpu_to_le32(atomic_wr(wr)->rkey);
474 HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD); 523 rc_sq_wqe->va =
524 cpu_to_le64(atomic_wr(wr)->remote_addr);
475 break; 525 break;
476 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 526 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
477 roce_set_field(rc_sq_wqe->byte_4, 527 hr_op =
478 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 528 HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
479 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
480 HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
481 break; 529 break;
482 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: 530 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
483 roce_set_field(rc_sq_wqe->byte_4, 531 hr_op =
484 V2_RC_SEND_WQE_BYTE_4_OPCODE_M, 532 HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
485 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
486 HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
487 break; 533 break;
488 default: 534 default:
489 roce_set_field(rc_sq_wqe->byte_4, 535 hr_op = HNS_ROCE_V2_WQE_OP_MASK;
490 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
491 V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
492 HNS_ROCE_V2_WQE_OP_MASK);
493 break; 536 break;
494 } 537 }
495 538
496 wqe += sizeof(struct hns_roce_v2_rc_send_wqe); 539 roce_set_field(rc_sq_wqe->byte_4,
540 V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
541 V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
542
543 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
544 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
545 struct hns_roce_v2_wqe_data_seg *dseg;
546
547 dseg = wqe;
548 set_data_seg_v2(dseg, wr->sg_list);
549 wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
550 set_atomic_seg(wqe, atomic_wr(wr));
551 roce_set_field(rc_sq_wqe->byte_16,
552 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
553 V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
554 wr->num_sge);
555 } else if (wr->opcode != IB_WR_REG_MR) {
556 ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
557 wqe, &sge_ind, bad_wr);
558 if (ret)
559 goto out;
560 }
497 561
498 ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
499 &sge_ind, bad_wr);
500 if (ret)
501 goto out;
502 ind++; 562 ind++;
503 } else { 563 } else {
504 dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); 564 dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
935 995
936 resp = (struct hns_roce_query_version *)desc.data; 996 resp = (struct hns_roce_query_version *)desc.data;
937 hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version); 997 hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
938 hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id); 998 hr_dev->vendor_id = hr_dev->pci_dev->vendor;
999
1000 return 0;
1001}
1002
1003static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
1004{
1005 struct hns_roce_query_fw_info *resp;
1006 struct hns_roce_cmq_desc desc;
1007 int ret;
1008
1009 hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
1010 ret = hns_roce_cmq_send(hr_dev, &desc, 1);
1011 if (ret)
1012 return ret;
1013
1014 resp = (struct hns_roce_query_fw_info *)desc.data;
1015 hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
939 1016
940 return 0; 1017 return 0;
941} 1018}
@@ -1158,6 +1235,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1158 1235
1159 ret = hns_roce_cmq_query_hw_info(hr_dev); 1236 ret = hns_roce_cmq_query_hw_info(hr_dev);
1160 if (ret) { 1237 if (ret) {
1238 dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
1239 ret);
1240 return ret;
1241 }
1242
1243 ret = hns_roce_query_fw_ver(hr_dev);
1244 if (ret) {
1161 dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", 1245 dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
1162 ret); 1246 ret);
1163 return ret; 1247 return ret;
@@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1185 return ret; 1269 return ret;
1186 } 1270 }
1187 1271
1188 hr_dev->vendor_part_id = 0; 1272
1189 hr_dev->sys_image_guid = 0; 1273 hr_dev->vendor_part_id = hr_dev->pci_dev->device;
1274 hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
1190 1275
1191 caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; 1276 caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
1192 caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; 1277 caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
1193 caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; 1278 caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
1194 caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; 1279 caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
1195 caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; 1280 caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
1281 caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
1196 caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; 1282 caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
1197 caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; 1283 caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
1198 caps->num_uars = HNS_ROCE_V2_UAR_NUM; 1284 caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1222 caps->reserved_mrws = 1; 1308 caps->reserved_mrws = 1;
1223 caps->reserved_uars = 0; 1309 caps->reserved_uars = 0;
1224 caps->reserved_cqs = 0; 1310 caps->reserved_cqs = 0;
1311 caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
1225 1312
1226 caps->qpc_ba_pg_sz = 0; 1313 caps->qpc_ba_pg_sz = 0;
1227 caps->qpc_buf_pg_sz = 0; 1314 caps->qpc_buf_pg_sz = 0;
@@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1255 HNS_ROCE_CAP_FLAG_RQ_INLINE | 1342 HNS_ROCE_CAP_FLAG_RQ_INLINE |
1256 HNS_ROCE_CAP_FLAG_RECORD_DB | 1343 HNS_ROCE_CAP_FLAG_RECORD_DB |
1257 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; 1344 HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
1345
1346 if (hr_dev->pci_dev->revision == 0x21)
1347 caps->flags |= HNS_ROCE_CAP_FLAG_MW |
1348 HNS_ROCE_CAP_FLAG_FRMR;
1349
1258 caps->pkey_table_len[0] = 1; 1350 caps->pkey_table_len[0] = 1;
1259 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; 1351 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
1260 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; 1352 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
1262 caps->local_ca_ack_delay = 0; 1354 caps->local_ca_ack_delay = 0;
1263 caps->max_mtu = IB_MTU_4096; 1355 caps->max_mtu = IB_MTU_4096;
1264 1356
1357 if (hr_dev->pci_dev->revision == 0x21)
1358 caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
1359
1265 ret = hns_roce_v2_set_bt(hr_dev); 1360 ret = hns_roce_v2_set_bt(hr_dev);
1266 if (ret) 1361 if (ret)
1267 dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", 1362 dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
1690 1785
1691 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); 1786 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
1692 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); 1787 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
1693 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); 1788 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
1694 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, 1789 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
1695 (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); 1790 (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
1696 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); 1791 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
1792 mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
1697 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, 1793 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
1698 (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); 1794 (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
1699 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, 1795 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
1817 return 0; 1913 return 0;
1818} 1914}
1819 1915
1916static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
1917{
1918 struct hns_roce_v2_mpt_entry *mpt_entry;
1919
1920 mpt_entry = mb_buf;
1921 memset(mpt_entry, 0, sizeof(*mpt_entry));
1922
1923 roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
1924 V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
1925 roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
1926 V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
1927 roce_set_field(mpt_entry->byte_4_pd_hop_st,
1928 V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
1929 V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
1930 mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
1931 roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
1932 V2_MPT_BYTE_4_PD_S, mr->pd);
1933
1934 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
1935 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
1936 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
1937
1938 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
1939 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
1940 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
1941 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
1942
1943 mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
1944
1945 mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
1946 roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
1947 V2_MPT_BYTE_48_PBL_BA_H_S,
1948 upper_32_bits(mr->pbl_ba >> 3));
1949
1950 roce_set_field(mpt_entry->byte_64_buf_pa1,
1951 V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
1952 V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
1953 mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
1954
1955 return 0;
1956}
1957
1958static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
1959{
1960 struct hns_roce_v2_mpt_entry *mpt_entry;
1961
1962 mpt_entry = mb_buf;
1963 memset(mpt_entry, 0, sizeof(*mpt_entry));
1964
1965 roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
1966 V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
1967 roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
1968 V2_MPT_BYTE_4_PD_S, mw->pdn);
1969 roce_set_field(mpt_entry->byte_4_pd_hop_st,
1970 V2_MPT_BYTE_4_PBL_HOP_NUM_M,
1971 V2_MPT_BYTE_4_PBL_HOP_NUM_S,
1972 mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
1973 0 : mw->pbl_hop_num);
1974 roce_set_field(mpt_entry->byte_4_pd_hop_st,
1975 V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
1976 V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
1977 mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
1978
1979 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
1980 roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
1981
1982 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
1983 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
1984 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
1985 roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
1986 mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
1987
1988 roce_set_field(mpt_entry->byte_64_buf_pa1,
1989 V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
1990 V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
1991 mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
1992
1993 mpt_entry->lkey = cpu_to_le32(mw->rkey);
1994
1995 return 0;
1996}
1997
1820static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) 1998static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
1821{ 1999{
1822 return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, 2000 return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
2274 wc->src_qp = (u8)roce_get_field(cqe->byte_32, 2452 wc->src_qp = (u8)roce_get_field(cqe->byte_32,
2275 V2_CQE_BYTE_32_RMT_QPN_M, 2453 V2_CQE_BYTE_32_RMT_QPN_M,
2276 V2_CQE_BYTE_32_RMT_QPN_S); 2454 V2_CQE_BYTE_32_RMT_QPN_S);
2455 wc->slid = 0;
2277 wc->wc_flags |= (roce_get_bit(cqe->byte_32, 2456 wc->wc_flags |= (roce_get_bit(cqe->byte_32,
2278 V2_CQE_BYTE_32_GRH_S) ? 2457 V2_CQE_BYTE_32_GRH_S) ?
2279 IB_WC_GRH : 0); 2458 IB_WC_GRH : 0);
@@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
2287 wc->smac[5] = roce_get_field(cqe->byte_28, 2466 wc->smac[5] = roce_get_field(cqe->byte_28,
2288 V2_CQE_BYTE_28_SMAC_5_M, 2467 V2_CQE_BYTE_28_SMAC_5_M,
2289 V2_CQE_BYTE_28_SMAC_5_S); 2468 V2_CQE_BYTE_28_SMAC_5_S);
2290 wc->vlan_id = 0xffff; 2469 if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
2470 wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
2471 V2_CQE_BYTE_28_VID_M,
2472 V2_CQE_BYTE_28_VID_S);
2473 } else {
2474 wc->vlan_id = 0xffff;
2475 }
2476
2291 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); 2477 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
2292 wc->network_hdr_type = roce_get_field(cqe->byte_28, 2478 wc->network_hdr_type = roce_get_field(cqe->byte_28,
2293 V2_CQE_BYTE_28_PORT_TYPE_M, 2479 V2_CQE_BYTE_28_PORT_TYPE_M,
@@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2589 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0); 2775 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
2590 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0); 2776 roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
2591 2777
2592 roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M, 2778 roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
2593 V2_QPC_BYTE_60_MAPID_S, 0); 2779 V2_QPC_BYTE_60_TEMPID_S, 0);
2594 2780
2595 roce_set_bit(qpc_mask->byte_60_qpst_mapid, 2781 roce_set_field(qpc_mask->byte_60_qpst_tempid,
2596 V2_QPC_BYTE_60_INNER_MAP_IND_S, 0); 2782 V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
2597 roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S, 2783 0);
2598 0); 2784 roce_set_bit(qpc_mask->byte_60_qpst_tempid,
2599 roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S, 2785 V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
2600 0); 2786 roce_set_bit(qpc_mask->byte_60_qpst_tempid,
2601 roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S, 2787 V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
2602 0);
2603 roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
2604 0);
2605 roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
2606 0);
2607 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); 2788 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
2608 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); 2789 roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
2609 2790
@@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2685 roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, 2866 roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
2686 V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); 2867 V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
2687 2868
2688 roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0); 2869 roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
2870 0);
2689 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M, 2871 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
2690 V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0); 2872 V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
2691 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M, 2873 roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2694 roce_set_field(qpc_mask->byte_144_raq, 2876 roce_set_field(qpc_mask->byte_144_raq,
2695 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M, 2877 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
2696 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0); 2878 V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
2697 roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
2698 0);
2699 roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M, 2879 roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
2700 V2_QPC_BYTE_144_RAQ_CREDIT_S, 0); 2880 V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
2701 roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0); 2881 roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2721 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M, 2901 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
2722 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0); 2902 V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
2723 2903
2724 roce_set_field(context->byte_168_irrl_idx, 2904 roce_set_bit(qpc_mask->byte_168_irrl_idx,
2725 V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 2905 V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
2726 V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 2906 roce_set_bit(qpc_mask->byte_168_irrl_idx,
2727 ilog2((unsigned int)hr_qp->sq.wqe_cnt)); 2907 V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
2728 roce_set_field(qpc_mask->byte_168_irrl_idx, 2908 roce_set_bit(qpc_mask->byte_168_irrl_idx,
2729 V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, 2909 V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
2730 V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
2731
2732 roce_set_bit(qpc_mask->byte_168_irrl_idx, 2910 roce_set_bit(qpc_mask->byte_168_irrl_idx,
2733 V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); 2911 V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
2734 roce_set_bit(qpc_mask->byte_168_irrl_idx, 2912 roce_set_bit(qpc_mask->byte_168_irrl_idx,
@@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2746 roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S, 2924 roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
2747 0); 2925 0);
2748 2926
2927 roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
2928 roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
2929
2749 roce_set_field(qpc_mask->byte_176_msg_pktn, 2930 roce_set_field(qpc_mask->byte_176_msg_pktn,
2750 V2_QPC_BYTE_176_MSG_USE_PKTN_M, 2931 V2_QPC_BYTE_176_MSG_USE_PKTN_M,
2751 V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0); 2932 V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
2790 V2_QPC_BYTE_232_IRRL_SGE_IDX_M, 2971 V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
2791 V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); 2972 V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
2792 2973
2974 roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
2975 0);
2976 roce_set_bit(qpc_mask->byte_232_irrl_sge,
2977 V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
2978 roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
2979 0);
2980
2793 qpc_mask->irrl_cur_sge_offset = 0; 2981 qpc_mask->irrl_cur_sge_offset = 0;
2794 2982
2795 roce_set_field(qpc_mask->byte_240_irrl_tail, 2983 roce_set_field(qpc_mask->byte_240_irrl_tail,
@@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
2955 roce_set_field(qpc_mask->byte_56_dqpn_err, 3143 roce_set_field(qpc_mask->byte_56_dqpn_err,
2956 V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); 3144 V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
2957 } 3145 }
2958 roce_set_field(context->byte_168_irrl_idx,
2959 V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
2960 V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
2961 ilog2((unsigned int)hr_qp->sq.wqe_cnt));
2962 roce_set_field(qpc_mask->byte_168_irrl_idx,
2963 V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
2964 V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
2965} 3146}
2966 3147
2967static int modify_qp_init_to_rtr(struct ib_qp *ibqp, 3148static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
3271 * we should set all bits of the relevant fields in context mask to 3452 * we should set all bits of the relevant fields in context mask to
3272 * 0 at the same time, else set them to 0x1. 3453 * 0 at the same time, else set them to 0x1.
3273 */ 3454 */
3274 roce_set_field(context->byte_60_qpst_mapid,
3275 V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
3276 V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
3277 roce_set_field(qpc_mask->byte_60_qpst_mapid,
3278 V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
3279 V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
3280
3281 context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); 3455 context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
3282 roce_set_field(context->byte_168_irrl_idx, 3456 roce_set_field(context->byte_168_irrl_idx,
3283 V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, 3457 V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
3538 memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); 3712 memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
3539 } 3713 }
3540 3714
3715 if (is_vlan_dev(gid_attr->ndev)) {
3716 roce_set_bit(context->byte_76_srqn_op_en,
3717 V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
3718 roce_set_bit(qpc_mask->byte_76_srqn_op_en,
3719 V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
3720 roce_set_bit(context->byte_168_irrl_idx,
3721 V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
3722 roce_set_bit(qpc_mask->byte_168_irrl_idx,
3723 V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
3724 }
3725
3541 roce_set_field(context->byte_24_mtu_tc, 3726 roce_set_field(context->byte_24_mtu_tc,
3542 V2_QPC_BYTE_24_VLAN_ID_M, 3727 V2_QPC_BYTE_24_VLAN_ID_M,
3543 V2_QPC_BYTE_24_VLAN_ID_S, vlan); 3728 V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
3584 V2_QPC_BYTE_24_HOP_LIMIT_M, 3769 V2_QPC_BYTE_24_HOP_LIMIT_M,
3585 V2_QPC_BYTE_24_HOP_LIMIT_S, 0); 3770 V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
3586 3771
3587 roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 3772 if (hr_dev->pci_dev->revision == 0x21 &&
3588 V2_QPC_BYTE_24_TC_S, grh->traffic_class); 3773 gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
3774 roce_set_field(context->byte_24_mtu_tc,
3775 V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
3776 grh->traffic_class >> 2);
3777 else
3778 roce_set_field(context->byte_24_mtu_tc,
3779 V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
3780 grh->traffic_class);
3589 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, 3781 roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
3590 V2_QPC_BYTE_24_TC_S, 0); 3782 V2_QPC_BYTE_24_TC_S, 0);
3591 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, 3783 roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
3606 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); 3798 set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
3607 3799
3608 /* Every status migrate must change state */ 3800 /* Every status migrate must change state */
3609 roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, 3801 roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
3610 V2_QPC_BYTE_60_QP_ST_S, new_state); 3802 V2_QPC_BYTE_60_QP_ST_S, new_state);
3611 roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, 3803 roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
3612 V2_QPC_BYTE_60_QP_ST_S, 0); 3804 V2_QPC_BYTE_60_QP_ST_S, 0);
3613 3805
3614 /* SW pass context to HW */ 3806 /* SW pass context to HW */
@@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
3728 goto out; 3920 goto out;
3729 } 3921 }
3730 3922
3731 state = roce_get_field(context->byte_60_qpst_mapid, 3923 state = roce_get_field(context->byte_60_qpst_tempid,
3732 V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); 3924 V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
3733 tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); 3925 tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
3734 if (tmp_qp_state == -1) { 3926 if (tmp_qp_state == -1) {
@@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
3995{ 4187{
3996 struct hns_roce_work *irq_work = 4188 struct hns_roce_work *irq_work =
3997 container_of(work, struct hns_roce_work, work); 4189 container_of(work, struct hns_roce_work, work);
4190 struct device *dev = irq_work->hr_dev->dev;
3998 u32 qpn = irq_work->qpn; 4191 u32 qpn = irq_work->qpn;
4192 u32 cqn = irq_work->cqn;
3999 4193
4000 switch (irq_work->event_type) { 4194 switch (irq_work->event_type) {
4195 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
 4196 dev_info(dev, "Path migration succeeded.\n");
4197 break;
4198 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
4199 dev_warn(dev, "Path migration failed.\n");
4200 break;
4201 case HNS_ROCE_EVENT_TYPE_COMM_EST:
4202 dev_info(dev, "Communication established.\n");
4203 break;
4204 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4205 dev_warn(dev, "Send queue drained.\n");
4206 break;
4001 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4207 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4208 dev_err(dev, "Local work queue catastrophic error.\n");
4209 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4210 switch (irq_work->sub_type) {
4211 case HNS_ROCE_LWQCE_QPC_ERROR:
4212 dev_err(dev, "QP %d, QPC error.\n", qpn);
4213 break;
4214 case HNS_ROCE_LWQCE_MTU_ERROR:
4215 dev_err(dev, "QP %d, MTU error.\n", qpn);
4216 break;
4217 case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
4218 dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
4219 break;
4220 case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
4221 dev_err(dev, "QP %d, WQE addr error.\n", qpn);
4222 break;
4223 case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
4224 dev_err(dev, "QP %d, WQE shift error.\n", qpn);
4225 break;
4226 default:
4227 dev_err(dev, "Unhandled sub_event type %d.\n",
4228 irq_work->sub_type);
4229 break;
4230 }
4231 break;
4002 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4232 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4233 dev_err(dev, "Invalid request local work queue error.\n");
4234 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4235 break;
4003 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4236 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4237 dev_err(dev, "Local access violation work queue error.\n");
4004 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); 4238 hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
4239 switch (irq_work->sub_type) {
4240 case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
4241 dev_err(dev, "QP %d, R_key violation.\n", qpn);
4242 break;
4243 case HNS_ROCE_LAVWQE_LENGTH_ERROR:
4244 dev_err(dev, "QP %d, length error.\n", qpn);
4245 break;
4246 case HNS_ROCE_LAVWQE_VA_ERROR:
4247 dev_err(dev, "QP %d, VA error.\n", qpn);
4248 break;
4249 case HNS_ROCE_LAVWQE_PD_ERROR:
4250 dev_err(dev, "QP %d, PD error.\n", qpn);
4251 break;
4252 case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
4253 dev_err(dev, "QP %d, rw acc error.\n", qpn);
4254 break;
4255 case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
4256 dev_err(dev, "QP %d, key state error.\n", qpn);
4257 break;
4258 case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
4259 dev_err(dev, "QP %d, MR operation error.\n", qpn);
4260 break;
4261 default:
4262 dev_err(dev, "Unhandled sub_event type %d.\n",
4263 irq_work->sub_type);
4264 break;
4265 }
4266 break;
4267 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
4268 dev_warn(dev, "SRQ limit reach.\n");
4269 break;
4270 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
4271 dev_warn(dev, "SRQ last wqe reach.\n");
4272 break;
4273 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
4274 dev_err(dev, "SRQ catas error.\n");
4275 break;
4276 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4277 dev_err(dev, "CQ 0x%x access err.\n", cqn);
4278 break;
4279 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4280 dev_warn(dev, "CQ 0x%x overflow\n", cqn);
4281 break;
4282 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
4283 dev_warn(dev, "DB overflow.\n");
4284 break;
4285 case HNS_ROCE_EVENT_TYPE_FLR:
4286 dev_warn(dev, "Function level reset.\n");
4005 break; 4287 break;
4006 default: 4288 default:
4007 break; 4289 break;
@@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
4011} 4293}
4012 4294
4013static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, 4295static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
4014 struct hns_roce_eq *eq, u32 qpn) 4296 struct hns_roce_eq *eq,
4297 u32 qpn, u32 cqn)
4015{ 4298{
4016 struct hns_roce_work *irq_work; 4299 struct hns_roce_work *irq_work;
4017 4300
@@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
4022 INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); 4305 INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
4023 irq_work->hr_dev = hr_dev; 4306 irq_work->hr_dev = hr_dev;
4024 irq_work->qpn = qpn; 4307 irq_work->qpn = qpn;
4308 irq_work->cqn = cqn;
4025 irq_work->event_type = eq->event_type; 4309 irq_work->event_type = eq->event_type;
4026 irq_work->sub_type = eq->sub_type; 4310 irq_work->sub_type = eq->sub_type;
4027 queue_work(hr_dev->irq_workq, &(irq_work->work)); 4311 queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
4058 hns_roce_write64_k(doorbell, eq->doorbell); 4342 hns_roce_write64_k(doorbell, eq->doorbell);
4059} 4343}
4060 4344
4061static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
4062 struct hns_roce_aeqe *aeqe,
4063 u32 qpn)
4064{
4065 struct device *dev = hr_dev->dev;
4066 int sub_type;
4067
4068 dev_warn(dev, "Local work queue catastrophic error.\n");
4069 sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
4070 HNS_ROCE_V2_AEQE_SUB_TYPE_S);
4071 switch (sub_type) {
4072 case HNS_ROCE_LWQCE_QPC_ERROR:
4073 dev_warn(dev, "QP %d, QPC error.\n", qpn);
4074 break;
4075 case HNS_ROCE_LWQCE_MTU_ERROR:
4076 dev_warn(dev, "QP %d, MTU error.\n", qpn);
4077 break;
4078 case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
4079 dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
4080 break;
4081 case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
4082 dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
4083 break;
4084 case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
4085 dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
4086 break;
4087 default:
4088 dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
4089 break;
4090 }
4091}
4092
4093static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
4094 struct hns_roce_aeqe *aeqe, u32 qpn)
4095{
4096 struct device *dev = hr_dev->dev;
4097 int sub_type;
4098
4099 dev_warn(dev, "Local access violation work queue error.\n");
4100 sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
4101 HNS_ROCE_V2_AEQE_SUB_TYPE_S);
4102 switch (sub_type) {
4103 case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
4104 dev_warn(dev, "QP %d, R_key violation.\n", qpn);
4105 break;
4106 case HNS_ROCE_LAVWQE_LENGTH_ERROR:
4107 dev_warn(dev, "QP %d, length error.\n", qpn);
4108 break;
4109 case HNS_ROCE_LAVWQE_VA_ERROR:
4110 dev_warn(dev, "QP %d, VA error.\n", qpn);
4111 break;
4112 case HNS_ROCE_LAVWQE_PD_ERROR:
4113 dev_err(dev, "QP %d, PD error.\n", qpn);
4114 break;
4115 case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
4116 dev_warn(dev, "QP %d, rw acc error.\n", qpn);
4117 break;
4118 case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
4119 dev_warn(dev, "QP %d, key state error.\n", qpn);
4120 break;
4121 case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
4122 dev_warn(dev, "QP %d, MR operation error.\n", qpn);
4123 break;
4124 default:
4125 dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
4126 break;
4127 }
4128}
4129
4130static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
4131 struct hns_roce_aeqe *aeqe,
4132 int event_type, u32 qpn)
4133{
4134 struct device *dev = hr_dev->dev;
4135
4136 switch (event_type) {
4137 case HNS_ROCE_EVENT_TYPE_COMM_EST:
4138 dev_warn(dev, "Communication established.\n");
4139 break;
4140 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4141 dev_warn(dev, "Send queue drained.\n");
4142 break;
4143 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4144 hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
4145 break;
4146 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4147 dev_warn(dev, "Invalid request local work queue error.\n");
4148 break;
4149 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4150 hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
4151 break;
4152 default:
4153 break;
4154 }
4155
4156 hns_roce_qp_event(hr_dev, qpn, event_type);
4157}
4158
4159static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
4160 struct hns_roce_aeqe *aeqe,
4161 int event_type, u32 cqn)
4162{
4163 struct device *dev = hr_dev->dev;
4164
4165 switch (event_type) {
4166 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4167 dev_warn(dev, "CQ 0x%x access err.\n", cqn);
4168 break;
4169 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4170 dev_warn(dev, "CQ 0x%x overflow\n", cqn);
4171 break;
4172 default:
4173 break;
4174 }
4175
4176 hns_roce_cq_event(hr_dev, cqn, event_type);
4177}
4178
4179static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) 4345static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
4180{ 4346{
4181 u32 buf_chk_sz; 4347 u32 buf_chk_sz;
@@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
4251 4417
4252 switch (event_type) { 4418 switch (event_type) {
4253 case HNS_ROCE_EVENT_TYPE_PATH_MIG: 4419 case HNS_ROCE_EVENT_TYPE_PATH_MIG:
4254 dev_warn(dev, "Path migrated succeeded.\n");
4255 break;
4256 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: 4420 case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
4257 dev_warn(dev, "Path migration failed.\n");
4258 break;
4259 case HNS_ROCE_EVENT_TYPE_COMM_EST: 4421 case HNS_ROCE_EVENT_TYPE_COMM_EST:
4260 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: 4422 case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
4261 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4423 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
4262 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4424 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
4263 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4425 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
4264 hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, 4426 hns_roce_qp_event(hr_dev, qpn, event_type);
4265 qpn);
4266 break; 4427 break;
4267 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 4428 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
4268 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: 4429 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
4269 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: 4430 case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
4270 dev_warn(dev, "SRQ not support.\n");
4271 break; 4431 break;
4272 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4432 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
4273 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4433 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
4274 hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, 4434 hns_roce_cq_event(hr_dev, cqn, event_type);
4275 cqn);
4276 break; 4435 break;
4277 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 4436 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
4278 dev_warn(dev, "DB overflow.\n");
4279 break; 4437 break;
4280 case HNS_ROCE_EVENT_TYPE_MB: 4438 case HNS_ROCE_EVENT_TYPE_MB:
4281 hns_roce_cmd_event(hr_dev, 4439 hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
4284 le64_to_cpu(aeqe->event.cmd.out_param)); 4442 le64_to_cpu(aeqe->event.cmd.out_param));
4285 break; 4443 break;
4286 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: 4444 case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
4287 dev_warn(dev, "CEQ overflow.\n");
4288 break; 4445 break;
4289 case HNS_ROCE_EVENT_TYPE_FLR: 4446 case HNS_ROCE_EVENT_TYPE_FLR:
4290 dev_warn(dev, "Function level reset.\n");
4291 break; 4447 break;
4292 default: 4448 default:
4293 dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", 4449 dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
4304 dev_warn(dev, "cons_index overflow, set back to 0.\n"); 4460 dev_warn(dev, "cons_index overflow, set back to 0.\n");
4305 eq->cons_index = 0; 4461 eq->cons_index = 0;
4306 } 4462 }
4307 hns_roce_v2_init_irq_work(hr_dev, eq, qpn); 4463 hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
4308 } 4464 }
4309 4465
4310 set_eq_cons_index_v2(eq); 4466 set_eq_cons_index_v2(eq);
@@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
5125 create_singlethread_workqueue("hns_roce_irq_workqueue"); 5281 create_singlethread_workqueue("hns_roce_irq_workqueue");
5126 if (!hr_dev->irq_workq) { 5282 if (!hr_dev->irq_workq) {
5127 dev_err(dev, "Create irq workqueue failed!\n"); 5283 dev_err(dev, "Create irq workqueue failed!\n");
5284 ret = -ENOMEM;
5128 goto err_request_irq_fail; 5285 goto err_request_irq_fail;
5129 } 5286 }
5130 5287
@@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
5195 .set_mac = hns_roce_v2_set_mac, 5352 .set_mac = hns_roce_v2_set_mac,
5196 .write_mtpt = hns_roce_v2_write_mtpt, 5353 .write_mtpt = hns_roce_v2_write_mtpt,
5197 .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt, 5354 .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
5355 .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
5356 .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
5198 .write_cqc = hns_roce_v2_write_cqc, 5357 .write_cqc = hns_roce_v2_write_cqc,
5199 .set_hem = hns_roce_v2_set_hem, 5358 .set_hem = hns_roce_v2_set_hem,
5200 .clear_hem = hns_roce_v2_clear_hem, 5359 .clear_hem = hns_roce_v2_clear_hem,
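The hunk above extends the driver's deferred-reporting path so the queued work item now carries the CQ number alongside the QP number. A minimal sketch of that pattern, independent of the hns code (all demo_* names are illustrative, not part of the patch): the interrupt path only records the event and queues work, and the verbose dev_err/dev_warn style reporting runs later in process context.

#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/printk.h>

struct demo_irq_work {
        struct work_struct work;
        u32 qpn;
        u32 cqn;
        int event_type;
};

static void demo_irq_work_handle(struct work_struct *work)
{
        struct demo_irq_work *w = container_of(work, struct demo_irq_work, work);

        /* process-context reporting, analogous to hns_roce_irq_work_handle() */
        pr_debug("event %d, qpn 0x%x, cqn 0x%x\n", w->event_type, w->qpn, w->cqn);
        kfree(w);
}

static void demo_queue_event(struct workqueue_struct *wq, u32 qpn, u32 cqn, int type)
{
        /* called from IRQ context, hence GFP_ATOMIC */
        struct demo_irq_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

        if (!w)
                return;
        INIT_WORK(&w->work, demo_irq_work_handle);
        w->qpn = qpn;
        w->cqn = cqn;
        w->event_type = type;
        queue_work(wq, &w->work);
}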
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 14aa308befef..8bc820635bbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -50,6 +50,7 @@
50#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 50#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
51#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 51#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
52#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff 52#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
53#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
53#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 54#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
54#define HNS_ROCE_V2_UAR_NUM 256 55#define HNS_ROCE_V2_UAR_NUM 256
55#define HNS_ROCE_V2_PHY_UAR_NUM 1 56#define HNS_ROCE_V2_PHY_UAR_NUM 1
@@ -78,6 +79,7 @@
78#define HNS_ROCE_INVALID_LKEY 0x100 79#define HNS_ROCE_INVALID_LKEY 0x100
79#define HNS_ROCE_CMQ_TX_TIMEOUT 30000 80#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
80#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 81#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
82#define HNS_ROCE_V2_RSV_QPS 8
81 83
82#define HNS_ROCE_CONTEXT_HOP_NUM 1 84#define HNS_ROCE_CONTEXT_HOP_NUM 1
83#define HNS_ROCE_MTT_HOP_NUM 1 85#define HNS_ROCE_MTT_HOP_NUM 1
@@ -201,6 +203,7 @@ enum {
201 203
202/* CMQ command */ 204/* CMQ command */
203enum hns_roce_opcode_type { 205enum hns_roce_opcode_type {
206 HNS_QUERY_FW_VER = 0x0001,
204 HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, 207 HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
205 HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, 208 HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
206 HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, 209 HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
@@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
324 327
325enum{ 328enum{
326 V2_MPT_ST_VALID = 0x1, 329 V2_MPT_ST_VALID = 0x1,
330 V2_MPT_ST_FREE = 0x2,
327}; 331};
328 332
329enum hns_roce_v2_qp_state { 333enum hns_roce_v2_qp_state {
@@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
350 __le32 dmac; 354 __le32 dmac;
351 __le32 byte_52_udpspn_dmac; 355 __le32 byte_52_udpspn_dmac;
352 __le32 byte_56_dqpn_err; 356 __le32 byte_56_dqpn_err;
353 __le32 byte_60_qpst_mapid; 357 __le32 byte_60_qpst_tempid;
354 __le32 qkey_xrcd; 358 __le32 qkey_xrcd;
355 __le32 byte_68_rq_db; 359 __le32 byte_68_rq_db;
356 __le32 rq_db_record_addr; 360 __le32 rq_db_record_addr;
@@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
492#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28 496#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
493#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28) 497#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
494 498
495#define V2_QPC_BYTE_60_MAPID_S 0 499#define V2_QPC_BYTE_60_TEMPID_S 0
496#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0) 500#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
497 501
498#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13 502#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
503#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
499 504
500#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14 505#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
501 506
502#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15 507#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
503
504#define V2_QPC_BYTE_60_TEMPID_S 16
505#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
506
507#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
508
509#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
510#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
511
512#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
513
514#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
515 508
516#define V2_QPC_BYTE_60_QP_ST_S 29 509#define V2_QPC_BYTE_60_QP_ST_S 29
517#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29) 510#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
534 527
535#define V2_QPC_BYTE_76_RQIE_S 28 528#define V2_QPC_BYTE_76_RQIE_S 28
536 529
530#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
537#define V2_QPC_BYTE_80_RX_CQN_S 0 531#define V2_QPC_BYTE_80_RX_CQN_S 0
538#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0) 532#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
539 533
@@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
588#define V2_QPC_BYTE_140_RR_MAX_S 12 582#define V2_QPC_BYTE_140_RR_MAX_S 12
589#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12) 583#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
590 584
591#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15 585#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
592 586
593#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16 587#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
594#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16) 588#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
599#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0 593#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
600#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0) 594#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
601 595
602#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
603
604#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25 596#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
605#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25) 597#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
606 598
@@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
637#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22 629#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
638#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22) 630#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
639 631
640#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24 632#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
641#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24) 633#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
642 634#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
635#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
643#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28 636#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
644#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28) 637#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
645 638
@@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
725#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20 718#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
726#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20) 719#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
727 720
721#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
722#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
723#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
724
728#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0 725#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
729#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0) 726#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
730 727
@@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
743#define V2_QPC_BYTE_244_RNR_CNT_S 27 740#define V2_QPC_BYTE_244_RNR_CNT_S 27
744#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27) 741#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
745 742
743#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
744#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
745
746#define V2_QPC_BYTE_248_IRRL_PSN_S 0 746#define V2_QPC_BYTE_248_IRRL_PSN_S 0
747#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0) 747#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
748 748
@@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
818#define V2_CQE_BYTE_28_PORT_TYPE_S 16 818#define V2_CQE_BYTE_28_PORT_TYPE_S 16
819#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16) 819#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
820 820
821#define V2_CQE_BYTE_28_VID_S 18
822#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
823
824#define V2_CQE_BYTE_28_VID_VLD_S 30
825
821#define V2_CQE_BYTE_32_RMT_QPN_S 0 826#define V2_CQE_BYTE_32_RMT_QPN_S 0
822#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0) 827#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
823 828
@@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
878 883
879#define V2_MPT_BYTE_8_LW_EN_S 7 884#define V2_MPT_BYTE_8_LW_EN_S 7
880 885
886#define V2_MPT_BYTE_8_MW_CNT_S 8
887#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
888
889#define V2_MPT_BYTE_12_FRE_S 0
890
881#define V2_MPT_BYTE_12_PA_S 1 891#define V2_MPT_BYTE_12_PA_S 1
882 892
893#define V2_MPT_BYTE_12_MR_MW_S 4
894
895#define V2_MPT_BYTE_12_BPD_S 5
896
897#define V2_MPT_BYTE_12_BQP_S 6
898
883#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7 899#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
884 900
885#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8 901#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
988#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 1004#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
989#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) 1005#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
990 1006
1007#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
1008
991#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 1009#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
992 1010
993#define V2_UD_SEND_WQE_DMAC_0_S 0 1011#define V2_UD_SEND_WQE_DMAC_0_S 0
@@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
1042 1060
1043#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 1061#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
1044 1062
1063#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
1064
1065#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
1066
1067#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
1068
1069#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
1070
1071#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
1072
1045#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 1073#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
1046#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) 1074#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
1047 1075
@@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
1051#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 1079#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
1052#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) 1080#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
1053 1081
1082struct hns_roce_wqe_frmr_seg {
1083 __le32 pbl_size;
1084 __le32 mode_buf_pg_sz;
1085};
1086
1087#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
1088#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
1089
1090#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
1091
1054struct hns_roce_v2_wqe_data_seg { 1092struct hns_roce_v2_wqe_data_seg {
1055 __le32 len; 1093 __le32 len;
1056 __le32 lkey; 1094 __le32 lkey;
@@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
1068 __le32 rsv[5]; 1106 __le32 rsv[5];
1069}; 1107};
1070 1108
1109struct hns_roce_query_fw_info {
1110 __le32 fw_ver;
1111 __le32 rsv[5];
1112};
1113
1071struct hns_roce_cfg_llm_a { 1114struct hns_roce_cfg_llm_a {
1072 __le32 base_addr_l; 1115 __le32 base_addr_l;
1073 __le32 base_addr_h; 1116 __le32 base_addr_h;
@@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
1564#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 1607#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
1565#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) 1608#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
1566 1609
1610struct hns_roce_wqe_atomic_seg {
1611 __le64 fetchadd_swap_data;
1612 __le64 cmp_data;
1613};
1614
1567#endif 1615#endif
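The new context and WQE defines above follow the driver's usual _S/_M convention: a bit-offset define paired with a GENMASK() mask, consumed by field helpers such as the roce_get_field() call visible in the removed hns_roce_hw_v2.c hunk earlier. A generic sketch of that convention (the demo_* helpers are illustrative, not the driver's roce_set_field()/roce_get_field() implementation):

#include <linux/bits.h>
#include <linux/types.h>

#define DEMO_FIELD_S 8
#define DEMO_FIELD_M GENMASK(26, 8)

static inline u32 demo_get_field(u32 reg, u32 mask, u32 shift)
{
        return (reg & mask) >> shift;
}

static inline void demo_set_field(u32 *reg, u32 mask, u32 shift, u32 val)
{
        *reg = (*reg & ~mask) | ((val << shift) & mask);
}

/* usage: demo_set_field(&word, DEMO_FIELD_M, DEMO_FIELD_S, val); */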
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c5cae9a38c04..1b3ee514f2ef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
196 196
197 memset(props, 0, sizeof(*props)); 197 memset(props, 0, sizeof(*props));
198 198
199 props->fw_ver = hr_dev->caps.fw_ver;
199 props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); 200 props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
200 props->max_mr_size = (u64)(~(0ULL)); 201 props->max_mr_size = (u64)(~(0ULL));
201 props->page_size_cap = hr_dev->caps.page_size_cap; 202 props->page_size_cap = hr_dev->caps.page_size_cap;
@@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
215 props->max_pd = hr_dev->caps.num_pds; 216 props->max_pd = hr_dev->caps.num_pds;
216 props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; 217 props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
217 props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; 218 props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
218 props->atomic_cap = IB_ATOMIC_NONE; 219 props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
220 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
219 props->max_pkeys = 1; 221 props->max_pkeys = 1;
220 props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; 222 props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
221 223
@@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
344 if (ret) 346 if (ret)
345 goto error_fail_uar_alloc; 347 goto error_fail_uar_alloc;
346 348
347 INIT_LIST_HEAD(&context->vma_list);
348 mutex_init(&context->vma_list_mutex);
349 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { 349 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
350 INIT_LIST_HEAD(&context->page_list); 350 INIT_LIST_HEAD(&context->page_list);
351 mutex_init(&context->page_mutex); 351 mutex_init(&context->page_mutex);
@@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
376 return 0; 376 return 0;
377} 377}
378 378
379static void hns_roce_vma_open(struct vm_area_struct *vma)
380{
381 vma->vm_ops = NULL;
382}
383
384static void hns_roce_vma_close(struct vm_area_struct *vma)
385{
386 struct hns_roce_vma_data *vma_data;
387
388 vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
389 vma_data->vma = NULL;
390 mutex_lock(vma_data->vma_list_mutex);
391 list_del(&vma_data->list);
392 mutex_unlock(vma_data->vma_list_mutex);
393 kfree(vma_data);
394}
395
396static const struct vm_operations_struct hns_roce_vm_ops = {
397 .open = hns_roce_vma_open,
398 .close = hns_roce_vma_close,
399};
400
401static int hns_roce_set_vma_data(struct vm_area_struct *vma,
402 struct hns_roce_ucontext *context)
403{
404 struct list_head *vma_head = &context->vma_list;
405 struct hns_roce_vma_data *vma_data;
406
407 vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
408 if (!vma_data)
409 return -ENOMEM;
410
411 vma_data->vma = vma;
412 vma_data->vma_list_mutex = &context->vma_list_mutex;
413 vma->vm_private_data = vma_data;
414 vma->vm_ops = &hns_roce_vm_ops;
415
416 mutex_lock(&context->vma_list_mutex);
417 list_add(&vma_data->list, vma_head);
418 mutex_unlock(&context->vma_list_mutex);
419
420 return 0;
421}
422
423static int hns_roce_mmap(struct ib_ucontext *context, 379static int hns_roce_mmap(struct ib_ucontext *context,
424 struct vm_area_struct *vma) 380 struct vm_area_struct *vma)
425{ 381{
426 struct hns_roce_dev *hr_dev = to_hr_dev(context->device); 382 struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
427 383
428 if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) 384 switch (vma->vm_pgoff) {
429 return -EINVAL; 385 case 0:
386 return rdma_user_mmap_io(context, vma,
387 to_hr_ucontext(context)->uar.pfn,
388 PAGE_SIZE,
389 pgprot_noncached(vma->vm_page_prot));
390
391 /* vm_pgoff: 1 -- TPTR */
392 case 1:
393 if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
394 return -EINVAL;
395 /*
396 * FIXME: using io_remap_pfn_range on the dma address returned
397 * by dma_alloc_coherent is totally wrong.
398 */
399 return rdma_user_mmap_io(context, vma,
400 hr_dev->tptr_dma_addr >> PAGE_SHIFT,
401 hr_dev->tptr_size,
402 vma->vm_page_prot);
430 403
431 if (vma->vm_pgoff == 0) { 404 default:
432 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
433 if (io_remap_pfn_range(vma, vma->vm_start,
434 to_hr_ucontext(context)->uar.pfn,
435 PAGE_SIZE, vma->vm_page_prot))
436 return -EAGAIN;
437 } else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr &&
438 hr_dev->tptr_size) {
439 /* vm_pgoff: 1 -- TPTR */
440 if (io_remap_pfn_range(vma, vma->vm_start,
441 hr_dev->tptr_dma_addr >> PAGE_SHIFT,
442 hr_dev->tptr_size,
443 vma->vm_page_prot))
444 return -EAGAIN;
445 } else
446 return -EINVAL; 405 return -EINVAL;
447 406 }
448 return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
449} 407}
450 408
451static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, 409static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
471 429
472static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext) 430static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
473{ 431{
474 struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
475 struct hns_roce_vma_data *vma_data, *n;
476 struct vm_area_struct *vma;
477
478 mutex_lock(&context->vma_list_mutex);
479 list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
480 vma = vma_data->vma;
481 zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
482
483 vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
484 vma->vm_ops = NULL;
485 list_del(&vma_data->list);
486 kfree(vma_data);
487 }
488 mutex_unlock(&context->vma_list_mutex);
489} 432}
490 433
491static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) 434static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
508 spin_lock_init(&iboe->lock); 451 spin_lock_init(&iboe->lock);
509 452
510 ib_dev = &hr_dev->ib_dev; 453 ib_dev = &hr_dev->ib_dev;
511 strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
512 454
513 ib_dev->owner = THIS_MODULE; 455 ib_dev->owner = THIS_MODULE;
514 ib_dev->node_type = RDMA_NODE_IB_CA; 456 ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
584 ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); 526 ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
585 } 527 }
586 528
529 /* MW */
530 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
531 ib_dev->alloc_mw = hns_roce_alloc_mw;
532 ib_dev->dealloc_mw = hns_roce_dealloc_mw;
533 ib_dev->uverbs_cmd_mask |=
534 (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
535 (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
536 }
537
538 /* FRMR */
539 if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
540 ib_dev->alloc_mr = hns_roce_alloc_mr;
541 ib_dev->map_mr_sg = hns_roce_map_mr_sg;
542 }
543
587 /* OTHERS */ 544 /* OTHERS */
588 ib_dev->get_port_immutable = hns_roce_port_immutable; 545 ib_dev->get_port_immutable = hns_roce_port_immutable;
589 ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; 546 ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
590 547
591 ib_dev->driver_id = RDMA_DRIVER_HNS; 548 ib_dev->driver_id = RDMA_DRIVER_HNS;
592 ret = ib_register_device(ib_dev, NULL); 549 ret = ib_register_device(ib_dev, "hns_%d", NULL);
593 if (ret) { 550 if (ret) {
594 dev_err(dev, "ib_register_device failed!\n"); 551 dev_err(dev, "ib_register_device failed!\n");
595 return ret; 552 return ret;
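The conversion above replaces the driver's hand-rolled vma tracking with the core rdma_user_mmap_io() helper: the driver only switches on vm_pgoff, and the core performs the io_remap_pfn_range() and zaps the mappings when the ucontext is disassociated, which is why the custom vm_operations and disassociate loop can be deleted. A minimal sketch of that shape (demo_mmap() and the pfn value are placeholders, not the hns code):

#include <linux/errno.h>
#include <linux/mm.h>
#include <rdma/ib_verbs.h>

static int demo_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        unsigned long pfn = 0;  /* a real driver would use its UAR/BAR pfn here */

        switch (vma->vm_pgoff) {
        case 0:
                return rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
                                         pgprot_noncached(vma->vm_page_prot));
        default:
                return -EINVAL;
        }
}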
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index eb26a5f6fc58..521ad2aa3a4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
329 u64 bt_idx; 329 u64 bt_idx;
330 u64 size; 330 u64 size;
331 331
332 mhop_num = hr_dev->caps.pbl_hop_num; 332 mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
333 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); 333 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
334 pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); 334 pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
335 335
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
351 351
352 mr->pbl_size = npages; 352 mr->pbl_size = npages;
353 mr->pbl_ba = mr->pbl_dma_addr; 353 mr->pbl_ba = mr->pbl_dma_addr;
354 mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; 354 mr->pbl_hop_num = mhop_num;
355 mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; 355 mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
356 mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; 356 mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
357 return 0; 357 return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
511 mr->key = hw_index_to_key(index); /* MR key */ 511 mr->key = hw_index_to_key(index); /* MR key */
512 512
513 if (size == ~0ull) { 513 if (size == ~0ull) {
514 mr->type = MR_TYPE_DMA;
515 mr->pbl_buf = NULL; 514 mr->pbl_buf = NULL;
516 mr->pbl_dma_addr = 0; 515 mr->pbl_dma_addr = 0;
517 /* PBL multi-hop addressing parameters */ 516 /* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
522 mr->pbl_l1_dma_addr = NULL; 521 mr->pbl_l1_dma_addr = NULL;
523 mr->pbl_l0_dma_addr = 0; 522 mr->pbl_l0_dma_addr = 0;
524 } else { 523 } else {
525 mr->type = MR_TYPE_MR;
526 if (!hr_dev->caps.pbl_hop_num) { 524 if (!hr_dev->caps.pbl_hop_num) {
527 mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, 525 mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
528 &(mr->pbl_dma_addr), 526 &(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
548 u32 mhop_num; 546 u32 mhop_num;
549 u64 bt_idx; 547 u64 bt_idx;
550 548
551 npages = ib_umem_page_count(mr->umem); 549 npages = mr->pbl_size;
552 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); 550 pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
553 mhop_num = hr_dev->caps.pbl_hop_num; 551 mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
554 552
555 if (mhop_num == HNS_ROCE_HOP_NUM_0) 553 if (mhop_num == HNS_ROCE_HOP_NUM_0)
556 return; 554 return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
636 } 634 }
637 635
638 if (mr->size != ~0ULL) { 636 if (mr->size != ~0ULL) {
639 npages = ib_umem_page_count(mr->umem); 637 if (mr->type == MR_TYPE_MR)
638 npages = ib_umem_page_count(mr->umem);
640 639
641 if (!hr_dev->caps.pbl_hop_num) 640 if (!hr_dev->caps.pbl_hop_num)
642 dma_free_coherent(dev, (unsigned int)(npages * 8), 641 dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
674 goto err_table; 673 goto err_table;
675 } 674 }
676 675
677 ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); 676 if (mr->type != MR_TYPE_FRMR)
677 ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
678 else
679 ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
678 if (ret) { 680 if (ret) {
679 dev_err(dev, "Write mtpt fail!\n"); 681 dev_err(dev, "Write mtpt fail!\n");
680 goto err_page; 682 goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
855 if (mr == NULL) 857 if (mr == NULL)
856 return ERR_PTR(-ENOMEM); 858 return ERR_PTR(-ENOMEM);
857 859
860 mr->type = MR_TYPE_DMA;
861
858 /* Allocate memory region key */ 862 /* Allocate memory region key */
859 ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, 863 ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
860 ~0ULL, acc, 0, mr); 864 ~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1031 } 1035 }
1032 } 1036 }
1033 1037
1038 mr->type = MR_TYPE_MR;
1039
1034 ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, 1040 ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
1035 access_flags, n, mr); 1041 access_flags, n, mr);
1036 if (ret) 1042 if (ret)
@@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
1201 1207
1202 return ret; 1208 return ret;
1203} 1209}
1210
1211struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1212 u32 max_num_sg)
1213{
1214 struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
1215 struct device *dev = hr_dev->dev;
1216 struct hns_roce_mr *mr;
1217 u64 length;
1218 u32 page_size;
1219 int ret;
1220
1221 page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
1222 length = max_num_sg * page_size;
1223
1224 if (mr_type != IB_MR_TYPE_MEM_REG)
1225 return ERR_PTR(-EINVAL);
1226
1227 if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
1228 dev_err(dev, "max_num_sg larger than %d\n",
1229 HNS_ROCE_FRMR_MAX_PA);
1230 return ERR_PTR(-EINVAL);
1231 }
1232
1233 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1234 if (!mr)
1235 return ERR_PTR(-ENOMEM);
1236
1237 mr->type = MR_TYPE_FRMR;
1238
1239 /* Allocate memory region key */
1240 ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
1241 0, max_num_sg, mr);
1242 if (ret)
1243 goto err_free;
1244
1245 ret = hns_roce_mr_enable(hr_dev, mr);
1246 if (ret)
1247 goto err_mr;
1248
1249 mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
1250 mr->umem = NULL;
1251
1252 return &mr->ibmr;
1253
1254err_mr:
1255 hns_roce_mr_free(to_hr_dev(pd->device), mr);
1256
1257err_free:
1258 kfree(mr);
1259 return ERR_PTR(ret);
1260}
1261
1262static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
1263{
1264 struct hns_roce_mr *mr = to_hr_mr(ibmr);
1265
1266 mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
1267
1268 return 0;
1269}
1270
1271int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1272 unsigned int *sg_offset)
1273{
1274 struct hns_roce_mr *mr = to_hr_mr(ibmr);
1275
1276 mr->npages = 0;
1277
1278 return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
1279}
1280
1281static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
1282 struct hns_roce_mw *mw)
1283{
1284 struct device *dev = hr_dev->dev;
1285 int ret;
1286
1287 if (mw->enabled) {
1288 ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
1289 & (hr_dev->caps.num_mtpts - 1));
1290 if (ret)
1291 dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
1292
1293 hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
1294 key_to_hw_index(mw->rkey));
1295 }
1296
1297 hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
1298 key_to_hw_index(mw->rkey), BITMAP_NO_RR);
1299}
1300
1301static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
1302 struct hns_roce_mw *mw)
1303{
1304 struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
1305 struct hns_roce_cmd_mailbox *mailbox;
1306 struct device *dev = hr_dev->dev;
1307 unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
1308 int ret;
1309
1310 /* prepare HEM entry memory */
1311 ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1312 if (ret)
1313 return ret;
1314
1315 mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
1316 if (IS_ERR(mailbox)) {
1317 ret = PTR_ERR(mailbox);
1318 goto err_table;
1319 }
1320
1321 ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
1322 if (ret) {
1323 dev_err(dev, "MW write mtpt fail!\n");
1324 goto err_page;
1325 }
1326
1327 ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
1328 mtpt_idx & (hr_dev->caps.num_mtpts - 1));
1329 if (ret) {
1330 dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
1331 goto err_page;
1332 }
1333
1334 mw->enabled = 1;
1335
1336 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1337
1338 return 0;
1339
1340err_page:
1341 hns_roce_free_cmd_mailbox(hr_dev, mailbox);
1342
1343err_table:
1344 hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
1345
1346 return ret;
1347}
1348
1349struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
1350 struct ib_udata *udata)
1351{
1352 struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
1353 struct hns_roce_mw *mw;
1354 unsigned long index = 0;
1355 int ret;
1356
1357 mw = kmalloc(sizeof(*mw), GFP_KERNEL);
1358 if (!mw)
1359 return ERR_PTR(-ENOMEM);
1360
1361 /* Allocate a key for mw from bitmap */
1362 ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
1363 if (ret)
1364 goto err_bitmap;
1365
1366 mw->rkey = hw_index_to_key(index);
1367
1368 mw->ibmw.rkey = mw->rkey;
1369 mw->ibmw.type = type;
1370 mw->pdn = to_hr_pd(ib_pd)->pdn;
1371 mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
1372 mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
1373 mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
1374
1375 ret = hns_roce_mw_enable(hr_dev, mw);
1376 if (ret)
1377 goto err_mw;
1378
1379 return &mw->ibmw;
1380
1381err_mw:
1382 hns_roce_mw_free(hr_dev, mw);
1383
1384err_bitmap:
1385 kfree(mw);
1386
1387 return ERR_PTR(ret);
1388}
1389
1390int hns_roce_dealloc_mw(struct ib_mw *ibmw)
1391{
1392 struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
1393 struct hns_roce_mw *mw = to_hr_mw(ibmw);
1394
1395 hns_roce_mw_free(hr_dev, mw);
1396 kfree(mw);
1397
1398 return 0;
1399}
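The MW and FRMR hooks added above are reached through the standard verbs entry points. A sketch of how a kernel consumer would be expected to exercise the new FRMR path (demo_frmr_setup() is illustrative and not part of this patch); hns_roce_alloc_mr() and hns_roce_map_mr_sg() are what end up servicing these calls:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

static struct ib_mr *demo_frmr_setup(struct ib_pd *pd,
                                     struct scatterlist *sg, int nents)
{
        struct ib_mr *mr;
        int n;

        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
        if (IS_ERR(mr))
                return mr;

        /* builds the PBL through the driver's set_page callback */
        n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
        if (n != nents) {
                ib_dereg_mr(mr);
                return ERR_PTR(n < 0 ? n : -EINVAL);
        }

        return mr;
}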
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index efb7e961ca65..5ebf481a39d9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,6 +31,7 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/pci.h>
34#include <linux/platform_device.h> 35#include <linux/platform_device.h>
35#include <rdma/ib_addr.h> 36#include <rdma/ib_addr.h>
36#include <rdma/ib_umem.h> 37#include <rdma/ib_umem.h>
@@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
343{ 344{
344 u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); 345 u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
345 u8 max_sq_stride = ilog2(roundup_sq_stride); 346 u8 max_sq_stride = ilog2(roundup_sq_stride);
347 u32 ex_sge_num;
346 u32 page_size; 348 u32 page_size;
347 u32 max_cnt; 349 u32 max_cnt;
348 350
@@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
372 if (hr_qp->sq.max_gs > 2) 374 if (hr_qp->sq.max_gs > 2)
373 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * 375 hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
374 (hr_qp->sq.max_gs - 2)); 376 (hr_qp->sq.max_gs - 2));
377
378 if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
379 if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
380 dev_err(hr_dev->dev,
381 "The extended sge cnt error! sge_cnt=%d\n",
382 hr_qp->sge.sge_cnt);
383 return -EINVAL;
384 }
385 }
386
375 hr_qp->sge.sge_shift = 4; 387 hr_qp->sge.sge_shift = 4;
388 ex_sge_num = hr_qp->sge.sge_cnt;
376 389
377 /* Get buf size, SQ and RQ are aligned to page_szie */ 390 /* Get buf size, SQ and RQ are aligned to page_szie */
378 if (hr_dev->caps.max_sq_sg <= 2) { 391 if (hr_dev->caps.max_sq_sg <= 2) {
@@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
386 hr_qp->sq.wqe_shift), PAGE_SIZE); 399 hr_qp->sq.wqe_shift), PAGE_SIZE);
387 } else { 400 } else {
388 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 401 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
402 hr_qp->sge.sge_cnt =
403 max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
389 hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << 404 hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
390 hr_qp->rq.wqe_shift), page_size) + 405 hr_qp->rq.wqe_shift), page_size) +
391 HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << 406 HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
394 hr_qp->sq.wqe_shift), page_size); 409 hr_qp->sq.wqe_shift), page_size);
395 410
396 hr_qp->sq.offset = 0; 411 hr_qp->sq.offset = 0;
397 if (hr_qp->sge.sge_cnt) { 412 if (ex_sge_num) {
398 hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( 413 hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
399 (hr_qp->sq.wqe_cnt << 414 (hr_qp->sq.wqe_cnt <<
400 hr_qp->sq.wqe_shift), 415 hr_qp->sq.wqe_shift),
@@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
465 hr_qp->sge.sge_shift = 4; 480 hr_qp->sge.sge_shift = 4;
466 } 481 }
467 482
483 if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
484 if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
485 dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
486 hr_qp->sge.sge_cnt);
487 return -EINVAL;
488 }
489 }
490
468 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ 491 /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
469 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); 492 page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
470 hr_qp->sq.offset = 0; 493 hr_qp->sq.offset = 0;
@@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
472 page_size); 495 page_size);
473 496
474 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { 497 if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
498 hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
499 (u32)hr_qp->sge.sge_cnt);
475 hr_qp->sge.offset = size; 500 hr_qp->sge.offset = size;
476 size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << 501 size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
477 hr_qp->sge.sge_shift, page_size); 502 hr_qp->sge.sge_shift, page_size);
@@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
952 } 977 }
953 } 978 }
954 979
955 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 980 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
956 IB_LINK_LAYER_ETHERNET)) { 981 attr_mask)) {
957 dev_err(dev, "ib_modify_qp_is_ok failed\n"); 982 dev_err(dev, "ib_modify_qp_is_ok failed\n");
958 goto out; 983 goto out;
959 } 984 }
@@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
1106{ 1131{
1107 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; 1132 struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
1108 int reserved_from_top = 0; 1133 int reserved_from_top = 0;
1134 int reserved_from_bot;
1109 int ret; 1135 int ret;
1110 1136
1111 spin_lock_init(&qp_table->lock); 1137 spin_lock_init(&qp_table->lock);
1112 INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); 1138 INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
1113 1139
1114 /* A port include two SQP, six port total 12 */ 1140 /* In hw v1, a port include two SQP, six ports total 12 */
1141 if (hr_dev->caps.max_sq_sg <= 2)
1142 reserved_from_bot = SQP_NUM;
1143 else
1144 reserved_from_bot = hr_dev->caps.reserved_qps;
1145
1115 ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, 1146 ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
1116 hr_dev->caps.num_qps - 1, SQP_NUM, 1147 hr_dev->caps.num_qps - 1, reserved_from_bot,
1117 reserved_from_top); 1148 reserved_from_top);
1118 if (ret) { 1149 if (ret) {
1119 dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", 1150 dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
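The hns_roce_modify_qp() hunk above also reflects a core API change in this series: ib_modify_qp_is_ok() no longer takes a link-layer argument, which is why IB_LINK_LAYER_ETHERNET disappears from the call site. A minimal sketch of the updated check as a caller would use it (demo_check_modify() is illustrative):

#include <linux/errno.h>
#include <rdma/ib_verbs.h>

static int demo_check_modify(struct ib_qp *ibqp, enum ib_qp_state cur_state,
                             enum ib_qp_state new_state, int attr_mask)
{
        /* the link layer is no longer passed; the core derives what it needs */
        if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
                return -EINVAL;

        return 0;
}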
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 423818a7d333..771eb6bd0785 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
1689 unsigned long flags; 1689 unsigned long flags;
1690 1690
1691 rtnl_lock(); 1691 rtnl_lock();
1692 for_each_netdev_rcu(&init_net, ip_dev) { 1692 for_each_netdev(&init_net, ip_dev) {
1693 if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && 1693 if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
1694 (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || 1694 (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
1695 (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) { 1695 (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
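The i40iw hunk above walks net devices under rtnl_lock(), so the plain for_each_netdev() iterator is the right one; for_each_netdev_rcu() is only valid inside an RCU read-side section. A small sketch of the locking rule (demo-only, not the i40iw code):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>

static void demo_walk_netdevs(void)
{
        struct net_device *dev;

        rtnl_lock();
        for_each_netdev(&init_net, dev) {
                if (dev->flags & IFF_UP)
                        pr_debug("up device: %s\n", dev->name);
        }
        rtnl_unlock();
}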
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index e2e6c74a7452..102875872bea 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
2135} 2135}
2136 2136
2137/** 2137/**
2138 * i40iw_show_rev 2138 * hw_rev_show
2139 */ 2139 */
2140static ssize_t i40iw_show_rev(struct device *dev, 2140static ssize_t hw_rev_show(struct device *dev,
2141 struct device_attribute *attr, char *buf) 2141 struct device_attribute *attr, char *buf)
2142{ 2142{
2143 struct i40iw_ib_device *iwibdev = container_of(dev, 2143 struct i40iw_ib_device *iwibdev = container_of(dev,
2144 struct i40iw_ib_device, 2144 struct i40iw_ib_device,
@@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
2147 2147
2148 return sprintf(buf, "%x\n", hw_rev); 2148 return sprintf(buf, "%x\n", hw_rev);
2149} 2149}
2150static DEVICE_ATTR_RO(hw_rev);
2150 2151
2151/** 2152/**
2152 * i40iw_show_hca 2153 * hca_type_show
2153 */ 2154 */
2154static ssize_t i40iw_show_hca(struct device *dev, 2155static ssize_t hca_type_show(struct device *dev,
2155 struct device_attribute *attr, char *buf) 2156 struct device_attribute *attr, char *buf)
2156{ 2157{
2157 return sprintf(buf, "I40IW\n"); 2158 return sprintf(buf, "I40IW\n");
2158} 2159}
2160static DEVICE_ATTR_RO(hca_type);
2159 2161
2160/** 2162/**
2161 * i40iw_show_board 2163 * board_id_show
2162 */ 2164 */
2163static ssize_t i40iw_show_board(struct device *dev, 2165static ssize_t board_id_show(struct device *dev,
2164 struct device_attribute *attr, 2166 struct device_attribute *attr, char *buf)
2165 char *buf)
2166{ 2167{
2167 return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); 2168 return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
2168} 2169}
2170static DEVICE_ATTR_RO(board_id);
2169 2171
2170static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); 2172static struct attribute *i40iw_dev_attributes[] = {
2171static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); 2173 &dev_attr_hw_rev.attr,
2172static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); 2174 &dev_attr_hca_type.attr,
2175 &dev_attr_board_id.attr,
2176 NULL
2177};
2173 2178
2174static struct device_attribute *i40iw_dev_attributes[] = { 2179static const struct attribute_group i40iw_attr_group = {
2175 &dev_attr_hw_rev, 2180 .attrs = i40iw_dev_attributes,
2176 &dev_attr_hca_type,
2177 &dev_attr_board_id
2178}; 2181};
2179 2182
2180/** 2183/**
@@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
2752 i40iw_pr_err("iwdev == NULL\n"); 2755 i40iw_pr_err("iwdev == NULL\n");
2753 return NULL; 2756 return NULL;
2754 } 2757 }
2755 strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
2756 iwibdev->ibdev.owner = THIS_MODULE; 2758 iwibdev->ibdev.owner = THIS_MODULE;
2757 iwdev->iwibdev = iwibdev; 2759 iwdev->iwibdev = iwibdev;
2758 iwibdev->iwdev = iwdev; 2760 iwibdev->iwdev = iwdev;
@@ -2851,20 +2853,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
2851} 2853}
2852 2854
2853/** 2855/**
2854 * i40iw_unregister_rdma_device - unregister of iwarp from IB
2855 * @iwibdev: rdma device ptr
2856 */
2857static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
2858{
2859 int i;
2860
2861 for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
2862 device_remove_file(&iwibdev->ibdev.dev,
2863 i40iw_dev_attributes[i]);
2864 ib_unregister_device(&iwibdev->ibdev);
2865}
2866
2867/**
2868 * i40iw_destroy_rdma_device - destroy rdma device and free resources 2856 * i40iw_destroy_rdma_device - destroy rdma device and free resources
2869 * @iwibdev: IB device ptr 2857 * @iwibdev: IB device ptr
2870 */ 2858 */
@@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
2873 if (!iwibdev) 2861 if (!iwibdev)
2874 return; 2862 return;
2875 2863
2876 i40iw_unregister_rdma_device(iwibdev); 2864 ib_unregister_device(&iwibdev->ibdev);
2877 kfree(iwibdev->ibdev.iwcm); 2865 kfree(iwibdev->ibdev.iwcm);
2878 iwibdev->ibdev.iwcm = NULL; 2866 iwibdev->ibdev.iwcm = NULL;
2879 wait_event_timeout(iwibdev->iwdev->close_wq, 2867 wait_event_timeout(iwibdev->iwdev->close_wq,
@@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
2888 */ 2876 */
2889int i40iw_register_rdma_device(struct i40iw_device *iwdev) 2877int i40iw_register_rdma_device(struct i40iw_device *iwdev)
2890{ 2878{
2891 int i, ret; 2879 int ret;
2892 struct i40iw_ib_device *iwibdev; 2880 struct i40iw_ib_device *iwibdev;
2893 2881
2894 iwdev->iwibdev = i40iw_init_rdma_device(iwdev); 2882 iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
2895 if (!iwdev->iwibdev) 2883 if (!iwdev->iwibdev)
2896 return -ENOMEM; 2884 return -ENOMEM;
2897 iwibdev = iwdev->iwibdev; 2885 iwibdev = iwdev->iwibdev;
2898 2886 rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
2899 iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW; 2887 iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
2900 ret = ib_register_device(&iwibdev->ibdev, NULL); 2888 ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
2901 if (ret) 2889 if (ret)
2902 goto error; 2890 goto error;
2903 2891
2904 for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
2905 ret =
2906 device_create_file(&iwibdev->ibdev.dev,
2907 i40iw_dev_attributes[i]);
2908 if (ret) {
2909 while (i > 0) {
2910 i--;
2911 device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
2912 }
2913 ib_unregister_device(&iwibdev->ibdev);
2914 goto error;
2915 }
2916 }
2917 return 0; 2892 return 0;
2918error: 2893error:
2919 kfree(iwdev->iwibdev->ibdev.iwcm); 2894 kfree(iwdev->iwibdev->ibdev.iwcm);
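The i40iw sysfs conversion above is the pattern this series applies across the drivers: DEVICE_ATTR_RO() generates a dev_attr_<name> built around a <name>_show() callback, the attributes are collected into one attribute_group, and rdma_set_device_sysfs_group() hands that group to the RDMA core so it is created and torn down with the device, replacing the per-attribute device_create_file()/device_remove_file() loops. A minimal sketch (demo_* names are illustrative):

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sysfs.h>
#include <rdma/ib_verbs.h>

static ssize_t demo_rev_show(struct device *dev,
                             struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "0x1\n");
}
static DEVICE_ATTR_RO(demo_rev);

static struct attribute *demo_attrs[] = {
        &dev_attr_demo_rev.attr,
        NULL
};

static const struct attribute_group demo_attr_group = {
        .attrs = demo_attrs,
};

/* before ib_register_device(ibdev, "demo%d", NULL):
 *      rdma_set_device_sysfs_group(ibdev, &demo_attr_group);
 */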
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index db4aa13ebae0..d1de3285fd88 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,7 @@
1config MLX4_INFINIBAND 1config MLX4_INFINIBAND
2 tristate "Mellanox ConnectX HCA support" 2 tristate "Mellanox ConnectX HCA support"
3 depends on NETDEVICES && ETHERNET && PCI && INET 3 depends on NETDEVICES && ETHERNET && PCI && INET
4 depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
4 depends on MAY_USE_DEVLINK 5 depends on MAY_USE_DEVLINK
5 select NET_VENDOR_MELLANOX 6 select NET_VENDOR_MELLANOX
6 select MLX4_CORE 7 select MLX4_CORE
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index e5466d786bb1..8942f5f7f04d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
807 int err; 807 int err;
808 struct ib_port_attr pattr; 808 struct ib_port_attr pattr;
809 809
810 if (in_wc && in_wc->qp->qp_num) { 810 if (in_wc && in_wc->qp) {
811 pr_debug("received MAD: slid:%d sqpn:%d " 811 pr_debug("received MAD: port:%d slid:%d sqpn:%d "
812 "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", 812 "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
813 in_wc->slid, in_wc->src_qp, 813 port_num,
814 in_wc->dlid_path_bits, 814 in_wc->slid, in_wc->src_qp,
815 in_wc->qp->qp_num, 815 in_wc->dlid_path_bits,
816 in_wc->wc_flags, 816 in_wc->qp->qp_num,
817 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, 817 in_wc->wc_flags,
818 be16_to_cpu(in_mad->mad_hdr.attr_id)); 818 be64_to_cpu(in_mad->mad_hdr.tid),
819 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
820 be16_to_cpu(in_mad->mad_hdr.attr_id));
819 if (in_wc->wc_flags & IB_WC_GRH) { 821 if (in_wc->wc_flags & IB_WC_GRH) {
820 pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", 822 pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
821 be64_to_cpu(in_grh->sgid.global.subnet_prefix), 823 be64_to_cpu(in_grh->sgid.global.subnet_prefix),
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0bbeaaae47e0..0def2323459c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1140 return 0; 1140 return 0;
1141} 1141}
1142 1142
1143static void mlx4_ib_vma_open(struct vm_area_struct *area)
1144{
1145 /* vma_open is called when a new VMA is created on top of our VMA.
1146 * This is done through either mremap flow or split_vma (usually due
1147 * to mlock, madvise, munmap, etc.). We do not support a clone of the
1148 * vma, as this VMA is strongly hardware related. Therefore we set the
1149 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1150 * calling us again and trying to do incorrect actions. We assume that
1151 * the original vma size is exactly a single page that there will be no
1152 * "splitting" operations on.
1153 */
1154 area->vm_ops = NULL;
1155}
1156
1157static void mlx4_ib_vma_close(struct vm_area_struct *area)
1158{
1159 struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
1160
1161 /* It's guaranteed that all VMAs opened on a FD are closed before the
1162 * file itself is closed, therefore no sync is needed with the regular
1163 * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
1164 * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
1165 * The close operation is usually called under mm->mmap_sem except when
1166 * process is exiting. The exiting case is handled explicitly as part
1167 * of mlx4_ib_disassociate_ucontext.
1168 */
1169 mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
1170 area->vm_private_data;
1171
1172 /* set the vma context pointer to null in the mlx4_ib driver's private
1173 * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
1174 */
1175 mlx4_ib_vma_priv_data->vma = NULL;
1176}
1177
1178static const struct vm_operations_struct mlx4_ib_vm_ops = {
1179 .open = mlx4_ib_vma_open,
1180 .close = mlx4_ib_vma_close
1181};
1182
1183static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 1143static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1184{ 1144{
1185 int i;
1186 struct vm_area_struct *vma;
1187 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1188
1189 /* need to protect from a race on closing the vma as part of
1190 * mlx4_ib_vma_close().
1191 */
1192 for (i = 0; i < HW_BAR_COUNT; i++) {
1193 vma = context->hw_bar_info[i].vma;
1194 if (!vma)
1195 continue;
1196
1197 zap_vma_ptes(context->hw_bar_info[i].vma,
1198 context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
1199
1200 context->hw_bar_info[i].vma->vm_flags &=
1201 ~(VM_SHARED | VM_MAYSHARE);
1202 /* context going to be destroyed, should not access ops any more */
1203 context->hw_bar_info[i].vma->vm_ops = NULL;
1204 }
1205}
1206
1207static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
1208 struct mlx4_ib_vma_private_data *vma_private_data)
1209{
1210 vma_private_data->vma = vma;
1211 vma->vm_private_data = vma_private_data;
1212 vma->vm_ops = &mlx4_ib_vm_ops;
1213} 1145}
1214 1146
1215static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) 1147static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1216{ 1148{
1217 struct mlx4_ib_dev *dev = to_mdev(context->device); 1149 struct mlx4_ib_dev *dev = to_mdev(context->device);
1218 struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
1219 1150
1220 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1151 switch (vma->vm_pgoff) {
1221 return -EINVAL; 1152 case 0:
1222 1153 return rdma_user_mmap_io(context, vma,
1223 if (vma->vm_pgoff == 0) { 1154 to_mucontext(context)->uar.pfn,
1224 /* We prevent double mmaping on same context */ 1155 PAGE_SIZE,
1225 if (mucontext->hw_bar_info[HW_BAR_DB].vma) 1156 pgprot_noncached(vma->vm_page_prot));
1226 return -EINVAL;
1227
1228 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1229
1230 if (io_remap_pfn_range(vma, vma->vm_start,
1231 to_mucontext(context)->uar.pfn,
1232 PAGE_SIZE, vma->vm_page_prot))
1233 return -EAGAIN;
1234
1235 mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
1236 1157
1237 } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { 1158 case 1:
1238 /* We prevent double mmaping on same context */ 1159 if (dev->dev->caps.bf_reg_size == 0)
1239 if (mucontext->hw_bar_info[HW_BAR_BF].vma)
1240 return -EINVAL; 1160 return -EINVAL;
1161 return rdma_user_mmap_io(
1162 context, vma,
1163 to_mucontext(context)->uar.pfn +
1164 dev->dev->caps.num_uars,
1165 PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
1241 1166
1242 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); 1167 case 3: {
1243
1244 if (io_remap_pfn_range(vma, vma->vm_start,
1245 to_mucontext(context)->uar.pfn +
1246 dev->dev->caps.num_uars,
1247 PAGE_SIZE, vma->vm_page_prot))
1248 return -EAGAIN;
1249
1250 mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
1251
1252 } else if (vma->vm_pgoff == 3) {
1253 struct mlx4_clock_params params; 1168 struct mlx4_clock_params params;
1254 int ret; 1169 int ret;
1255 1170
1256 /* We prevent double mmaping on same context */
1257 if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
1258 return -EINVAL;
1259
1260 ret = mlx4_get_internal_clock_params(dev->dev, &params); 1171 ret = mlx4_get_internal_clock_params(dev->dev, &params);
1261
1262 if (ret) 1172 if (ret)
1263 return ret; 1173 return ret;
1264 1174
1265 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1175 return rdma_user_mmap_io(
1266 if (io_remap_pfn_range(vma, vma->vm_start, 1176 context, vma,
1267 (pci_resource_start(dev->dev->persist->pdev, 1177 (pci_resource_start(dev->dev->persist->pdev,
1268 params.bar) + 1178 params.bar) +
1269 params.offset) 1179 params.offset) >>
1270 >> PAGE_SHIFT, 1180 PAGE_SHIFT,
1271 PAGE_SIZE, vma->vm_page_prot)) 1181 PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
1272 return -EAGAIN;
1273
1274 mlx4_ib_set_vma_data(vma,
1275 &mucontext->hw_bar_info[HW_BAR_CLOCK]);
1276 } else {
1277 return -EINVAL;
1278 } 1182 }
1279 1183
1280 return 0; 1184 default:
1185 return -EINVAL;
1186 }
1281} 1187}
1282 1188
1283static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, 1189static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
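The two hunks above replace the driver's private VMA bookkeeping (mlx4_ib_set_vma_data() plus the zap loop in disassociate_ucontext) with the new core helper rdma_user_mmap_io(), which remaps the BAR page and tracks the VMA so the core can zap it on disassociate. A minimal sketch of the resulting pattern, assuming a hypothetical "foo" driver with a made-up db_pfn field:

#include <linux/mm.h>
#include <rdma/ib_verbs.h>

/* Hypothetical per-context state; only db_pfn matters for the sketch. */
struct foo_ucontext {
	struct ib_ucontext ibucontext;
	unsigned long db_pfn;
};

static int foo_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct foo_ucontext *uctx =
		container_of(context, struct foo_ucontext, ibucontext);

	switch (vma->vm_pgoff) {
	case 0:
		/* Doorbell BAR page, mapped uncached; the helper checks the
		 * VMA length and records the mapping for later zapping. */
		return rdma_user_mmap_io(context, vma, uctx->db_pfn,
					 PAGE_SIZE,
					 pgprot_noncached(vma->vm_page_prot));
	default:
		return -EINVAL;
	}
}

With the core tracking the mappings, the driver-side disassociate_ucontext zap loop above becomes empty.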
@@ -2133,39 +2039,43 @@ out:
2133 return err; 2039 return err;
2134} 2040}
2135 2041
2136static ssize_t show_hca(struct device *device, struct device_attribute *attr, 2042static ssize_t hca_type_show(struct device *device,
2137 char *buf) 2043 struct device_attribute *attr, char *buf)
2138{ 2044{
2139 struct mlx4_ib_dev *dev = 2045 struct mlx4_ib_dev *dev =
2140 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2046 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2141 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); 2047 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2142} 2048}
2049static DEVICE_ATTR_RO(hca_type);
2143 2050
2144static ssize_t show_rev(struct device *device, struct device_attribute *attr, 2051static ssize_t hw_rev_show(struct device *device,
2145 char *buf) 2052 struct device_attribute *attr, char *buf)
2146{ 2053{
2147 struct mlx4_ib_dev *dev = 2054 struct mlx4_ib_dev *dev =
2148 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2055 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2149 return sprintf(buf, "%x\n", dev->dev->rev_id); 2056 return sprintf(buf, "%x\n", dev->dev->rev_id);
2150} 2057}
2058static DEVICE_ATTR_RO(hw_rev);
2151 2059
2152static ssize_t show_board(struct device *device, struct device_attribute *attr, 2060static ssize_t board_id_show(struct device *device,
2153 char *buf) 2061 struct device_attribute *attr, char *buf)
2154{ 2062{
2155 struct mlx4_ib_dev *dev = 2063 struct mlx4_ib_dev *dev =
2156 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 2064 container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2157 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 2065 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
2158 dev->dev->board_id); 2066 dev->dev->board_id);
2159} 2067}
2068static DEVICE_ATTR_RO(board_id);
2160 2069
2161static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2070static struct attribute *mlx4_class_attributes[] = {
2162static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2071 &dev_attr_hw_rev.attr,
2163static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2072 &dev_attr_hca_type.attr,
2073 &dev_attr_board_id.attr,
2074 NULL
2075};
2164 2076
2165static struct device_attribute *mlx4_class_attributes[] = { 2077static const struct attribute_group mlx4_attr_group = {
2166 &dev_attr_hw_rev, 2078 .attrs = mlx4_class_attributes,
2167 &dev_attr_hca_type,
2168 &dev_attr_board_id
2169}; 2079};
2170 2080
2171struct diag_counter { 2081struct diag_counter {
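The sysfs conversion above is the standard attribute-group pattern: DEVICE_ATTR_RO() generates dev_attr_<name> from a <name>_show() function, the attributes are collected into an attribute_group, and the group is handed to the RDMA core before registration, replacing the per-attribute device_create_file() calls later in mlx4_ib_add(). A minimal sketch with a hypothetical attribute and driver name:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t board_rev_show(struct device *device,
			      struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 1);	/* placeholder value */
}
static DEVICE_ATTR_RO(board_rev);

static struct attribute *foo_class_attributes[] = {
	&dev_attr_board_rev.attr,
	NULL
};

static const struct attribute_group foo_attr_group = {
	.attrs = foo_class_attributes,
};

/* In the add path, before registration:
 *	rdma_set_device_sysfs_group(&ibdev->ib_dev, &foo_attr_group);
 *	if (ib_register_device(&ibdev->ib_dev, "foo_%d", NULL))
 *		goto err;
 * The core creates the files and also formats the "%d"-style device name,
 * which is why the strlcpy of "mlx4_%d" into ib_dev.name goes away.
 */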
@@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2636 ibdev->dev = dev; 2546 ibdev->dev = dev;
2637 ibdev->bond_next_port = 0; 2547 ibdev->bond_next_port = 0;
2638 2548
2639 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
2640 ibdev->ib_dev.owner = THIS_MODULE; 2549 ibdev->ib_dev.owner = THIS_MODULE;
2641 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; 2550 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
2642 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; 2551 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2898 if (mlx4_ib_alloc_diag_counters(ibdev)) 2807 if (mlx4_ib_alloc_diag_counters(ibdev))
2899 goto err_steer_free_bitmap; 2808 goto err_steer_free_bitmap;
2900 2809
2810 rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
2901 ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4; 2811 ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
2902 if (ib_register_device(&ibdev->ib_dev, NULL)) 2812 if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
2903 goto err_diag_counters; 2813 goto err_diag_counters;
2904 2814
2905 if (mlx4_ib_mad_init(ibdev)) 2815 if (mlx4_ib_mad_init(ibdev))
@@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2922 goto err_notif; 2832 goto err_notif;
2923 } 2833 }
2924 2834
2925 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2926 if (device_create_file(&ibdev->ib_dev.dev,
2927 mlx4_class_attributes[j]))
2928 goto err_notif;
2929 }
2930
2931 ibdev->ib_active = true; 2835 ibdev->ib_active = true;
2932 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 2836 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2933 devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i), 2837 devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 81ffc007e0a1..d844831179cf 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
673 if (!list_empty(&group->pending_list)) 673 if (!list_empty(&group->pending_list))
674 req = list_first_entry(&group->pending_list, 674 req = list_first_entry(&group->pending_list,
675 struct mcast_req, group_list); 675 struct mcast_req, group_list);
676 if ((method == IB_MGMT_METHOD_GET_RESP)) { 676 if (method == IB_MGMT_METHOD_GET_RESP) {
677 if (req) { 677 if (req) {
678 send_reply_to_slave(req->func, group, &req->sa_mad, status); 678 send_reply_to_slave(req->func, group, &req->sa_mad, status);
679 --group->func[req->func].num_pend_reqs; 679 --group->func[req->func].num_pend_reqs;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e10dccc7958f..8850dfc3826d 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -80,16 +80,11 @@ enum hw_bar_type {
80 HW_BAR_COUNT 80 HW_BAR_COUNT
81}; 81};
82 82
83struct mlx4_ib_vma_private_data {
84 struct vm_area_struct *vma;
85};
86
87struct mlx4_ib_ucontext { 83struct mlx4_ib_ucontext {
88 struct ib_ucontext ibucontext; 84 struct ib_ucontext ibucontext;
89 struct mlx4_uar uar; 85 struct mlx4_uar uar;
90 struct list_head db_page_list; 86 struct list_head db_page_list;
91 struct mutex db_page_mutex; 87 struct mutex db_page_mutex;
92 struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
93 struct list_head wqn_ranges_list; 88 struct list_head wqn_ranges_list;
94 struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */ 89 struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
95}; 90};
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 6dd3cd2c2f80..0711ca1dfb8f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2629,7 +2629,6 @@ enum {
2629static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 2629static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2630 int attr_mask, struct ib_udata *udata) 2630 int attr_mask, struct ib_udata *udata)
2631{ 2631{
2632 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
2633 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 2632 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
2634 struct mlx4_ib_qp *qp = to_mqp(ibqp); 2633 struct mlx4_ib_qp *qp = to_mqp(ibqp);
2635 enum ib_qp_state cur_state, new_state; 2634 enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2639 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 2638 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
2640 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 2639 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
2641 2640
2642 if (cur_state != new_state || cur_state != IB_QPS_RESET) {
2643 int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
2644 ll = rdma_port_get_link_layer(&dev->ib_dev, port);
2645 }
2646
2647 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 2641 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
2648 attr_mask, ll)) { 2642 attr_mask)) {
2649 pr_debug("qpn 0x%x: invalid attribute mask specified " 2643 pr_debug("qpn 0x%x: invalid attribute mask specified "
2650 "for transition %d to %d. qp_type %d," 2644 "for transition %d to %d. qp_type %d,"
2651 " attr_mask 0x%x\n", 2645 " attr_mask 0x%x\n",
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index e219093d2764..752bdd536130 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -818,9 +818,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
818 if (!mlx4_is_master(dev->dev)) 818 if (!mlx4_is_master(dev->dev))
819 return 0; 819 return 0;
820 820
821 dev->iov_parent = 821 dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj);
822 kobject_create_and_add("iov",
823 kobject_get(dev->ib_dev.ports_parent->parent));
824 if (!dev->iov_parent) { 822 if (!dev->iov_parent) {
825 ret = -ENOMEM; 823 ret = -ENOMEM;
826 goto err; 824 goto err;
@@ -850,7 +848,6 @@ err_add_entries:
850err_ports: 848err_ports:
851 kobject_put(dev->iov_parent); 849 kobject_put(dev->iov_parent);
852err: 850err:
853 kobject_put(dev->ib_dev.ports_parent->parent);
854 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret); 851 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
855 return ret; 852 return ret;
856} 853}
@@ -886,5 +883,4 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
886 kobject_put(device->ports_parent); 883 kobject_put(device->ports_parent);
887 kobject_put(device->iov_parent); 884 kobject_put(device->iov_parent);
888 kobject_put(device->iov_parent); 885 kobject_put(device->iov_parent);
889 kobject_put(device->ib_dev.ports_parent->parent);
890} 886}
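The sysfs.c change parents the "iov" directory directly on the IB device's kobject instead of taking an extra reference on ports_parent->parent, so the matching kobject_put() calls in the error and unregister paths disappear. A sketch of the simplified create/teardown pair (the foo_* wrapper is illustrative):

#include <linux/kobject.h>
#include <rdma/ib_verbs.h>

static struct kobject *foo_create_iov_dir(struct ib_device *ibdev)
{
	/* Lives under the device's sysfs directory; the device kobject
	 * already pins the parent for the lifetime of the child. */
	return kobject_create_and_add("iov", &ibdev->dev.kobj);
}

/* Teardown is a single kobject_put() on the returned kobject. */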
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index c84fef9a8a08..ca060a2e2b36 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -197,3 +197,132 @@ int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
197 return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT, 197 return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
198 0, 0); 198 0, 0);
199} 199}
200
201void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
202{
203 u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
204 u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
205
206 MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
207 MLX5_SET(destroy_tir_in, in, tirn, tirn);
208 MLX5_SET(destroy_tir_in, in, uid, uid);
209 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
210}
211
212void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
213{
214 u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
215 u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
216
217 MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
218 MLX5_SET(destroy_tis_in, in, tisn, tisn);
219 MLX5_SET(destroy_tis_in, in, uid, uid);
220 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
221}
222
223void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
224{
225 u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
226 u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
227
228 MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
229 MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
230 MLX5_SET(destroy_rqt_in, in, uid, uid);
231 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
232}
233
234int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
235 u16 uid)
236{
237 u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
238 u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
239 int err;
240
241 MLX5_SET(alloc_transport_domain_in, in, opcode,
242 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
243
244 err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
245 if (!err)
246 *tdn = MLX5_GET(alloc_transport_domain_out, out,
247 transport_domain);
248
249 return err;
250}
251
252void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
253 u16 uid)
254{
255 u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
256 u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
257
258 MLX5_SET(dealloc_transport_domain_in, in, opcode,
259 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
260 MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
261 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
262}
263
264void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
265{
266 u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
267 u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
268
269 MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
270 MLX5_SET(dealloc_pd_in, in, pd, pdn);
271 MLX5_SET(dealloc_pd_in, in, uid, uid);
272 mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
273}
274
275int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
276 u32 qpn, u16 uid)
277{
278 u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
279 u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
280 void *gid;
281
282 MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
283 MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
284 MLX5_SET(attach_to_mcg_in, in, uid, uid);
285 gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
286 memcpy(gid, mgid, sizeof(*mgid));
287 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
288}
289
290int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
291 u32 qpn, u16 uid)
292{
293 u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
294 u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
295 void *gid;
296
297 MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
298 MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
299 MLX5_SET(detach_from_mcg_in, in, uid, uid);
300 gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
301 memcpy(gid, mgid, sizeof(*mgid));
302 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
303}
304
305int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
306{
307 u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
308 u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
309 int err;
310
311 MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
312 MLX5_SET(alloc_xrcd_in, in, uid, uid);
313 err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
314 if (!err)
315 *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
316 return err;
317}
318
319int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
320{
321 u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
322 u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
323
324 MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
325 MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
326 MLX5_SET(dealloc_xrcd_in, in, uid, uid);
327 return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
328}
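All of the wrappers added to cmd.c share one shape: zero an inbox/outbox pair sized with MLX5_ST_SZ_DW(), set the opcode, the object identifier and the owning DEVX uid, then run the command through mlx5_cmd_exec(). Passing uid 0 keeps the kernel-owned behaviour; a non-zero uid lets firmware attribute the object to the user context that created it. A sketch of a caller choosing the uid (foo_release_pd is hypothetical):

static void foo_release_pd(struct mlx5_ib_dev *dev, u32 pdn,
			   struct mlx5_ib_ucontext *ctx)
{
	/* ctx == NULL means a kernel-owned PD, hence uid 0. */
	mlx5_cmd_dealloc_pd(dev->mdev, pdn, ctx ? ctx->devx_uid : 0);
}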
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 88cbb1c41703..c03c56455534 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -47,4 +47,18 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
47int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr, 47int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
48 u64 length, u32 alignment); 48 u64 length, u32 alignment);
49int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length); 49int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length);
50void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
51void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
52void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
53void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
54int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
55 u16 uid);
56void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
57 u16 uid);
58int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
59 u32 qpn, u16 uid);
60int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
61 u32 qpn, u16 uid);
62int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
63int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
50#endif /* MLX5_IB_CMD_H */ 64#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index cca1820802b8..7d769b5538b4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -874,6 +874,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
874 cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD; 874 cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
875 } 875 }
876 876
877 MLX5_SET(create_cq_in, *cqb, uid, to_mucontext(context)->devx_uid);
877 return 0; 878 return 0;
878 879
879err_cqb: 880err_cqb:
@@ -1454,7 +1455,7 @@ ex:
1454 return err; 1455 return err;
1455} 1456}
1456 1457
1457int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) 1458int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
1458{ 1459{
1459 struct mlx5_ib_cq *cq; 1460 struct mlx5_ib_cq *cq;
1460 1461
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 66dc337e49a7..61aab7c0c513 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -19,7 +19,7 @@
19#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in) 19#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
20struct devx_obj { 20struct devx_obj {
21 struct mlx5_core_dev *mdev; 21 struct mlx5_core_dev *mdev;
22 u32 obj_id; 22 u64 obj_id;
23 u32 dinlen; /* destroy inbox length */ 23 u32 dinlen; /* destroy inbox length */
24 u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; 24 u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
25}; 25};
@@ -45,13 +45,14 @@ static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
45 return to_mucontext(ib_uverbs_get_ucontext(file)); 45 return to_mucontext(ib_uverbs_get_ucontext(file));
46} 46}
47 47
48int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 48int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
49{ 49{
50 u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; 50 u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
51 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; 51 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
52 u64 general_obj_types; 52 u64 general_obj_types;
53 void *hdr; 53 void *hdr;
54 int err; 54 int err;
55 u16 uid;
55 56
56 hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); 57 hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
57 58
@@ -60,9 +61,6 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
60 !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) 61 !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
61 return -EINVAL; 62 return -EINVAL;
62 63
63 if (!capable(CAP_NET_RAW))
64 return -EPERM;
65
66 MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 64 MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
67 MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); 65 MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
68 66
@@ -70,19 +68,18 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *contex
70 if (err) 68 if (err)
71 return err; 69 return err;
72 70
73 context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 71 uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
74 return 0; 72 return uid;
75} 73}
76 74
77void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, 75void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
78 struct mlx5_ib_ucontext *context)
79{ 76{
80 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; 77 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
81 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; 78 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
82 79
83 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 80 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
84 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); 81 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
85 MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid); 82 MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
86 83
87 mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); 84 mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
88} 85}
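mlx5_ib_devx_create() now returns the allocated uid instead of storing it in the ucontext, and the CAP_NET_RAW check moves out of allocation and into the command path. A sketch of how the alloc/dealloc ucontext paths are expected to consume the new return value (the foo_* names are illustrative):

static int foo_enable_devx(struct mlx5_ib_dev *dev,
			   struct mlx5_ib_ucontext *c)
{
	int uid = mlx5_ib_devx_create(dev);

	if (uid < 0)
		return uid;	/* no firmware support or command failure */
	c->devx_uid = uid;
	return 0;
}

static void foo_disable_devx(struct mlx5_ib_dev *dev,
			     struct mlx5_ib_ucontext *c)
{
	if (c->devx_uid)
		mlx5_ib_devx_destroy(dev, c->devx_uid);
}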
@@ -109,150 +106,218 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
109 } 106 }
110} 107}
111 108
109/*
110 * As the obj_id in the firmware is not globally unique the object type
111 * must be considered upon checking for a valid object id.
112 * For that the opcode of the creator command is encoded as part of the obj_id.
113 */
114static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
115{
116 return ((u64)opcode << 32) | obj_id;
117}
118
112static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) 119static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
113{ 120{
114 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 121 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
115 u32 obj_id; 122 u64 obj_id;
116 123
117 switch (opcode) { 124 switch (opcode) {
118 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: 125 case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
119 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: 126 case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
120 obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); 127 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT,
128 MLX5_GET(general_obj_in_cmd_hdr, in,
129 obj_id));
121 break; 130 break;
122 case MLX5_CMD_OP_QUERY_MKEY: 131 case MLX5_CMD_OP_QUERY_MKEY:
123 obj_id = MLX5_GET(query_mkey_in, in, mkey_index); 132 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
133 MLX5_GET(query_mkey_in, in,
134 mkey_index));
124 break; 135 break;
125 case MLX5_CMD_OP_QUERY_CQ: 136 case MLX5_CMD_OP_QUERY_CQ:
126 obj_id = MLX5_GET(query_cq_in, in, cqn); 137 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
138 MLX5_GET(query_cq_in, in, cqn));
127 break; 139 break;
128 case MLX5_CMD_OP_MODIFY_CQ: 140 case MLX5_CMD_OP_MODIFY_CQ:
129 obj_id = MLX5_GET(modify_cq_in, in, cqn); 141 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
142 MLX5_GET(modify_cq_in, in, cqn));
130 break; 143 break;
131 case MLX5_CMD_OP_QUERY_SQ: 144 case MLX5_CMD_OP_QUERY_SQ:
132 obj_id = MLX5_GET(query_sq_in, in, sqn); 145 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
146 MLX5_GET(query_sq_in, in, sqn));
133 break; 147 break;
134 case MLX5_CMD_OP_MODIFY_SQ: 148 case MLX5_CMD_OP_MODIFY_SQ:
135 obj_id = MLX5_GET(modify_sq_in, in, sqn); 149 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
150 MLX5_GET(modify_sq_in, in, sqn));
136 break; 151 break;
137 case MLX5_CMD_OP_QUERY_RQ: 152 case MLX5_CMD_OP_QUERY_RQ:
138 obj_id = MLX5_GET(query_rq_in, in, rqn); 153 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
154 MLX5_GET(query_rq_in, in, rqn));
139 break; 155 break;
140 case MLX5_CMD_OP_MODIFY_RQ: 156 case MLX5_CMD_OP_MODIFY_RQ:
141 obj_id = MLX5_GET(modify_rq_in, in, rqn); 157 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
158 MLX5_GET(modify_rq_in, in, rqn));
142 break; 159 break;
143 case MLX5_CMD_OP_QUERY_RMP: 160 case MLX5_CMD_OP_QUERY_RMP:
144 obj_id = MLX5_GET(query_rmp_in, in, rmpn); 161 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
162 MLX5_GET(query_rmp_in, in, rmpn));
145 break; 163 break;
146 case MLX5_CMD_OP_MODIFY_RMP: 164 case MLX5_CMD_OP_MODIFY_RMP:
147 obj_id = MLX5_GET(modify_rmp_in, in, rmpn); 165 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
166 MLX5_GET(modify_rmp_in, in, rmpn));
148 break; 167 break;
149 case MLX5_CMD_OP_QUERY_RQT: 168 case MLX5_CMD_OP_QUERY_RQT:
150 obj_id = MLX5_GET(query_rqt_in, in, rqtn); 169 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
170 MLX5_GET(query_rqt_in, in, rqtn));
151 break; 171 break;
152 case MLX5_CMD_OP_MODIFY_RQT: 172 case MLX5_CMD_OP_MODIFY_RQT:
153 obj_id = MLX5_GET(modify_rqt_in, in, rqtn); 173 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
174 MLX5_GET(modify_rqt_in, in, rqtn));
154 break; 175 break;
155 case MLX5_CMD_OP_QUERY_TIR: 176 case MLX5_CMD_OP_QUERY_TIR:
156 obj_id = MLX5_GET(query_tir_in, in, tirn); 177 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
178 MLX5_GET(query_tir_in, in, tirn));
157 break; 179 break;
158 case MLX5_CMD_OP_MODIFY_TIR: 180 case MLX5_CMD_OP_MODIFY_TIR:
159 obj_id = MLX5_GET(modify_tir_in, in, tirn); 181 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
182 MLX5_GET(modify_tir_in, in, tirn));
160 break; 183 break;
161 case MLX5_CMD_OP_QUERY_TIS: 184 case MLX5_CMD_OP_QUERY_TIS:
162 obj_id = MLX5_GET(query_tis_in, in, tisn); 185 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
186 MLX5_GET(query_tis_in, in, tisn));
163 break; 187 break;
164 case MLX5_CMD_OP_MODIFY_TIS: 188 case MLX5_CMD_OP_MODIFY_TIS:
165 obj_id = MLX5_GET(modify_tis_in, in, tisn); 189 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
190 MLX5_GET(modify_tis_in, in, tisn));
166 break; 191 break;
167 case MLX5_CMD_OP_QUERY_FLOW_TABLE: 192 case MLX5_CMD_OP_QUERY_FLOW_TABLE:
168 obj_id = MLX5_GET(query_flow_table_in, in, table_id); 193 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
194 MLX5_GET(query_flow_table_in, in,
195 table_id));
169 break; 196 break;
170 case MLX5_CMD_OP_MODIFY_FLOW_TABLE: 197 case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
171 obj_id = MLX5_GET(modify_flow_table_in, in, table_id); 198 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
199 MLX5_GET(modify_flow_table_in, in,
200 table_id));
172 break; 201 break;
173 case MLX5_CMD_OP_QUERY_FLOW_GROUP: 202 case MLX5_CMD_OP_QUERY_FLOW_GROUP:
174 obj_id = MLX5_GET(query_flow_group_in, in, group_id); 203 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
204 MLX5_GET(query_flow_group_in, in,
205 group_id));
175 break; 206 break;
176 case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: 207 case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
177 obj_id = MLX5_GET(query_fte_in, in, flow_index); 208 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
209 MLX5_GET(query_fte_in, in,
210 flow_index));
178 break; 211 break;
179 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: 212 case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
180 obj_id = MLX5_GET(set_fte_in, in, flow_index); 213 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
214 MLX5_GET(set_fte_in, in, flow_index));
181 break; 215 break;
182 case MLX5_CMD_OP_QUERY_Q_COUNTER: 216 case MLX5_CMD_OP_QUERY_Q_COUNTER:
183 obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id); 217 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
218 MLX5_GET(query_q_counter_in, in,
219 counter_set_id));
184 break; 220 break;
185 case MLX5_CMD_OP_QUERY_FLOW_COUNTER: 221 case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
186 obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id); 222 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
223 MLX5_GET(query_flow_counter_in, in,
224 flow_counter_id));
187 break; 225 break;
188 case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: 226 case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
189 obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id); 227 obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
228 MLX5_GET(general_obj_in_cmd_hdr, in,
229 obj_id));
190 break; 230 break;
191 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: 231 case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
192 obj_id = MLX5_GET(query_scheduling_element_in, in, 232 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
193 scheduling_element_id); 233 MLX5_GET(query_scheduling_element_in,
234 in, scheduling_element_id));
194 break; 235 break;
195 case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: 236 case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
196 obj_id = MLX5_GET(modify_scheduling_element_in, in, 237 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
197 scheduling_element_id); 238 MLX5_GET(modify_scheduling_element_in,
239 in, scheduling_element_id));
198 break; 240 break;
199 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: 241 case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
200 obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); 242 obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
243 MLX5_GET(add_vxlan_udp_dport_in, in,
244 vxlan_udp_port));
201 break; 245 break;
202 case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: 246 case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
203 obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index); 247 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
248 MLX5_GET(query_l2_table_entry_in, in,
249 table_index));
204 break; 250 break;
205 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: 251 case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
206 obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); 252 obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
253 MLX5_GET(set_l2_table_entry_in, in,
254 table_index));
207 break; 255 break;
208 case MLX5_CMD_OP_QUERY_QP: 256 case MLX5_CMD_OP_QUERY_QP:
209 obj_id = MLX5_GET(query_qp_in, in, qpn); 257 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
258 MLX5_GET(query_qp_in, in, qpn));
210 break; 259 break;
211 case MLX5_CMD_OP_RST2INIT_QP: 260 case MLX5_CMD_OP_RST2INIT_QP:
212 obj_id = MLX5_GET(rst2init_qp_in, in, qpn); 261 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
262 MLX5_GET(rst2init_qp_in, in, qpn));
213 break; 263 break;
214 case MLX5_CMD_OP_INIT2RTR_QP: 264 case MLX5_CMD_OP_INIT2RTR_QP:
215 obj_id = MLX5_GET(init2rtr_qp_in, in, qpn); 265 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
266 MLX5_GET(init2rtr_qp_in, in, qpn));
216 break; 267 break;
217 case MLX5_CMD_OP_RTR2RTS_QP: 268 case MLX5_CMD_OP_RTR2RTS_QP:
218 obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn); 269 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
270 MLX5_GET(rtr2rts_qp_in, in, qpn));
219 break; 271 break;
220 case MLX5_CMD_OP_RTS2RTS_QP: 272 case MLX5_CMD_OP_RTS2RTS_QP:
221 obj_id = MLX5_GET(rts2rts_qp_in, in, qpn); 273 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
274 MLX5_GET(rts2rts_qp_in, in, qpn));
222 break; 275 break;
223 case MLX5_CMD_OP_SQERR2RTS_QP: 276 case MLX5_CMD_OP_SQERR2RTS_QP:
224 obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn); 277 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
278 MLX5_GET(sqerr2rts_qp_in, in, qpn));
225 break; 279 break;
226 case MLX5_CMD_OP_2ERR_QP: 280 case MLX5_CMD_OP_2ERR_QP:
227 obj_id = MLX5_GET(qp_2err_in, in, qpn); 281 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
282 MLX5_GET(qp_2err_in, in, qpn));
228 break; 283 break;
229 case MLX5_CMD_OP_2RST_QP: 284 case MLX5_CMD_OP_2RST_QP:
230 obj_id = MLX5_GET(qp_2rst_in, in, qpn); 285 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
286 MLX5_GET(qp_2rst_in, in, qpn));
231 break; 287 break;
232 case MLX5_CMD_OP_QUERY_DCT: 288 case MLX5_CMD_OP_QUERY_DCT:
233 obj_id = MLX5_GET(query_dct_in, in, dctn); 289 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
290 MLX5_GET(query_dct_in, in, dctn));
234 break; 291 break;
235 case MLX5_CMD_OP_QUERY_XRQ: 292 case MLX5_CMD_OP_QUERY_XRQ:
236 obj_id = MLX5_GET(query_xrq_in, in, xrqn); 293 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
294 MLX5_GET(query_xrq_in, in, xrqn));
237 break; 295 break;
238 case MLX5_CMD_OP_QUERY_XRC_SRQ: 296 case MLX5_CMD_OP_QUERY_XRC_SRQ:
239 obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn); 297 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
298 MLX5_GET(query_xrc_srq_in, in,
299 xrc_srqn));
240 break; 300 break;
241 case MLX5_CMD_OP_ARM_XRC_SRQ: 301 case MLX5_CMD_OP_ARM_XRC_SRQ:
242 obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn); 302 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
303 MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
243 break; 304 break;
244 case MLX5_CMD_OP_QUERY_SRQ: 305 case MLX5_CMD_OP_QUERY_SRQ:
245 obj_id = MLX5_GET(query_srq_in, in, srqn); 306 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
307 MLX5_GET(query_srq_in, in, srqn));
246 break; 308 break;
247 case MLX5_CMD_OP_ARM_RQ: 309 case MLX5_CMD_OP_ARM_RQ:
248 obj_id = MLX5_GET(arm_rq_in, in, srq_number); 310 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
311 MLX5_GET(arm_rq_in, in, srq_number));
249 break; 312 break;
250 case MLX5_CMD_OP_DRAIN_DCT: 313 case MLX5_CMD_OP_DRAIN_DCT:
251 case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: 314 case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
252 obj_id = MLX5_GET(drain_dct_in, in, dctn); 315 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
316 MLX5_GET(drain_dct_in, in, dctn));
253 break; 317 break;
254 case MLX5_CMD_OP_ARM_XRQ: 318 case MLX5_CMD_OP_ARM_XRQ:
255 obj_id = MLX5_GET(arm_xrq_in, in, xrqn); 319 obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
320 MLX5_GET(arm_xrq_in, in, xrqn));
256 break; 321 break;
257 default: 322 default:
258 return false; 323 return false;
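get_enc_obj_id() exists because firmware object numbers are unique only per object type; packing the creating opcode into the upper 32 bits of the stored obj_id makes the comparison in devx_is_valid_obj_id() unambiguous across types. A small illustration of the encoding:

static bool foo_handle_matches(u64 handle, u16 create_opcode, u32 fw_id)
{
	/* CQ number 5 and QP number 5 yield different handles because
	 * MLX5_CMD_OP_CREATE_CQ != MLX5_CMD_OP_CREATE_QP. */
	return handle == (((u64)create_opcode << 32) | fw_id);
}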
@@ -264,11 +329,102 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
264 return false; 329 return false;
265} 330}
266 331
267static bool devx_is_obj_create_cmd(const void *in) 332static void devx_set_umem_valid(const void *in)
268{ 333{
269 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 334 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
270 335
271 switch (opcode) { 336 switch (opcode) {
337 case MLX5_CMD_OP_CREATE_MKEY:
338 MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
339 break;
340 case MLX5_CMD_OP_CREATE_CQ:
341 {
342 void *cqc;
343
344 MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
345 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
346 MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
347 break;
348 }
349 case MLX5_CMD_OP_CREATE_QP:
350 {
351 void *qpc;
352
353 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
354 MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
355 MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
356 break;
357 }
358
359 case MLX5_CMD_OP_CREATE_RQ:
360 {
361 void *rqc, *wq;
362
363 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
364 wq = MLX5_ADDR_OF(rqc, rqc, wq);
365 MLX5_SET(wq, wq, dbr_umem_valid, 1);
366 MLX5_SET(wq, wq, wq_umem_valid, 1);
367 break;
368 }
369
370 case MLX5_CMD_OP_CREATE_SQ:
371 {
372 void *sqc, *wq;
373
374 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
375 wq = MLX5_ADDR_OF(sqc, sqc, wq);
376 MLX5_SET(wq, wq, dbr_umem_valid, 1);
377 MLX5_SET(wq, wq, wq_umem_valid, 1);
378 break;
379 }
380
381 case MLX5_CMD_OP_MODIFY_CQ:
382 MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
383 break;
384
385 case MLX5_CMD_OP_CREATE_RMP:
386 {
387 void *rmpc, *wq;
388
389 rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
390 wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
391 MLX5_SET(wq, wq, dbr_umem_valid, 1);
392 MLX5_SET(wq, wq, wq_umem_valid, 1);
393 break;
394 }
395
396 case MLX5_CMD_OP_CREATE_XRQ:
397 {
398 void *xrqc, *wq;
399
400 xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
401 wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
402 MLX5_SET(wq, wq, dbr_umem_valid, 1);
403 MLX5_SET(wq, wq, wq_umem_valid, 1);
404 break;
405 }
406
407 case MLX5_CMD_OP_CREATE_XRC_SRQ:
408 {
409 void *xrc_srqc;
410
411 MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
412 xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
413 xrc_srq_context_entry);
414 MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
415 break;
416 }
417
418 default:
419 return;
420 }
421}
422
423static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
424{
425 *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
426
427 switch (*opcode) {
272 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: 428 case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
273 case MLX5_CMD_OP_CREATE_MKEY: 429 case MLX5_CMD_OP_CREATE_MKEY:
274 case MLX5_CMD_OP_CREATE_CQ: 430 case MLX5_CMD_OP_CREATE_CQ:
@@ -385,12 +541,49 @@ static bool devx_is_obj_query_cmd(const void *in)
385 } 541 }
386} 542}
387 543
544static bool devx_is_whitelist_cmd(void *in)
545{
546 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
547
548 switch (opcode) {
549 case MLX5_CMD_OP_QUERY_HCA_CAP:
550 case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
551 return true;
552 default:
553 return false;
554 }
555}
556
557static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
558{
559 if (devx_is_whitelist_cmd(cmd_in)) {
560 struct mlx5_ib_dev *dev;
561
562 if (c->devx_uid)
563 return c->devx_uid;
564
565 dev = to_mdev(c->ibucontext.device);
566 if (dev->devx_whitelist_uid)
567 return dev->devx_whitelist_uid;
568
569 return -EOPNOTSUPP;
570 }
571
572 if (!c->devx_uid)
573 return -EINVAL;
574
575 if (!capable(CAP_NET_RAW))
576 return -EPERM;
577
578 return c->devx_uid;
579}
388static bool devx_is_general_cmd(void *in) 580static bool devx_is_general_cmd(void *in)
389{ 581{
390 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); 582 u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
391 583
392 switch (opcode) { 584 switch (opcode) {
393 case MLX5_CMD_OP_QUERY_HCA_CAP: 585 case MLX5_CMD_OP_QUERY_HCA_CAP:
586 case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
394 case MLX5_CMD_OP_QUERY_VPORT_STATE: 587 case MLX5_CMD_OP_QUERY_VPORT_STATE:
395 case MLX5_CMD_OP_QUERY_ADAPTER: 588 case MLX5_CMD_OP_QUERY_ADAPTER:
396 case MLX5_CMD_OP_QUERY_ISSI: 589 case MLX5_CMD_OP_QUERY_ISSI:
@@ -498,14 +691,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
498 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT); 691 MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
499 void *cmd_out; 692 void *cmd_out;
500 int err; 693 int err;
694 int uid;
501 695
502 c = devx_ufile2uctx(file); 696 c = devx_ufile2uctx(file);
503 if (IS_ERR(c)) 697 if (IS_ERR(c))
504 return PTR_ERR(c); 698 return PTR_ERR(c);
505 dev = to_mdev(c->ibucontext.device); 699 dev = to_mdev(c->ibucontext.device);
506 700
507 if (!c->devx_uid) 701 uid = devx_get_uid(c, cmd_in);
508 return -EPERM; 702 if (uid < 0)
703 return uid;
509 704
510 /* Only white list of some general HCA commands are allowed for this method. */ 705 /* Only white list of some general HCA commands are allowed for this method. */
511 if (!devx_is_general_cmd(cmd_in)) 706 if (!devx_is_general_cmd(cmd_in))
@@ -515,7 +710,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
515 if (IS_ERR(cmd_out)) 710 if (IS_ERR(cmd_out))
516 return PTR_ERR(cmd_out); 711 return PTR_ERR(cmd_out);
517 712
518 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 713 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
519 err = mlx5_cmd_exec(dev->mdev, cmd_in, 714 err = mlx5_cmd_exec(dev->mdev, cmd_in,
520 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN), 715 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
521 cmd_out, cmd_out_len); 716 cmd_out, cmd_out_len);
@@ -726,11 +921,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
726 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; 921 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
727 struct devx_obj *obj; 922 struct devx_obj *obj;
728 int err; 923 int err;
924 int uid;
925 u32 obj_id;
926 u16 opcode;
729 927
730 if (!c->devx_uid) 928 uid = devx_get_uid(c, cmd_in);
731 return -EPERM; 929 if (uid < 0)
930 return uid;
732 931
733 if (!devx_is_obj_create_cmd(cmd_in)) 932 if (!devx_is_obj_create_cmd(cmd_in, &opcode))
734 return -EINVAL; 933 return -EINVAL;
735 934
736 cmd_out = uverbs_zalloc(attrs, cmd_out_len); 935 cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -741,7 +940,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
741 if (!obj) 940 if (!obj)
742 return -ENOMEM; 941 return -ENOMEM;
743 942
744 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 943 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
944 devx_set_umem_valid(cmd_in);
945
745 err = mlx5_cmd_exec(dev->mdev, cmd_in, 946 err = mlx5_cmd_exec(dev->mdev, cmd_in,
746 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN), 947 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
747 cmd_out, cmd_out_len); 948 cmd_out, cmd_out_len);
@@ -750,13 +951,15 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
750 951
751 uobj->object = obj; 952 uobj->object = obj;
752 obj->mdev = dev->mdev; 953 obj->mdev = dev->mdev;
753 devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id); 954 devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
955 &obj_id);
754 WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); 956 WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
755 957
756 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); 958 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
757 if (err) 959 if (err)
758 goto obj_destroy; 960 goto obj_destroy;
759 961
962 obj->obj_id = get_enc_obj_id(opcode, obj_id);
760 return 0; 963 return 0;
761 964
762obj_destroy: 965obj_destroy:
@@ -778,9 +981,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
778 struct devx_obj *obj = uobj->object; 981 struct devx_obj *obj = uobj->object;
779 void *cmd_out; 982 void *cmd_out;
780 int err; 983 int err;
984 int uid;
781 985
782 if (!c->devx_uid) 986 uid = devx_get_uid(c, cmd_in);
783 return -EPERM; 987 if (uid < 0)
988 return uid;
784 989
785 if (!devx_is_obj_modify_cmd(cmd_in)) 990 if (!devx_is_obj_modify_cmd(cmd_in))
786 return -EINVAL; 991 return -EINVAL;
@@ -792,7 +997,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
792 if (IS_ERR(cmd_out)) 997 if (IS_ERR(cmd_out))
793 return PTR_ERR(cmd_out); 998 return PTR_ERR(cmd_out);
794 999
795 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 1000 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1001 devx_set_umem_valid(cmd_in);
1002
796 err = mlx5_cmd_exec(obj->mdev, cmd_in, 1003 err = mlx5_cmd_exec(obj->mdev, cmd_in,
797 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), 1004 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
798 cmd_out, cmd_out_len); 1005 cmd_out, cmd_out_len);
@@ -815,9 +1022,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
815 struct devx_obj *obj = uobj->object; 1022 struct devx_obj *obj = uobj->object;
816 void *cmd_out; 1023 void *cmd_out;
817 int err; 1024 int err;
1025 int uid;
818 1026
819 if (!c->devx_uid) 1027 uid = devx_get_uid(c, cmd_in);
820 return -EPERM; 1028 if (uid < 0)
1029 return uid;
821 1030
822 if (!devx_is_obj_query_cmd(cmd_in)) 1031 if (!devx_is_obj_query_cmd(cmd_in))
823 return -EINVAL; 1032 return -EINVAL;
@@ -829,7 +1038,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
829 if (IS_ERR(cmd_out)) 1038 if (IS_ERR(cmd_out))
830 return PTR_ERR(cmd_out); 1039 return PTR_ERR(cmd_out);
831 1040
832 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid); 1041 MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
833 err = mlx5_cmd_exec(obj->mdev, cmd_in, 1042 err = mlx5_cmd_exec(obj->mdev, cmd_in,
834 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), 1043 uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
835 cmd_out, cmd_out_len); 1044 cmd_out, cmd_out_len);
@@ -928,6 +1137,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
928 int err; 1137 int err;
929 1138
930 if (!c->devx_uid) 1139 if (!c->devx_uid)
1140 return -EINVAL;
1141
1142 if (!capable(CAP_NET_RAW))
931 return -EPERM; 1143 return -EPERM;
932 1144
933 obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); 1145 obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
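Across the handler hunks above the pattern repeats: devx_get_uid() replaces the open-coded devx_uid/CAP_NET_RAW checks, so whitelisted read-only commands (QUERY_HCA_CAP, QUERY_HCA_VPORT_CONTEXT) can run under the per-device whitelist uid while everything else still requires the caller's own devx uid plus CAP_NET_RAW; umem registration keeps its explicit capability check. A sketch of the common prologue:

static int foo_devx_cmd_prologue(struct mlx5_ib_ucontext *c, void *cmd_in)
{
	int uid = devx_get_uid(c, cmd_in);

	if (uid < 0)
		return uid;	/* -EINVAL, -EPERM or -EOPNOTSUPP */

	/* Every command carries the uid it executes under. */
	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
	return 0;
}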
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 1a29f47f836e..f86cdcafdafc 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -7,7 +7,9 @@
7#include <rdma/ib_verbs.h> 7#include <rdma/ib_verbs.h>
8#include <rdma/uverbs_types.h> 8#include <rdma/uverbs_types.h>
9#include <rdma/uverbs_ioctl.h> 9#include <rdma/uverbs_ioctl.h>
10#include <rdma/uverbs_std_types.h>
10#include <rdma/mlx5_user_ioctl_cmds.h> 11#include <rdma/mlx5_user_ioctl_cmds.h>
12#include <rdma/mlx5_user_ioctl_verbs.h>
11#include <rdma/ib_umem.h> 13#include <rdma/ib_umem.h>
12#include <linux/mlx5/driver.h> 14#include <linux/mlx5/driver.h>
13#include <linux/mlx5/fs.h> 15#include <linux/mlx5/fs.h>
@@ -16,6 +18,24 @@
16#define UVERBS_MODULE_NAME mlx5_ib 18#define UVERBS_MODULE_NAME mlx5_ib
17#include <rdma/uverbs_named_ioctl.h> 19#include <rdma/uverbs_named_ioctl.h>
18 20
21static int
22mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
23 enum mlx5_flow_namespace_type *namespace)
24{
25 switch (table_type) {
26 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
27 *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
28 break;
29 case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
30 *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
31 break;
32 default:
33 return -EINVAL;
34 }
35
36 return 0;
37}
38
19static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { 39static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
20 [MLX5_IB_FLOW_TYPE_NORMAL] = { 40 [MLX5_IB_FLOW_TYPE_NORMAL] = {
21 .type = UVERBS_ATTR_TYPE_PTR_IN, 41 .type = UVERBS_ATTR_TYPE_PTR_IN,
@@ -38,11 +58,15 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
38 }, 58 },
39}; 59};
40 60
61#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
41static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( 62static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
42 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) 63 struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
43{ 64{
65 struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
44 struct mlx5_ib_flow_handler *flow_handler; 66 struct mlx5_ib_flow_handler *flow_handler;
45 struct mlx5_ib_flow_matcher *fs_matcher; 67 struct mlx5_ib_flow_matcher *fs_matcher;
68 struct ib_uobject **arr_flow_actions;
69 struct ib_uflow_resources *uflow_res;
46 void *devx_obj; 70 void *devx_obj;
47 int dest_id, dest_type; 71 int dest_id, dest_type;
48 void *cmd_in; 72 void *cmd_in;
@@ -52,6 +76,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
52 struct ib_uobject *uobj = 76 struct ib_uobject *uobj =
53 uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); 77 uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
54 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); 78 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
79 int len, ret, i;
55 80
56 if (!capable(CAP_NET_RAW)) 81 if (!capable(CAP_NET_RAW))
57 return -EPERM; 82 return -EPERM;
@@ -61,7 +86,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
61 dest_qp = uverbs_attr_is_valid(attrs, 86 dest_qp = uverbs_attr_is_valid(attrs,
62 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP); 87 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
63 88
64 if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)) 89 fs_matcher = uverbs_attr_get_obj(attrs,
90 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
91 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS &&
92 ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
93 return -EINVAL;
94
95 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
96 (dest_devx || dest_qp))
65 return -EINVAL; 97 return -EINVAL;
66 98
67 if (dest_devx) { 99 if (dest_devx) {
@@ -75,7 +107,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
75 */ 107 */
76 if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) 108 if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
77 return -EINVAL; 109 return -EINVAL;
78 } else { 110 } else if (dest_qp) {
79 struct mlx5_ib_qp *mqp; 111 struct mlx5_ib_qp *mqp;
80 112
81 qp = uverbs_attr_get_obj(attrs, 113 qp = uverbs_attr_get_obj(attrs,
@@ -92,6 +124,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
92 else 124 else
93 dest_id = mqp->raw_packet_qp.rq.tirn; 125 dest_id = mqp->raw_packet_qp.rq.tirn;
94 dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR; 126 dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
127 } else {
128 dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
95 } 129 }
96 130
97 if (dev->rep) 131 if (dev->rep)
@@ -101,16 +135,48 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
101 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 135 attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
102 inlen = uverbs_attr_get_len(attrs, 136 inlen = uverbs_attr_get_len(attrs,
103 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); 137 MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
104 fs_matcher = uverbs_attr_get_obj(attrs, 138
105 MLX5_IB_ATTR_CREATE_FLOW_MATCHER); 139 uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
106 flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen, 140 if (!uflow_res)
141 return -ENOMEM;
142
143 len = uverbs_attr_get_uobjs_arr(attrs,
144 MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
145 for (i = 0; i < len; i++) {
146 struct mlx5_ib_flow_action *maction =
147 to_mflow_act(arr_flow_actions[i]->object);
148
149 ret = parse_flow_flow_action(maction, false, &flow_act);
150 if (ret)
151 goto err_out;
152 flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
153 arr_flow_actions[i]->object);
154 }
155
156 ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
157 MLX5_IB_ATTR_CREATE_FLOW_TAG);
158 if (!ret) {
159 if (flow_act.flow_tag >= BIT(24)) {
160 ret = -EINVAL;
161 goto err_out;
162 }
163 flow_act.flags |= FLOW_ACT_HAS_TAG;
164 }
165
166 flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
167 cmd_in, inlen,
107 dest_id, dest_type); 168 dest_id, dest_type);
108 if (IS_ERR(flow_handler)) 169 if (IS_ERR(flow_handler)) {
109 return PTR_ERR(flow_handler); 170 ret = PTR_ERR(flow_handler);
171 goto err_out;
172 }
110 173
111 ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev); 174 ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
112 175
113 return 0; 176 return 0;
177err_out:
178 ib_uverbs_flow_resources_free(uflow_res);
179 return ret;
114} 180}
115 181
116static int flow_matcher_cleanup(struct ib_uobject *uobject, 182static int flow_matcher_cleanup(struct ib_uobject *uobject,
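The CREATE_FLOW handler gains two optional attributes: an array of up to MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS flow-action handles and a 24-bit flow tag, both folded into the mlx5_flow_act passed to mlx5_ib_raw_fs_rule_add(). A small sketch of the tag rule enforced above:

#include <linux/bits.h>
#include <linux/mlx5/fs.h>

static int foo_set_flow_tag(struct mlx5_flow_act *flow_act, u32 tag)
{
	if (tag >= BIT(24))	/* the flow tag field is 24 bits wide */
		return -EINVAL;
	flow_act->flow_tag = tag;
	flow_act->flags |= FLOW_ACT_HAS_TAG;
	return 0;
}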
@@ -134,12 +200,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
134 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); 200 attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
135 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); 201 struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
136 struct mlx5_ib_flow_matcher *obj; 202 struct mlx5_ib_flow_matcher *obj;
203 u32 flags;
137 int err; 204 int err;
138 205
139 obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL); 206 obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
140 if (!obj) 207 if (!obj)
141 return -ENOMEM; 208 return -ENOMEM;
142 209
210 obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
143 obj->mask_len = uverbs_attr_get_len( 211 obj->mask_len = uverbs_attr_get_len(
144 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK); 212 attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
145 err = uverbs_copy_from(&obj->matcher_mask, 213 err = uverbs_copy_from(&obj->matcher_mask,
@@ -165,6 +233,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
165 if (err) 233 if (err)
166 goto end; 234 goto end;
167 235
236 err = uverbs_get_flags32(&flags, attrs,
237 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
238 IB_FLOW_ATTR_FLAGS_EGRESS);
239 if (err)
240 goto end;
241
242 if (flags) {
243 err = mlx5_ib_ft_type_to_namespace(
244 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type);
245 if (err)
246 goto end;
247 }
248
168 uobj->object = obj; 249 uobj->object = obj;
169 obj->mdev = dev->mdev; 250 obj->mdev = dev->mdev;
170 atomic_set(&obj->usecnt, 0); 251 atomic_set(&obj->usecnt, 0);
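A matcher created with IB_FLOW_ATTR_FLAGS_EGRESS is mapped to the NIC TX namespace through mlx5_ib_ft_type_to_namespace(); without the flag it stays in the default bypass (RX) namespace, which is also why the CREATE_FLOW handler rejects QP or DEVX destinations for egress rules. A compressed sketch of that mapping (foo_matcher_namespace is illustrative):

static int foo_matcher_namespace(u32 flags,
				 enum mlx5_flow_namespace_type *ns)
{
	*ns = MLX5_FLOW_NAMESPACE_BYPASS;	/* default: NIC RX bypass */
	if (flags & IB_FLOW_ATTR_FLAGS_EGRESS)
		return mlx5_ib_ft_type_to_namespace(
			MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, ns);
	return 0;
}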
@@ -175,6 +256,248 @@ end:
175 return err; 256 return err;
176} 257}
177 258
259void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
260{
261 switch (maction->flow_action_raw.sub_type) {
262 case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
263 mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
264 maction->flow_action_raw.action_id);
265 break;
266 case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
267 mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
268 maction->flow_action_raw.action_id);
269 break;
270 case MLX5_IB_FLOW_ACTION_DECAP:
271 break;
272 default:
273 break;
274 }
275}
276
277static struct ib_flow_action *
278mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
279 enum mlx5_ib_uapi_flow_table_type ft_type,
280 u8 num_actions, void *in)
281{
282 enum mlx5_flow_namespace_type namespace;
283 struct mlx5_ib_flow_action *maction;
284 int ret;
285
286 ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
287 if (ret)
288 return ERR_PTR(-EINVAL);
289
290 maction = kzalloc(sizeof(*maction), GFP_KERNEL);
291 if (!maction)
292 return ERR_PTR(-ENOMEM);
293
294 ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
295 &maction->flow_action_raw.action_id);
296
297 if (ret) {
298 kfree(maction);
299 return ERR_PTR(ret);
300 }
301 maction->flow_action_raw.sub_type =
302 MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
303 maction->flow_action_raw.dev = dev;
304
305 return &maction->ib_action;
306}
307
308static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
309{
310 return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
311 max_modify_header_actions) ||
312 MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
313}
314
315static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
316 struct ib_uverbs_file *file,
317 struct uverbs_attr_bundle *attrs)
318{
319 struct ib_uobject *uobj = uverbs_attr_get_uobject(
320 attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
321 struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
322 enum mlx5_ib_uapi_flow_table_type ft_type;
323 struct ib_flow_action *action;
324 size_t num_actions;
325 void *in;
326 int len;
327 int ret;
328
329 if (!mlx5_ib_modify_header_supported(mdev))
330 return -EOPNOTSUPP;
331
332 in = uverbs_attr_get_alloced_ptr(attrs,
333 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
334 len = uverbs_attr_get_len(attrs,
335 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
336
337 if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
338 return -EINVAL;
339
340 ret = uverbs_get_const(&ft_type, attrs,
341 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
342 if (ret)
343 return ret;
344
345 num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
346 action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
347 if (IS_ERR(action))
348 return PTR_ERR(action);
349
350 uverbs_flow_action_fill_action(action, uobj, uobj->context->device,
351 IB_FLOW_ACTION_UNSPECIFIED);
352
353 return 0;
354}
355
356static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
357 u8 packet_reformat_type,
358 u8 ft_type)
359{
360 switch (packet_reformat_type) {
361 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
362 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
363 return MLX5_CAP_FLOWTABLE(ibdev->mdev,
364 encap_general_header);
365 break;
366 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
367 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
368 return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
369 reformat_l2_to_l3_tunnel);
370 break;
371 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
372 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
373 return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
374 reformat_l3_tunnel_to_l2);
375 break;
376 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
377 if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
378 return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
379 break;
380 default:
381 break;
382 }
383
384 return false;
385}
386
387static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
388{
389 switch (dv_prt) {
390 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
391 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
392 break;
393 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
394 *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
395 break;
396 case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
397 *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
398 break;
399 default:
400 return -EINVAL;
401 }
402
403 return 0;
404}
405
406static int mlx5_ib_flow_action_create_packet_reformat_ctx(
407 struct mlx5_ib_dev *dev,
408 struct mlx5_ib_flow_action *maction,
409 u8 ft_type, u8 dv_prt,
410 void *in, size_t len)
411{
412 enum mlx5_flow_namespace_type namespace;
413 u8 prm_prt;
414 int ret;
415
416 ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
417 if (ret)
418 return ret;
419
420 ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
421 if (ret)
422 return ret;
423
424 ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
425 in, namespace,
426 &maction->flow_action_raw.action_id);
427 if (ret)
428 return ret;
429
430 maction->flow_action_raw.sub_type =
431 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
432 maction->flow_action_raw.dev = dev;
433
434 return 0;
435}
436
437static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
438 struct ib_uverbs_file *file,
439 struct uverbs_attr_bundle *attrs)
440{
441 struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
442 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
443 struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
444 enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
445 enum mlx5_ib_uapi_flow_table_type ft_type;
446 struct mlx5_ib_flow_action *maction;
447 int ret;
448
449 ret = uverbs_get_const(&ft_type, attrs,
450 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
451 if (ret)
452 return ret;
453
454 ret = uverbs_get_const(&dv_prt, attrs,
455 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
456 if (ret)
457 return ret;
458
459 if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
460 return -EOPNOTSUPP;
461
462 maction = kzalloc(sizeof(*maction), GFP_KERNEL);
463 if (!maction)
464 return -ENOMEM;
465
466 if (dv_prt ==
467 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
468 maction->flow_action_raw.sub_type =
469 MLX5_IB_FLOW_ACTION_DECAP;
470 maction->flow_action_raw.dev = mdev;
471 } else {
472 void *in;
473 int len;
474
475 in = uverbs_attr_get_alloced_ptr(attrs,
476 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
477 if (IS_ERR(in)) {
478 ret = PTR_ERR(in);
479 goto free_maction;
480 }
481
482 len = uverbs_attr_get_len(attrs,
483 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
484
485 ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
486 maction, ft_type, dv_prt, in, len);
487 if (ret)
488 goto free_maction;
489 }
490
491 uverbs_flow_action_fill_action(&maction->ib_action, uobj,
492 uobj->context->device,
493 IB_FLOW_ACTION_UNSPECIFIED);
494 return 0;
495
496free_maction:
497 kfree(maction);
498 return ret;
499}
500
178DECLARE_UVERBS_NAMED_METHOD( 501DECLARE_UVERBS_NAMED_METHOD(
179 MLX5_IB_METHOD_CREATE_FLOW, 502 MLX5_IB_METHOD_CREATE_FLOW,
180 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE, 503 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
@@ -195,7 +518,15 @@ DECLARE_UVERBS_NAMED_METHOD(
195 UVERBS_ACCESS_READ), 518 UVERBS_ACCESS_READ),
196 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 519 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
197 MLX5_IB_OBJECT_DEVX_OBJ, 520 MLX5_IB_OBJECT_DEVX_OBJ,
198 UVERBS_ACCESS_READ)); 521 UVERBS_ACCESS_READ),
522 UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
523 UVERBS_OBJECT_FLOW_ACTION,
524 UVERBS_ACCESS_READ, 1,
525 MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
526 UA_OPTIONAL),
527 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
528 UVERBS_ATTR_TYPE(u32),
529 UA_OPTIONAL));
199 530
200DECLARE_UVERBS_NAMED_METHOD_DESTROY( 531DECLARE_UVERBS_NAMED_METHOD_DESTROY(
201 MLX5_IB_METHOD_DESTROY_FLOW, 532 MLX5_IB_METHOD_DESTROY_FLOW,
@@ -210,6 +541,44 @@ ADD_UVERBS_METHODS(mlx5_ib_fs,
210 &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW)); 541 &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
211 542
212DECLARE_UVERBS_NAMED_METHOD( 543DECLARE_UVERBS_NAMED_METHOD(
544 MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
545 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
546 UVERBS_OBJECT_FLOW_ACTION,
547 UVERBS_ACCESS_NEW,
548 UA_MANDATORY),
549 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
550 UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
551 set_action_in_add_action_in_auto)),
552 UA_MANDATORY,
553 UA_ALLOC_AND_COPY),
554 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
555 enum mlx5_ib_uapi_flow_table_type,
556 UA_MANDATORY));
557
558DECLARE_UVERBS_NAMED_METHOD(
559 MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
560 UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
561 UVERBS_OBJECT_FLOW_ACTION,
562 UVERBS_ACCESS_NEW,
563 UA_MANDATORY),
564 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
565 UVERBS_ATTR_MIN_SIZE(1),
566 UA_ALLOC_AND_COPY,
567 UA_OPTIONAL),
568 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
569 enum mlx5_ib_uapi_flow_action_packet_reformat_type,
570 UA_MANDATORY),
571 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
572 enum mlx5_ib_uapi_flow_table_type,
573 UA_MANDATORY));
574
575ADD_UVERBS_METHODS(
576 mlx5_ib_flow_actions,
577 UVERBS_OBJECT_FLOW_ACTION,
578 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
579 &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
580
581DECLARE_UVERBS_NAMED_METHOD(
213 MLX5_IB_METHOD_FLOW_MATCHER_CREATE, 582 MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
214 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE, 583 UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
215 MLX5_IB_OBJECT_FLOW_MATCHER, 584 MLX5_IB_OBJECT_FLOW_MATCHER,
@@ -224,7 +593,10 @@ DECLARE_UVERBS_NAMED_METHOD(
224 UA_MANDATORY), 593 UA_MANDATORY),
225 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 594 UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
226 UVERBS_ATTR_TYPE(u8), 595 UVERBS_ATTR_TYPE(u8),
227 UA_MANDATORY)); 596 UA_MANDATORY),
597 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
598 enum ib_flow_flags,
599 UA_OPTIONAL));
228 600
229DECLARE_UVERBS_NAMED_METHOD_DESTROY( 601DECLARE_UVERBS_NAMED_METHOD_DESTROY(
230 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY, 602 MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -247,6 +619,7 @@ int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
247 619
248 root[i++] = &flow_objects; 620 root[i++] = &flow_objects;
249 root[i++] = &mlx5_ib_fs; 621 root[i++] = &mlx5_ib_fs;
622 root[i++] = &mlx5_ib_flow_actions;
250 623
251 return i; 624 return i;
252} 625}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 35a0e04c38f2..584ff2ea7810 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -39,9 +39,6 @@ static const struct mlx5_ib_profile rep_profile = {
39 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 39 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
40 mlx5_ib_stage_post_ib_reg_umr_init, 40 mlx5_ib_stage_post_ib_reg_umr_init,
41 NULL), 41 NULL),
42 STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
43 mlx5_ib_stage_class_attr_init,
44 NULL),
45}; 42};
46 43
47static int 44static int
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index af32899bb72a..e9c428071df3 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1571,14 +1571,57 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
1571 mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); 1571 mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
1572} 1572}
1573 1573
1574static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn) 1574int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1575{
1576 int err = 0;
1577
1578 mutex_lock(&dev->lb.mutex);
1579 if (td)
1580 dev->lb.user_td++;
1581 if (qp)
1582 dev->lb.qps++;
1583
1584 if (dev->lb.user_td == 2 ||
1585 dev->lb.qps == 1) {
1586 if (!dev->lb.enabled) {
1587 err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
1588 dev->lb.enabled = true;
1589 }
1590 }
1591
1592 mutex_unlock(&dev->lb.mutex);
1593
1594 return err;
1595}
1596
1597void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1598{
1599 mutex_lock(&dev->lb.mutex);
1600 if (td)
1601 dev->lb.user_td--;
1602 if (qp)
1603 dev->lb.qps--;
1604
1605 if (dev->lb.user_td == 1 &&
1606 dev->lb.qps == 0) {
1607 if (dev->lb.enabled) {
1608 mlx5_nic_vport_update_local_lb(dev->mdev, false);
1609 dev->lb.enabled = false;
1610 }
1611 }
1612
1613 mutex_unlock(&dev->lb.mutex);
1614}
1615
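The two helpers above replace the old single-purpose transport-domain counter with shared reference counting: loopback is switched on once a second user transport domain appears or the first raw QP does, and switched off again when the counts fall back to one TD and zero QPs. A minimal user-space sketch of the same counting rule, with hypothetical names and a pthread mutex standing in for the kernel mutex:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct lb_state {
	pthread_mutex_t mutex;
	unsigned int user_td;	/* transport domains */
	unsigned int qps;	/* raw QPs */
	bool enabled;
};

static void lb_get(struct lb_state *lb, bool td, bool qp)
{
	pthread_mutex_lock(&lb->mutex);
	if (td)
		lb->user_td++;
	if (qp)
		lb->qps++;
	if ((lb->user_td == 2 || lb->qps == 1) && !lb->enabled) {
		puts("enable local loopback");	/* the driver calls mlx5_nic_vport_update_local_lb(true) here */
		lb->enabled = true;
	}
	pthread_mutex_unlock(&lb->mutex);
}

static void lb_put(struct lb_state *lb, bool td, bool qp)
{
	pthread_mutex_lock(&lb->mutex);
	if (td)
		lb->user_td--;
	if (qp)
		lb->qps--;
	if (lb->user_td == 1 && lb->qps == 0 && lb->enabled) {
		puts("disable local loopback");
		lb->enabled = false;
	}
	pthread_mutex_unlock(&lb->mutex);
}

int main(void)
{
	struct lb_state lb = { .mutex = PTHREAD_MUTEX_INITIALIZER };

	lb_get(&lb, true, false);	/* first TD: nothing happens */
	lb_get(&lb, true, false);	/* second TD: loopback enabled */
	lb_put(&lb, true, false);	/* back to one TD: loopback disabled */
	return 0;
}
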
1616static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
1617 u16 uid)
1575{ 1618{
1576 int err; 1619 int err;
1577 1620
1578 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1621 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1579 return 0; 1622 return 0;
1580 1623
1581 err = mlx5_core_alloc_transport_domain(dev->mdev, tdn); 1624 err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
1582 if (err) 1625 if (err)
1583 return err; 1626 return err;
1584 1627
@@ -1587,35 +1630,23 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
1587 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1630 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
1588 return err; 1631 return err;
1589 1632
1590 mutex_lock(&dev->lb_mutex); 1633 return mlx5_ib_enable_lb(dev, true, false);
1591 dev->user_td++;
1592
1593 if (dev->user_td == 2)
1594 err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
1595
1596 mutex_unlock(&dev->lb_mutex);
1597 return err;
1598} 1634}
1599 1635
1600static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn) 1636static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
1637 u16 uid)
1601{ 1638{
1602 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1639 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1603 return; 1640 return;
1604 1641
1605 mlx5_core_dealloc_transport_domain(dev->mdev, tdn); 1642 mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
1606 1643
1607 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || 1644 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
1608 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && 1645 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
1609 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1646 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
1610 return; 1647 return;
1611 1648
1612 mutex_lock(&dev->lb_mutex); 1649 mlx5_ib_disable_lb(dev, true, false);
1613 dev->user_td--;
1614
1615 if (dev->user_td < 2)
1616 mlx5_nic_vport_update_local_lb(dev->mdev, false);
1617
1618 mutex_unlock(&dev->lb_mutex);
1619} 1650}
1620 1651
1621static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 1652static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
@@ -1727,30 +1758,24 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1727 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 1758 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
1728#endif 1759#endif
1729 1760
1730 err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
1731 if (err)
1732 goto out_uars;
1733
1734 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { 1761 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
1735 /* Block DEVX on Infiniband as of SELinux */ 1762 err = mlx5_ib_devx_create(dev);
1736 if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) { 1763 if (err < 0)
1737 err = -EPERM; 1764 goto out_uars;
1738 goto out_td; 1765 context->devx_uid = err;
1739 }
1740
1741 err = mlx5_ib_devx_create(dev, context);
1742 if (err)
1743 goto out_td;
1744 } 1766 }
1745 1767
1768 err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
1769 context->devx_uid);
1770 if (err)
1771 goto out_devx;
1772
1746 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1773 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1747 err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey); 1774 err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
1748 if (err) 1775 if (err)
1749 goto out_mdev; 1776 goto out_mdev;
1750 } 1777 }
1751 1778
1752 INIT_LIST_HEAD(&context->vma_private_list);
1753 mutex_init(&context->vma_private_list_mutex);
1754 INIT_LIST_HEAD(&context->db_page_list); 1779 INIT_LIST_HEAD(&context->db_page_list);
1755 mutex_init(&context->db_page_mutex); 1780 mutex_init(&context->db_page_mutex);
1756 1781
@@ -1826,13 +1851,21 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
1826 context->lib_caps = req.lib_caps; 1851 context->lib_caps = req.lib_caps;
1827 print_lib_caps(dev, context->lib_caps); 1852 print_lib_caps(dev, context->lib_caps);
1828 1853
1854 if (mlx5_lag_is_active(dev->mdev)) {
1855 u8 port = mlx5_core_native_port_num(dev->mdev);
1856
1857 atomic_set(&context->tx_port_affinity,
1858 atomic_add_return(
1859 1, &dev->roce[port].tx_port_affinity));
1860 }
1861
1829 return &context->ibucontext; 1862 return &context->ibucontext;
1830 1863
1831out_mdev: 1864out_mdev:
1865 mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1866out_devx:
1832 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) 1867 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
1833 mlx5_ib_devx_destroy(dev, context); 1868 mlx5_ib_devx_destroy(dev, context->devx_uid);
1834out_td:
1835 mlx5_ib_dealloc_transport_domain(dev, context->tdn);
1836 1869
1837out_uars: 1870out_uars:
1838 deallocate_uars(dev, context); 1871 deallocate_uars(dev, context);
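The tx_port_affinity change in the hunk above hands each new ucontext the next value of a shared per-port atomic counter so that, under LAG, TX traffic from different contexts is spread across the bonded ports. The same round-robin assignment in plain C11 atomics; the two-port count and the modulo mapping to a physical port are illustrative assumptions, not taken from this diff:

#include <stdatomic.h>
#include <stdio.h>

#define NUM_LAG_PORTS 2		/* assumed: a two-port bond */

static atomic_uint tx_port_affinity;	/* analogue of the per-RoCE-port counter */

/* Each new context records the next counter value, matching
 * atomic_add_return(1, ...) semantics (fetch_add returns the old value). */
static unsigned int assign_affinity(void)
{
	return atomic_fetch_add(&tx_port_affinity, 1) + 1;
}

int main(void)
{
	for (int i = 0; i < 5; i++) {
		unsigned int aff = assign_affinity();
		printf("context %d: affinity %u -> port %u\n",
		       i, aff, (aff - 1) % NUM_LAG_PORTS + 1);
	}
	return 0;
}
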
@@ -1855,11 +1888,18 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1855 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1888 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1856 struct mlx5_bfreg_info *bfregi; 1889 struct mlx5_bfreg_info *bfregi;
1857 1890
1858 if (context->devx_uid) 1891#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1859 mlx5_ib_devx_destroy(dev, context); 1892 /* All umem's must be destroyed before destroying the ucontext. */
1893 mutex_lock(&ibcontext->per_mm_list_lock);
1894 WARN_ON(!list_empty(&ibcontext->per_mm_list));
1895 mutex_unlock(&ibcontext->per_mm_list_lock);
1896#endif
1860 1897
1861 bfregi = &context->bfregi; 1898 bfregi = &context->bfregi;
1862 mlx5_ib_dealloc_transport_domain(dev, context->tdn); 1899 mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1900
1901 if (context->devx_uid)
1902 mlx5_ib_devx_destroy(dev, context->devx_uid);
1863 1903
1864 deallocate_uars(dev, context); 1904 deallocate_uars(dev, context);
1865 kfree(bfregi->sys_pages); 1905 kfree(bfregi->sys_pages);
@@ -1900,94 +1940,9 @@ static int get_extended_index(unsigned long offset)
1900 return get_arg(offset) | ((offset >> 16) & 0xff) << 8; 1940 return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
1901} 1941}
1902 1942
1903static void mlx5_ib_vma_open(struct vm_area_struct *area)
1904{
1905 /* vma_open is called when a new VMA is created on top of our VMA. This
1906 * is done through either mremap flow or split_vma (usually due to
1907 * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
1908 * as this VMA is strongly hardware related. Therefore we set the
1909 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1910 * calling us again and trying to do incorrect actions. We assume that
1911 * the original VMA size is exactly a single page, and therefore all
1912 * "splitting" operation will not happen to it.
1913 */
1914 area->vm_ops = NULL;
1915}
1916
1917static void mlx5_ib_vma_close(struct vm_area_struct *area)
1918{
1919 struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
1920
1921 /* It's guaranteed that all VMAs opened on a FD are closed before the
1922 * file itself is closed, therefore no sync is needed with the regular
1923 * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
1924 * However need a sync with accessing the vma as part of
1925 * mlx5_ib_disassociate_ucontext.
1926 * The close operation is usually called under mm->mmap_sem except when
1927 * process is exiting.
1928 * The exiting case is handled explicitly as part of
1929 * mlx5_ib_disassociate_ucontext.
1930 */
1931 mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
1932
1933 /* setting the vma context pointer to null in the mlx5_ib driver's
1934 * private data, to protect a race condition in
1935 * mlx5_ib_disassociate_ucontext().
1936 */
1937 mlx5_ib_vma_priv_data->vma = NULL;
1938 mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1939 list_del(&mlx5_ib_vma_priv_data->list);
1940 mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
1941 kfree(mlx5_ib_vma_priv_data);
1942}
1943
1944static const struct vm_operations_struct mlx5_ib_vm_ops = {
1945 .open = mlx5_ib_vma_open,
1946 .close = mlx5_ib_vma_close
1947};
1948
1949static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
1950 struct mlx5_ib_ucontext *ctx)
1951{
1952 struct mlx5_ib_vma_private_data *vma_prv;
1953 struct list_head *vma_head = &ctx->vma_private_list;
1954
1955 vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
1956 if (!vma_prv)
1957 return -ENOMEM;
1958
1959 vma_prv->vma = vma;
1960 vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
1961 vma->vm_private_data = vma_prv;
1962 vma->vm_ops = &mlx5_ib_vm_ops;
1963
1964 mutex_lock(&ctx->vma_private_list_mutex);
1965 list_add(&vma_prv->list, vma_head);
1966 mutex_unlock(&ctx->vma_private_list_mutex);
1967
1968 return 0;
1969}
1970 1943
1971static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 1944static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
1972{ 1945{
1973 struct vm_area_struct *vma;
1974 struct mlx5_ib_vma_private_data *vma_private, *n;
1975 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1976
1977 mutex_lock(&context->vma_private_list_mutex);
1978 list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
1979 list) {
1980 vma = vma_private->vma;
1981 zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
1982 /* context going to be destroyed, should
1983 * not access ops any more.
1984 */
1985 vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
1986 vma->vm_ops = NULL;
1987 list_del(&vma_private->list);
1988 kfree(vma_private);
1989 }
1990 mutex_unlock(&context->vma_private_list_mutex);
1991} 1946}
1992 1947
1993static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 1948static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2010,9 +1965,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
2010 struct vm_area_struct *vma, 1965 struct vm_area_struct *vma,
2011 struct mlx5_ib_ucontext *context) 1966 struct mlx5_ib_ucontext *context)
2012{ 1967{
2013 phys_addr_t pfn;
2014 int err;
2015
2016 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1968 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2017 return -EINVAL; 1969 return -EINVAL;
2018 1970
@@ -2025,13 +1977,8 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
2025 if (!dev->mdev->clock_info_page) 1977 if (!dev->mdev->clock_info_page)
2026 return -EOPNOTSUPP; 1978 return -EOPNOTSUPP;
2027 1979
2028 pfn = page_to_pfn(dev->mdev->clock_info_page); 1980 return rdma_user_mmap_page(&context->ibucontext, vma,
2029 err = remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, 1981 dev->mdev->clock_info_page, PAGE_SIZE);
2030 vma->vm_page_prot);
2031 if (err)
2032 return err;
2033
2034 return mlx5_ib_set_vma_data(vma, context);
2035} 1982}
2036 1983
2037static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 1984static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
@@ -2121,21 +2068,15 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
2121 pfn = uar_index2pfn(dev, uar_index); 2068 pfn = uar_index2pfn(dev, uar_index);
2122 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 2069 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
2123 2070
2124 vma->vm_page_prot = prot; 2071 err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
2125 err = io_remap_pfn_range(vma, vma->vm_start, pfn, 2072 prot);
2126 PAGE_SIZE, vma->vm_page_prot);
2127 if (err) { 2073 if (err) {
2128 mlx5_ib_err(dev, 2074 mlx5_ib_err(dev,
2129 "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", 2075 "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
2130 err, mmap_cmd2str(cmd)); 2076 err, mmap_cmd2str(cmd));
2131 err = -EAGAIN;
2132 goto err; 2077 goto err;
2133 } 2078 }
2134 2079
2135 err = mlx5_ib_set_vma_data(vma, context);
2136 if (err)
2137 goto err;
2138
2139 if (dyn_uar) 2080 if (dyn_uar)
2140 bfregi->sys_pages[idx] = uar_index; 2081 bfregi->sys_pages[idx] = uar_index;
2141 return 0; 2082 return 0;
@@ -2160,7 +2101,6 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
2160 size_t map_size = vma->vm_end - vma->vm_start; 2101 size_t map_size = vma->vm_end - vma->vm_start;
2161 u32 npages = map_size >> PAGE_SHIFT; 2102 u32 npages = map_size >> PAGE_SHIFT;
2162 phys_addr_t pfn; 2103 phys_addr_t pfn;
2163 pgprot_t prot;
2164 2104
2165 if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) != 2105 if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
2166 page_idx + npages) 2106 page_idx + npages)
@@ -2170,14 +2110,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
2170 MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >> 2110 MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
2171 PAGE_SHIFT) + 2111 PAGE_SHIFT) +
2172 page_idx; 2112 page_idx;
2173 prot = pgprot_writecombine(vma->vm_page_prot); 2113 return rdma_user_mmap_io(context, vma, pfn, map_size,
2174 vma->vm_page_prot = prot; 2114 pgprot_writecombine(vma->vm_page_prot));
2175
2176 if (io_remap_pfn_range(vma, vma->vm_start, pfn, map_size,
2177 vma->vm_page_prot))
2178 return -EAGAIN;
2179
2180 return mlx5_ib_set_vma_data(vma, mctx);
2181} 2115}
2182 2116
2183static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 2117static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2318,21 +2252,30 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
2318 struct mlx5_ib_alloc_pd_resp resp; 2252 struct mlx5_ib_alloc_pd_resp resp;
2319 struct mlx5_ib_pd *pd; 2253 struct mlx5_ib_pd *pd;
2320 int err; 2254 int err;
2255 u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
2256 u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
2257 u16 uid = 0;
2321 2258
2322 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 2259 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
2323 if (!pd) 2260 if (!pd)
2324 return ERR_PTR(-ENOMEM); 2261 return ERR_PTR(-ENOMEM);
2325 2262
2326 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 2263 uid = context ? to_mucontext(context)->devx_uid : 0;
2264 MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
2265 MLX5_SET(alloc_pd_in, in, uid, uid);
2266 err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
2267 out, sizeof(out));
2327 if (err) { 2268 if (err) {
2328 kfree(pd); 2269 kfree(pd);
2329 return ERR_PTR(err); 2270 return ERR_PTR(err);
2330 } 2271 }
2331 2272
2273 pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
2274 pd->uid = uid;
2332 if (context) { 2275 if (context) {
2333 resp.pdn = pd->pdn; 2276 resp.pdn = pd->pdn;
2334 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 2277 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
2335 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 2278 mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
2336 kfree(pd); 2279 kfree(pd);
2337 return ERR_PTR(-EFAULT); 2280 return ERR_PTR(-EFAULT);
2338 } 2281 }
@@ -2346,7 +2289,7 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
2346 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 2289 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
2347 struct mlx5_ib_pd *mpd = to_mpd(pd); 2290 struct mlx5_ib_pd *mpd = to_mpd(pd);
2348 2291
2349 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 2292 mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
2350 kfree(mpd); 2293 kfree(mpd);
2351 2294
2352 return 0; 2295 return 0;
@@ -2452,20 +2395,50 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
2452 offsetof(typeof(filter), field) -\ 2395 offsetof(typeof(filter), field) -\
2453 sizeof(filter.field)) 2396 sizeof(filter.field))
2454 2397
2455static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, 2398int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
2456 const struct ib_flow_attr *flow_attr, 2399 bool is_egress,
2457 struct mlx5_flow_act *action) 2400 struct mlx5_flow_act *action)
2458{ 2401{
2459 struct mlx5_ib_flow_action *maction = to_mflow_act(ib_spec->action.act);
2460 2402
2461 switch (maction->ib_action.type) { 2403 switch (maction->ib_action.type) {
2462 case IB_FLOW_ACTION_ESP: 2404 case IB_FLOW_ACTION_ESP:
2405 if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
2406 MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
2407 return -EINVAL;
2463 /* Currently only AES_GCM keymat is supported by the driver */ 2408 /* Currently only AES_GCM keymat is supported by the driver */
2464 action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx; 2409 action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
2465 action->action |= flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS ? 2410 action->action |= is_egress ?
2466 MLX5_FLOW_CONTEXT_ACTION_ENCRYPT : 2411 MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
2467 MLX5_FLOW_CONTEXT_ACTION_DECRYPT; 2412 MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
2468 return 0; 2413 return 0;
2414 case IB_FLOW_ACTION_UNSPECIFIED:
2415 if (maction->flow_action_raw.sub_type ==
2416 MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
2417 if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2418 return -EINVAL;
2419 action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2420 action->modify_id = maction->flow_action_raw.action_id;
2421 return 0;
2422 }
2423 if (maction->flow_action_raw.sub_type ==
2424 MLX5_IB_FLOW_ACTION_DECAP) {
2425 if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
2426 return -EINVAL;
2427 action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2428 return 0;
2429 }
2430 if (maction->flow_action_raw.sub_type ==
2431 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
2432 if (action->action &
2433 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
2434 return -EINVAL;
2435 action->action |=
2436 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
2437 action->reformat_id =
2438 maction->flow_action_raw.action_id;
2439 return 0;
2440 }
2441 /* fall through */
2469 default: 2442 default:
2470 return -EOPNOTSUPP; 2443 return -EOPNOTSUPP;
2471 } 2444 }
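With the hunk above, parse_flow_flow_action accumulates device actions into a bitmask and rejects a second action of the same kind on one flow. A standalone sketch of that set-each-bit-at-most-once accumulation; the flag values are illustrative, not the mlx5 ones:

#include <stdio.h>

enum {
	ACT_MOD_HDR  = 1 << 0,
	ACT_DECAP    = 1 << 1,
	ACT_REFORMAT = 1 << 2,
};

/* Returns 0 and records the action, or -1 if it was already requested
 * (the -EINVAL case in the driver). */
static int add_action(unsigned int *mask, unsigned int bit)
{
	if (*mask & bit)
		return -1;
	*mask |= bit;
	return 0;
}

int main(void)
{
	unsigned int mask = 0;

	printf("%d\n", add_action(&mask, ACT_MOD_HDR));		/* 0 */
	printf("%d\n", add_action(&mask, ACT_MOD_HDR));		/* -1: duplicate */
	printf("%d\n", add_action(&mask, ACT_REFORMAT));	/* 0: different action is fine */
	return 0;
}
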
@@ -2802,7 +2775,8 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
2802 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; 2775 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2803 break; 2776 break;
2804 case IB_FLOW_SPEC_ACTION_HANDLE: 2777 case IB_FLOW_SPEC_ACTION_HANDLE:
2805 ret = parse_flow_flow_action(ib_spec, flow_attr, action); 2778 ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
2779 flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
2806 if (ret) 2780 if (ret)
2807 return ret; 2781 return ret;
2808 break; 2782 break;
@@ -2883,7 +2857,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
2883 * rules would be supported, always return VALID_SPEC_NA. 2857 * rules would be supported, always return VALID_SPEC_NA.
2884 */ 2858 */
2885 if (!is_crypto) 2859 if (!is_crypto)
2886 return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA; 2860 return VALID_SPEC_NA;
2887 2861
2888 return is_crypto && is_ipsec && 2862 return is_crypto && is_ipsec &&
2889 (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ? 2863 (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
@@ -3026,14 +3000,15 @@ enum flow_table_type {
3026static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns, 3000static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
3027 struct mlx5_ib_flow_prio *prio, 3001 struct mlx5_ib_flow_prio *prio,
3028 int priority, 3002 int priority,
3029 int num_entries, int num_groups) 3003 int num_entries, int num_groups,
3004 u32 flags)
3030{ 3005{
3031 struct mlx5_flow_table *ft; 3006 struct mlx5_flow_table *ft;
3032 3007
3033 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 3008 ft = mlx5_create_auto_grouped_flow_table(ns, priority,
3034 num_entries, 3009 num_entries,
3035 num_groups, 3010 num_groups,
3036 0, 0); 3011 0, flags);
3037 if (IS_ERR(ft)) 3012 if (IS_ERR(ft))
3038 return ERR_CAST(ft); 3013 return ERR_CAST(ft);
3039 3014
@@ -3053,26 +3028,43 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3053 int max_table_size; 3028 int max_table_size;
3054 int num_entries; 3029 int num_entries;
3055 int num_groups; 3030 int num_groups;
3031 u32 flags = 0;
3056 int priority; 3032 int priority;
3057 3033
3058 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, 3034 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3059 log_max_ft_size)); 3035 log_max_ft_size));
3060 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 3036 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
3061 if (ft_type == MLX5_IB_FT_TX) 3037 enum mlx5_flow_namespace_type fn_type;
3062 priority = 0; 3038
3063 else if (flow_is_multicast_only(flow_attr) && 3039 if (flow_is_multicast_only(flow_attr) &&
3064 !dont_trap) 3040 !dont_trap)
3065 priority = MLX5_IB_FLOW_MCAST_PRIO; 3041 priority = MLX5_IB_FLOW_MCAST_PRIO;
3066 else 3042 else
3067 priority = ib_prio_to_core_prio(flow_attr->priority, 3043 priority = ib_prio_to_core_prio(flow_attr->priority,
3068 dont_trap); 3044 dont_trap);
3069 ns = mlx5_get_flow_namespace(dev->mdev, 3045 if (ft_type == MLX5_IB_FT_RX) {
3070 ft_type == MLX5_IB_FT_TX ? 3046 fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
3071 MLX5_FLOW_NAMESPACE_EGRESS : 3047 prio = &dev->flow_db->prios[priority];
3072 MLX5_FLOW_NAMESPACE_BYPASS); 3048 if (!dev->rep &&
3049 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
3050 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
3051 if (!dev->rep &&
3052 MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3053 reformat_l3_tunnel_to_l2))
3054 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
3055 } else {
3056 max_table_size =
3057 BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
3058 log_max_ft_size));
3059 fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
3060 prio = &dev->flow_db->egress_prios[priority];
3061 if (!dev->rep &&
3062 MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
3063 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
3064 }
3065 ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
3073 num_entries = MLX5_FS_MAX_ENTRIES; 3066 num_entries = MLX5_FS_MAX_ENTRIES;
3074 num_groups = MLX5_FS_MAX_TYPES; 3067 num_groups = MLX5_FS_MAX_TYPES;
3075 prio = &dev->flow_db->prios[priority];
3076 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 3068 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
3077 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 3069 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
3078 ns = mlx5_get_flow_namespace(dev->mdev, 3070 ns = mlx5_get_flow_namespace(dev->mdev,
@@ -3104,7 +3096,8 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
3104 3096
3105 ft = prio->flow_table; 3097 ft = prio->flow_table;
3106 if (!ft) 3098 if (!ft)
3107 return _get_prio(ns, prio, priority, num_entries, num_groups); 3099 return _get_prio(ns, prio, priority, num_entries, num_groups,
3100 flags);
3108 3101
3109 return prio; 3102 return prio;
3110} 3103}
@@ -3271,6 +3264,9 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
3271 if (!is_valid_attr(dev->mdev, flow_attr)) 3264 if (!is_valid_attr(dev->mdev, flow_attr))
3272 return ERR_PTR(-EINVAL); 3265 return ERR_PTR(-EINVAL);
3273 3266
3267 if (dev->rep && is_egress)
3268 return ERR_PTR(-EINVAL);
3269
3274 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 3270 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
3275 handler = kzalloc(sizeof(*handler), GFP_KERNEL); 3271 handler = kzalloc(sizeof(*handler), GFP_KERNEL);
3276 if (!handler || !spec) { 3272 if (!handler || !spec) {
@@ -3661,34 +3657,54 @@ free_ucmd:
3661 return ERR_PTR(err); 3657 return ERR_PTR(err);
3662} 3658}
3663 3659
3664static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev, 3660static struct mlx5_ib_flow_prio *
3665 int priority, bool mcast) 3661_get_flow_table(struct mlx5_ib_dev *dev,
3662 struct mlx5_ib_flow_matcher *fs_matcher,
3663 bool mcast)
3666{ 3664{
3667 int max_table_size;
3668 struct mlx5_flow_namespace *ns = NULL; 3665 struct mlx5_flow_namespace *ns = NULL;
3669 struct mlx5_ib_flow_prio *prio; 3666 struct mlx5_ib_flow_prio *prio;
3667 int max_table_size;
3668 u32 flags = 0;
3669 int priority;
3670
3671 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
3672 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3673 log_max_ft_size));
3674 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
3675 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
3676 if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3677 reformat_l3_tunnel_to_l2))
3678 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
3679 } else { /* Can only be MLX5_FLOW_NAMESPACE_EGRESS */
3680 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
3681 log_max_ft_size));
3682 if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
3683 flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
3684 }
3670 3685
3671 max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
3672 log_max_ft_size));
3673 if (max_table_size < MLX5_FS_MAX_ENTRIES) 3686 if (max_table_size < MLX5_FS_MAX_ENTRIES)
3674 return ERR_PTR(-ENOMEM); 3687 return ERR_PTR(-ENOMEM);
3675 3688
3676 if (mcast) 3689 if (mcast)
3677 priority = MLX5_IB_FLOW_MCAST_PRIO; 3690 priority = MLX5_IB_FLOW_MCAST_PRIO;
3678 else 3691 else
3679 priority = ib_prio_to_core_prio(priority, false); 3692 priority = ib_prio_to_core_prio(fs_matcher->priority, false);
3680 3693
3681 ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS); 3694 ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
3682 if (!ns) 3695 if (!ns)
3683 return ERR_PTR(-ENOTSUPP); 3696 return ERR_PTR(-ENOTSUPP);
3684 3697
3685 prio = &dev->flow_db->prios[priority]; 3698 if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
3699 prio = &dev->flow_db->prios[priority];
3700 else
3701 prio = &dev->flow_db->egress_prios[priority];
3686 3702
3687 if (prio->flow_table) 3703 if (prio->flow_table)
3688 return prio; 3704 return prio;
3689 3705
3690 return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES, 3706 return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
3691 MLX5_FS_MAX_TYPES); 3707 MLX5_FS_MAX_TYPES, flags);
3692} 3708}
3693 3709
3694static struct mlx5_ib_flow_handler * 3710static struct mlx5_ib_flow_handler *
@@ -3696,10 +3712,10 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
3696 struct mlx5_ib_flow_prio *ft_prio, 3712 struct mlx5_ib_flow_prio *ft_prio,
3697 struct mlx5_flow_destination *dst, 3713 struct mlx5_flow_destination *dst,
3698 struct mlx5_ib_flow_matcher *fs_matcher, 3714 struct mlx5_ib_flow_matcher *fs_matcher,
3715 struct mlx5_flow_act *flow_act,
3699 void *cmd_in, int inlen) 3716 void *cmd_in, int inlen)
3700{ 3717{
3701 struct mlx5_ib_flow_handler *handler; 3718 struct mlx5_ib_flow_handler *handler;
3702 struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
3703 struct mlx5_flow_spec *spec; 3719 struct mlx5_flow_spec *spec;
3704 struct mlx5_flow_table *ft = ft_prio->flow_table; 3720 struct mlx5_flow_table *ft = ft_prio->flow_table;
3705 int err = 0; 3721 int err = 0;
@@ -3718,9 +3734,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
3718 fs_matcher->mask_len); 3734 fs_matcher->mask_len);
3719 spec->match_criteria_enable = fs_matcher->match_criteria_enable; 3735 spec->match_criteria_enable = fs_matcher->match_criteria_enable;
3720 3736
3721 flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3722 handler->rule = mlx5_add_flow_rules(ft, spec, 3737 handler->rule = mlx5_add_flow_rules(ft, spec,
3723 &flow_act, dst, 1); 3738 flow_act, dst, 1);
3724 3739
3725 if (IS_ERR(handler->rule)) { 3740 if (IS_ERR(handler->rule)) {
3726 err = PTR_ERR(handler->rule); 3741 err = PTR_ERR(handler->rule);
@@ -3782,12 +3797,12 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
3782struct mlx5_ib_flow_handler * 3797struct mlx5_ib_flow_handler *
3783mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, 3798mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
3784 struct mlx5_ib_flow_matcher *fs_matcher, 3799 struct mlx5_ib_flow_matcher *fs_matcher,
3800 struct mlx5_flow_act *flow_act,
3785 void *cmd_in, int inlen, int dest_id, 3801 void *cmd_in, int inlen, int dest_id,
3786 int dest_type) 3802 int dest_type)
3787{ 3803{
3788 struct mlx5_flow_destination *dst; 3804 struct mlx5_flow_destination *dst;
3789 struct mlx5_ib_flow_prio *ft_prio; 3805 struct mlx5_ib_flow_prio *ft_prio;
3790 int priority = fs_matcher->priority;
3791 struct mlx5_ib_flow_handler *handler; 3806 struct mlx5_ib_flow_handler *handler;
3792 bool mcast; 3807 bool mcast;
3793 int err; 3808 int err;
@@ -3805,7 +3820,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
3805 mcast = raw_fs_is_multicast(fs_matcher, cmd_in); 3820 mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
3806 mutex_lock(&dev->flow_db->lock); 3821 mutex_lock(&dev->flow_db->lock);
3807 3822
3808 ft_prio = _get_flow_table(dev, priority, mcast); 3823 ft_prio = _get_flow_table(dev, fs_matcher, mcast);
3809 if (IS_ERR(ft_prio)) { 3824 if (IS_ERR(ft_prio)) {
3810 err = PTR_ERR(ft_prio); 3825 err = PTR_ERR(ft_prio);
3811 goto unlock; 3826 goto unlock;
@@ -3814,13 +3829,18 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
3814 if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { 3829 if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
3815 dst->type = dest_type; 3830 dst->type = dest_type;
3816 dst->tir_num = dest_id; 3831 dst->tir_num = dest_id;
3817 } else { 3832 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3833 } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
3818 dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; 3834 dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
3819 dst->ft_num = dest_id; 3835 dst->ft_num = dest_id;
3836 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3837 } else {
3838 dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
3839 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
3820 } 3840 }
3821 3841
3822 handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in, 3842 handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
3823 inlen); 3843 cmd_in, inlen);
3824 3844
3825 if (IS_ERR(handler)) { 3845 if (IS_ERR(handler)) {
3826 err = PTR_ERR(handler); 3846 err = PTR_ERR(handler);
@@ -3998,6 +4018,9 @@ static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
3998 */ 4018 */
3999 mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx); 4019 mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
4000 break; 4020 break;
4021 case IB_FLOW_ACTION_UNSPECIFIED:
4022 mlx5_ib_destroy_flow_action_raw(maction);
4023 break;
4001 default: 4024 default:
4002 WARN_ON(true); 4025 WARN_ON(true);
4003 break; 4026 break;
@@ -4012,13 +4035,17 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
4012 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4035 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4013 struct mlx5_ib_qp *mqp = to_mqp(ibqp); 4036 struct mlx5_ib_qp *mqp = to_mqp(ibqp);
4014 int err; 4037 int err;
4038 u16 uid;
4039
4040 uid = ibqp->pd ?
4041 to_mpd(ibqp->pd)->uid : 0;
4015 4042
4016 if (mqp->flags & MLX5_IB_QP_UNDERLAY) { 4043 if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
4017 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); 4044 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
4018 return -EOPNOTSUPP; 4045 return -EOPNOTSUPP;
4019 } 4046 }
4020 4047
4021 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 4048 err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
4022 if (err) 4049 if (err)
4023 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 4050 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
4024 ibqp->qp_num, gid->raw); 4051 ibqp->qp_num, gid->raw);
@@ -4030,8 +4057,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
4030{ 4057{
4031 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 4058 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
4032 int err; 4059 int err;
4060 u16 uid;
4033 4061
4034 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 4062 uid = ibqp->pd ?
4063 to_mpd(ibqp->pd)->uid : 0;
4064 err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
4035 if (err) 4065 if (err)
4036 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 4066 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
4037 ibqp->qp_num, gid->raw); 4067 ibqp->qp_num, gid->raw);
@@ -4052,16 +4082,17 @@ static int init_node_data(struct mlx5_ib_dev *dev)
4052 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 4082 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
4053} 4083}
4054 4084
4055static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, 4085static ssize_t fw_pages_show(struct device *device,
4056 char *buf) 4086 struct device_attribute *attr, char *buf)
4057{ 4087{
4058 struct mlx5_ib_dev *dev = 4088 struct mlx5_ib_dev *dev =
4059 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4089 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
4060 4090
4061 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 4091 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
4062} 4092}
4093static DEVICE_ATTR_RO(fw_pages);
4063 4094
4064static ssize_t show_reg_pages(struct device *device, 4095static ssize_t reg_pages_show(struct device *device,
4065 struct device_attribute *attr, char *buf) 4096 struct device_attribute *attr, char *buf)
4066{ 4097{
4067 struct mlx5_ib_dev *dev = 4098 struct mlx5_ib_dev *dev =
@@ -4069,44 +4100,47 @@ static ssize_t show_reg_pages(struct device *device,
4069 4100
4070 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 4101 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
4071} 4102}
4103static DEVICE_ATTR_RO(reg_pages);
4072 4104
4073static ssize_t show_hca(struct device *device, struct device_attribute *attr, 4105static ssize_t hca_type_show(struct device *device,
4074 char *buf) 4106 struct device_attribute *attr, char *buf)
4075{ 4107{
4076 struct mlx5_ib_dev *dev = 4108 struct mlx5_ib_dev *dev =
4077 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4109 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
4078 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 4110 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
4079} 4111}
4112static DEVICE_ATTR_RO(hca_type);
4080 4113
4081static ssize_t show_rev(struct device *device, struct device_attribute *attr, 4114static ssize_t hw_rev_show(struct device *device,
4082 char *buf) 4115 struct device_attribute *attr, char *buf)
4083{ 4116{
4084 struct mlx5_ib_dev *dev = 4117 struct mlx5_ib_dev *dev =
4085 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4118 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
4086 return sprintf(buf, "%x\n", dev->mdev->rev_id); 4119 return sprintf(buf, "%x\n", dev->mdev->rev_id);
4087} 4120}
4121static DEVICE_ATTR_RO(hw_rev);
4088 4122
4089static ssize_t show_board(struct device *device, struct device_attribute *attr, 4123static ssize_t board_id_show(struct device *device,
4090 char *buf) 4124 struct device_attribute *attr, char *buf)
4091{ 4125{
4092 struct mlx5_ib_dev *dev = 4126 struct mlx5_ib_dev *dev =
4093 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 4127 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
4094 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 4128 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
4095 dev->mdev->board_id); 4129 dev->mdev->board_id);
4096} 4130}
4131static DEVICE_ATTR_RO(board_id);
4097 4132
4098static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 4133static struct attribute *mlx5_class_attributes[] = {
4099static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 4134 &dev_attr_hw_rev.attr,
4100static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 4135 &dev_attr_hca_type.attr,
4101static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 4136 &dev_attr_board_id.attr,
4102static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 4137 &dev_attr_fw_pages.attr,
4138 &dev_attr_reg_pages.attr,
4139 NULL,
4140};
4103 4141
4104static struct device_attribute *mlx5_class_attributes[] = { 4142static const struct attribute_group mlx5_attr_group = {
4105 &dev_attr_hw_rev, 4143 .attrs = mlx5_class_attributes,
4106 &dev_attr_hca_type,
4107 &dev_attr_board_id,
4108 &dev_attr_fw_pages,
4109 &dev_attr_reg_pages,
4110}; 4144};
4111 4145
4112static void pkey_change_handler(struct work_struct *work) 4146static void pkey_change_handler(struct work_struct *work)
@@ -5631,7 +5665,6 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
5631int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) 5665int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
5632{ 5666{
5633 struct mlx5_core_dev *mdev = dev->mdev; 5667 struct mlx5_core_dev *mdev = dev->mdev;
5634 const char *name;
5635 int err; 5668 int err;
5636 int i; 5669 int i;
5637 5670
@@ -5664,12 +5697,6 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
5664 if (mlx5_use_mad_ifc(dev)) 5697 if (mlx5_use_mad_ifc(dev))
5665 get_ext_port_caps(dev); 5698 get_ext_port_caps(dev);
5666 5699
5667 if (!mlx5_lag_is_active(mdev))
5668 name = "mlx5_%d";
5669 else
5670 name = "mlx5_bond_%d";
5671
5672 strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
5673 dev->ib_dev.owner = THIS_MODULE; 5700 dev->ib_dev.owner = THIS_MODULE;
5674 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 5701 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
5675 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; 5702 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
@@ -5876,7 +5903,7 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
5876 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && 5903 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
5877 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) || 5904 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
5878 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 5905 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
5879 mutex_init(&dev->lb_mutex); 5906 mutex_init(&dev->lb.mutex);
5880 5907
5881 return 0; 5908 return 0;
5882} 5909}
@@ -6083,7 +6110,14 @@ static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
6083 6110
6084int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) 6111int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
6085{ 6112{
6086 return ib_register_device(&dev->ib_dev, NULL); 6113 const char *name;
6114
6115 rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
6116 if (!mlx5_lag_is_active(dev->mdev))
6117 name = "mlx5_%d";
6118 else
6119 name = "mlx5_bond_%d";
6120 return ib_register_device(&dev->ib_dev, name, NULL);
6087} 6121}
6088 6122
6089void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) 6123void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
@@ -6113,21 +6147,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
6113 cancel_delay_drop(dev); 6147 cancel_delay_drop(dev);
6114} 6148}
6115 6149
6116int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev)
6117{
6118 int err;
6119 int i;
6120
6121 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
6122 err = device_create_file(&dev->ib_dev.dev,
6123 mlx5_class_attributes[i]);
6124 if (err)
6125 return err;
6126 }
6127
6128 return 0;
6129}
6130
6131static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) 6150static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
6132{ 6151{
6133 mlx5_ib_register_vport_reps(dev); 6152 mlx5_ib_register_vport_reps(dev);
@@ -6151,6 +6170,8 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
6151 profile->stage[stage].cleanup(dev); 6170 profile->stage[stage].cleanup(dev);
6152 } 6171 }
6153 6172
6173 if (dev->devx_whitelist_uid)
6174 mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
6154 ib_dealloc_device((struct ib_device *)dev); 6175 ib_dealloc_device((struct ib_device *)dev);
6155} 6176}
6156 6177
@@ -6159,8 +6180,7 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
6159{ 6180{
6160 int err; 6181 int err;
6161 int i; 6182 int i;
6162 6183 int uid;
6163 printk_once(KERN_INFO "%s", mlx5_version);
6164 6184
6165 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { 6185 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
6166 if (profile->stage[i].init) { 6186 if (profile->stage[i].init) {
@@ -6170,6 +6190,10 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
6170 } 6190 }
6171 } 6191 }
6172 6192
6193 uid = mlx5_ib_devx_create(dev);
6194 if (uid > 0)
6195 dev->devx_whitelist_uid = uid;
6196
6173 dev->profile = profile; 6197 dev->profile = profile;
6174 dev->ib_active = true; 6198 dev->ib_active = true;
6175 6199
@@ -6230,9 +6254,6 @@ static const struct mlx5_ib_profile pf_profile = {
6230 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 6254 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
6231 mlx5_ib_stage_delay_drop_init, 6255 mlx5_ib_stage_delay_drop_init,
6232 mlx5_ib_stage_delay_drop_cleanup), 6256 mlx5_ib_stage_delay_drop_cleanup),
6233 STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
6234 mlx5_ib_stage_class_attr_init,
6235 NULL),
6236}; 6257};
6237 6258
6238static const struct mlx5_ib_profile nic_rep_profile = { 6259static const struct mlx5_ib_profile nic_rep_profile = {
@@ -6275,9 +6296,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
6275 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 6296 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
6276 mlx5_ib_stage_post_ib_reg_umr_init, 6297 mlx5_ib_stage_post_ib_reg_umr_init,
6277 NULL), 6298 NULL),
6278 STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR,
6279 mlx5_ib_stage_class_attr_init,
6280 NULL),
6281 STAGE_CREATE(MLX5_IB_STAGE_REP_REG, 6299 STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
6282 mlx5_ib_stage_rep_reg_init, 6300 mlx5_ib_stage_rep_reg_init,
6283 mlx5_ib_stage_rep_reg_cleanup), 6301 mlx5_ib_stage_rep_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index f3dbd75a0a96..549234988bb4 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -57,7 +57,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
57 int entry; 57 int entry;
58 unsigned long page_shift = umem->page_shift; 58 unsigned long page_shift = umem->page_shift;
59 59
60 if (umem->odp_data) { 60 if (umem->is_odp) {
61 *ncont = ib_umem_page_count(umem); 61 *ncont = ib_umem_page_count(umem);
62 *count = *ncont << (page_shift - PAGE_SHIFT); 62 *count = *ncont << (page_shift - PAGE_SHIFT);
63 *shift = page_shift; 63 *shift = page_shift;
@@ -152,14 +152,13 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
152 struct scatterlist *sg; 152 struct scatterlist *sg;
153 int entry; 153 int entry;
154#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 154#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
155 const bool odp = umem->odp_data != NULL; 155 if (umem->is_odp) {
156
157 if (odp) {
158 WARN_ON(shift != 0); 156 WARN_ON(shift != 0);
159 WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)); 157 WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
160 158
161 for (i = 0; i < num_pages; ++i) { 159 for (i = 0; i < num_pages; ++i) {
162 dma_addr_t pa = umem->odp_data->dma_list[offset + i]; 160 dma_addr_t pa =
161 to_ib_umem_odp(umem)->dma_list[offset + i];
163 162
164 pas[i] = cpu_to_be64(umem_dma_to_mtt(pa)); 163 pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
165 } 164 }
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 289c18db2611..b651a7a6fde9 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -39,8 +39,10 @@
39#include <rdma/ib_smi.h> 39#include <rdma/ib_smi.h>
40#include <linux/mlx5/driver.h> 40#include <linux/mlx5/driver.h>
41#include <linux/mlx5/cq.h> 41#include <linux/mlx5/cq.h>
42#include <linux/mlx5/fs.h>
42#include <linux/mlx5/qp.h> 43#include <linux/mlx5/qp.h>
43#include <linux/mlx5/srq.h> 44#include <linux/mlx5/srq.h>
45#include <linux/mlx5/fs.h>
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/mlx5/transobj.h> 47#include <linux/mlx5/transobj.h>
46#include <rdma/ib_user_verbs.h> 48#include <rdma/ib_user_verbs.h>
@@ -48,17 +50,17 @@
48#include <rdma/uverbs_ioctl.h> 50#include <rdma/uverbs_ioctl.h>
49#include <rdma/mlx5_user_ioctl_cmds.h> 51#include <rdma/mlx5_user_ioctl_cmds.h>
50 52
51#define mlx5_ib_dbg(dev, format, arg...) \ 53#define mlx5_ib_dbg(_dev, format, arg...) \
52pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 54 dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
53 __LINE__, current->pid, ##arg) 55 __LINE__, current->pid, ##arg)
54 56
55#define mlx5_ib_err(dev, format, arg...) \ 57#define mlx5_ib_err(_dev, format, arg...) \
56pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 58 dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
57 __LINE__, current->pid, ##arg) 59 __LINE__, current->pid, ##arg)
58 60
59#define mlx5_ib_warn(dev, format, arg...) \ 61#define mlx5_ib_warn(_dev, format, arg...) \
60pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ 62 dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
61 __LINE__, current->pid, ##arg) 63 __LINE__, current->pid, ##arg)
62 64
63#define field_avail(type, fld, sz) (offsetof(type, fld) + \ 65#define field_avail(type, fld, sz) (offsetof(type, fld) + \
64 sizeof(((type *)0)->fld) <= (sz)) 66 sizeof(((type *)0)->fld) <= (sz))
@@ -114,13 +116,6 @@ enum {
114 MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN, 116 MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
115}; 117};
116 118
117struct mlx5_ib_vma_private_data {
118 struct list_head list;
119 struct vm_area_struct *vma;
120 /* protect vma_private_list add/del */
121 struct mutex *vma_private_list_mutex;
122};
123
124struct mlx5_ib_ucontext { 119struct mlx5_ib_ucontext {
125 struct ib_ucontext ibucontext; 120 struct ib_ucontext ibucontext;
126 struct list_head db_page_list; 121 struct list_head db_page_list;
@@ -132,13 +127,12 @@ struct mlx5_ib_ucontext {
132 u8 cqe_version; 127 u8 cqe_version;
133 /* Transport Domain number */ 128 /* Transport Domain number */
134 u32 tdn; 129 u32 tdn;
135 struct list_head vma_private_list;
136 /* protect vma_private_list add/del */
137 struct mutex vma_private_list_mutex;
138 130
139 u64 lib_caps; 131 u64 lib_caps;
140 DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES); 132 DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
141 u16 devx_uid; 133 u16 devx_uid;
134 /* For RoCE LAG TX affinity */
135 atomic_t tx_port_affinity;
142}; 136};
143 137
144static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) 138static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -149,6 +143,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
149struct mlx5_ib_pd { 143struct mlx5_ib_pd {
150 struct ib_pd ibpd; 144 struct ib_pd ibpd;
151 u32 pdn; 145 u32 pdn;
146 u16 uid;
147};
148
149enum {
150 MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
151 MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
152 MLX5_IB_FLOW_ACTION_DECAP,
152}; 153};
153 154
154#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) 155#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -180,6 +181,7 @@ struct mlx5_ib_flow_matcher {
180 struct mlx5_ib_match_params matcher_mask; 181 struct mlx5_ib_match_params matcher_mask;
181 int mask_len; 182 int mask_len;
182 enum mlx5_ib_flow_type flow_type; 183 enum mlx5_ib_flow_type flow_type;
184 enum mlx5_flow_namespace_type ns_type;
183 u16 priority; 185 u16 priority;
184 struct mlx5_core_dev *mdev; 186 struct mlx5_core_dev *mdev;
185 atomic_t usecnt; 187 atomic_t usecnt;
@@ -188,6 +190,7 @@ struct mlx5_ib_flow_matcher {
188 190
189struct mlx5_ib_flow_db { 191struct mlx5_ib_flow_db {
190 struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; 192 struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
193 struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
191 struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; 194 struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
192 struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; 195 struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
193 struct mlx5_flow_table *lag_demux_ft; 196 struct mlx5_flow_table *lag_demux_ft;
@@ -322,6 +325,7 @@ enum {
322struct mlx5_ib_rwq_ind_table { 325struct mlx5_ib_rwq_ind_table {
323 struct ib_rwq_ind_table ib_rwq_ind_tbl; 326 struct ib_rwq_ind_table ib_rwq_ind_tbl;
324 u32 rqtn; 327 u32 rqtn;
328 u16 uid;
325}; 329};
326 330
327struct mlx5_ib_ubuffer { 331struct mlx5_ib_ubuffer {
@@ -428,7 +432,7 @@ struct mlx5_ib_qp {
428 struct list_head cq_send_list; 432 struct list_head cq_send_list;
429 struct mlx5_rate_limit rl; 433 struct mlx5_rate_limit rl;
430 u32 underlay_qpn; 434 u32 underlay_qpn;
431 bool tunnel_offload_en; 435 u32 flags_en;
432 /* storage for qp sub type when core qp type is IB_QPT_DRIVER */ 436 /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
433 enum ib_qp_type qp_sub_type; 437 enum ib_qp_type qp_sub_type;
434}; 438};
@@ -536,6 +540,7 @@ struct mlx5_ib_srq {
536struct mlx5_ib_xrcd { 540struct mlx5_ib_xrcd {
537 struct ib_xrcd ibxrcd; 541 struct ib_xrcd ibxrcd;
538 u32 xrcdn; 542 u32 xrcdn;
543 u16 uid;
539}; 544};
540 545
541enum mlx5_ib_mtt_access_flags { 546enum mlx5_ib_mtt_access_flags {
@@ -700,7 +705,7 @@ struct mlx5_roce {
700 rwlock_t netdev_lock; 705 rwlock_t netdev_lock;
701 struct net_device *netdev; 706 struct net_device *netdev;
702 struct notifier_block nb; 707 struct notifier_block nb;
703 atomic_t next_port; 708 atomic_t tx_port_affinity;
704 enum ib_port_state last_port_state; 709 enum ib_port_state last_port_state;
705 struct mlx5_ib_dev *dev; 710 struct mlx5_ib_dev *dev;
706 u8 native_port_num; 711 u8 native_port_num;
@@ -815,6 +820,11 @@ struct mlx5_ib_flow_action {
815 u64 ib_flags; 820 u64 ib_flags;
816 struct mlx5_accel_esp_xfrm *ctx; 821 struct mlx5_accel_esp_xfrm *ctx;
817 } esp_aes_gcm; 822 } esp_aes_gcm;
823 struct {
824 struct mlx5_ib_dev *dev;
825 u32 sub_type;
826 u32 action_id;
827 } flow_action_raw;
818 }; 828 };
819}; 829};
820 830
@@ -859,9 +869,20 @@ to_mcounters(struct ib_counters *ibcntrs)
859 return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); 869 return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
860} 870}
861 871
872int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
873 bool is_egress,
874 struct mlx5_flow_act *action);
875struct mlx5_ib_lb_state {
876 /* protect the user_td */
877 struct mutex mutex;
878 u32 user_td;
879 int qps;
880 bool enabled;
881};
882
862struct mlx5_ib_dev { 883struct mlx5_ib_dev {
863 struct ib_device ib_dev; 884 struct ib_device ib_dev;
864 const struct uverbs_object_tree_def *driver_trees[6]; 885 const struct uverbs_object_tree_def *driver_trees[7];
865 struct mlx5_core_dev *mdev; 886 struct mlx5_core_dev *mdev;
866 struct mlx5_roce roce[MLX5_MAX_PORTS]; 887 struct mlx5_roce roce[MLX5_MAX_PORTS];
867 int num_ports; 888 int num_ports;
@@ -900,13 +921,12 @@ struct mlx5_ib_dev {
900 const struct mlx5_ib_profile *profile; 921 const struct mlx5_ib_profile *profile;
901 struct mlx5_eswitch_rep *rep; 922 struct mlx5_eswitch_rep *rep;
902 923
903 /* protect the user_td */ 924 struct mlx5_ib_lb_state lb;
904 struct mutex lb_mutex;
905 u32 user_td;
906 u8 umr_fence; 925 u8 umr_fence;
907 struct list_head ib_dev_list; 926 struct list_head ib_dev_list;
908 u64 sys_image_guid; 927 u64 sys_image_guid;
909 struct mlx5_memic memic; 928 struct mlx5_memic memic;
929 u16 devx_whitelist_uid;
910}; 930};
911 931
912static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) 932static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1017,6 +1037,8 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
1017int mlx5_ib_destroy_srq(struct ib_srq *srq); 1037int mlx5_ib_destroy_srq(struct ib_srq *srq);
1018int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, 1038int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1019 const struct ib_recv_wr **bad_wr); 1039 const struct ib_recv_wr **bad_wr);
1040int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
1041void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
1020struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, 1042struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1021 struct ib_qp_init_attr *init_attr, 1043 struct ib_qp_init_attr *init_attr,
1022 struct ib_udata *udata); 1044 struct ib_udata *udata);
@@ -1106,7 +1128,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
1106void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, 1128void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
1107 int page_shift, __be64 *pas, int access_flags); 1129 int page_shift, __be64 *pas, int access_flags);
1108void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); 1130void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
1109int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); 1131int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
1110int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); 1132int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
1111int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); 1133int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
1112 1134
@@ -1141,7 +1163,7 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
1141int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); 1163int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
1142int __init mlx5_ib_odp_init(void); 1164int __init mlx5_ib_odp_init(void);
1143void mlx5_ib_odp_cleanup(void); 1165void mlx5_ib_odp_cleanup(void);
1144void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 1166void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
1145 unsigned long end); 1167 unsigned long end);
1146void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); 1168void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
1147void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, 1169void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
@@ -1180,7 +1202,6 @@ void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev);
1180int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); 1202int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev);
1181void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); 1203void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev);
1182int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev); 1204int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev);
1183int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev);
1184void __mlx5_ib_remove(struct mlx5_ib_dev *dev, 1205void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
1185 const struct mlx5_ib_profile *profile, 1206 const struct mlx5_ib_profile *profile,
1186 int stage); 1207 int stage);
@@ -1229,22 +1250,20 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
1229 u8 port_num); 1250 u8 port_num);
1230 1251
1231#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) 1252#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
1232int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, 1253int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
1233 struct mlx5_ib_ucontext *context); 1254void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
1234void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
1235 struct mlx5_ib_ucontext *context);
1236const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); 1255const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
1237struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( 1256struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
1238 struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, 1257 struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
1239 void *cmd_in, int inlen, int dest_id, int dest_type); 1258 struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
1259 int dest_id, int dest_type);
1240bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); 1260bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
1241int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); 1261int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
1262void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
1242#else 1263#else
1243static inline int 1264static inline int
1244mlx5_ib_devx_create(struct mlx5_ib_dev *dev, 1265mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
1245 struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; }; 1266static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
1246static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
1247 struct mlx5_ib_ucontext *context) {}
1248static inline const struct uverbs_object_tree_def * 1267static inline const struct uverbs_object_tree_def *
1249mlx5_ib_get_devx_tree(void) { return NULL; } 1268mlx5_ib_get_devx_tree(void) { return NULL; }
1250static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, 1269static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
@@ -1257,6 +1276,11 @@ mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
1257{ 1276{
1258 return 0; 1277 return 0;
1259} 1278}
1279static inline void
1280mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1281{
1282 return;
1283};
1260#endif 1284#endif
1261static inline void init_query_mad(struct ib_smp *mad) 1285static inline void init_query_mad(struct ib_smp *mad)
1262{ 1286{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e22314837645..9b195d65a13e 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -98,7 +98,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
98#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 98#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
99static void update_odp_mr(struct mlx5_ib_mr *mr) 99static void update_odp_mr(struct mlx5_ib_mr *mr)
100{ 100{
101 if (mr->umem->odp_data) { 101 if (mr->umem->is_odp) {
102 /* 102 /*
103 * This barrier prevents the compiler from moving the 103 * This barrier prevents the compiler from moving the
104 * setting of umem->odp_data->private to point to our 104 * setting of umem->odp_data->private to point to our
@@ -107,7 +107,7 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
107 * handle invalidations. 107 * handle invalidations.
108 */ 108 */
109 smp_wmb(); 109 smp_wmb();
110 mr->umem->odp_data->private = mr; 110 to_ib_umem_odp(mr->umem)->private = mr;
111 /* 111 /*
112 * Make sure we will see the new 112 * Make sure we will see the new
113 * umem->odp_data->private value in the invalidation 113 * umem->odp_data->private value in the invalidation
@@ -691,7 +691,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
691 init_completion(&ent->compl); 691 init_completion(&ent->compl);
692 INIT_WORK(&ent->work, cache_work_func); 692 INIT_WORK(&ent->work, cache_work_func);
693 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 693 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
694 queue_work(cache->wq, &ent->work);
695 694
696 if (i > MR_CACHE_LAST_STD_ENTRY) { 695 if (i > MR_CACHE_LAST_STD_ENTRY) {
697 mlx5_odp_init_mr_cache_entry(ent); 696 mlx5_odp_init_mr_cache_entry(ent);
@@ -711,6 +710,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
711 ent->limit = dev->mdev->profile->mr_cache[i].limit; 710 ent->limit = dev->mdev->profile->mr_cache[i].limit;
712 else 711 else
713 ent->limit = 0; 712 ent->limit = 0;
713 queue_work(cache->wq, &ent->work);
714 } 714 }
715 715
716 err = mlx5_mr_cache_debugfs_init(dev); 716 err = mlx5_mr_cache_debugfs_init(dev);
@@ -1627,14 +1627,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1627 struct ib_umem *umem = mr->umem; 1627 struct ib_umem *umem = mr->umem;
1628 1628
1629#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1629#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1630 if (umem && umem->odp_data) { 1630 if (umem && umem->is_odp) {
1631 struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
1632
1631 /* Prevent new page faults from succeeding */ 1633 /* Prevent new page faults from succeeding */
1632 mr->live = 0; 1634 mr->live = 0;
1633 /* Wait for all running page-fault handlers to finish. */ 1635 /* Wait for all running page-fault handlers to finish. */
1634 synchronize_srcu(&dev->mr_srcu); 1636 synchronize_srcu(&dev->mr_srcu);
1635 /* Destroy all page mappings */ 1637 /* Destroy all page mappings */
1636 if (umem->odp_data->page_list) 1638 if (umem_odp->page_list)
1637 mlx5_ib_invalidate_range(umem, ib_umem_start(umem), 1639 mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
1638 ib_umem_end(umem)); 1640 ib_umem_end(umem));
1639 else 1641 else
1640 mlx5_ib_free_implicit_mr(mr); 1642 mlx5_ib_free_implicit_mr(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d216e0d2921d..b04eb6775326 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -61,13 +61,21 @@ static int check_parent(struct ib_umem_odp *odp,
61 return mr && mr->parent == parent && !odp->dying; 61 return mr && mr->parent == parent && !odp->dying;
62} 62}
63 63
64struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
65{
66 if (WARN_ON(!mr || !mr->umem || !mr->umem->is_odp))
67 return NULL;
68
69 return to_ib_umem_odp(mr->umem)->per_mm;
70}
71
64static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) 72static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
65{ 73{
66 struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; 74 struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
67 struct ib_ucontext *ctx = odp->umem->context; 75 struct ib_ucontext_per_mm *per_mm = odp->per_mm;
68 struct rb_node *rb; 76 struct rb_node *rb;
69 77
70 down_read(&ctx->umem_rwsem); 78 down_read(&per_mm->umem_rwsem);
71 while (1) { 79 while (1) {
72 rb = rb_next(&odp->interval_tree.rb); 80 rb = rb_next(&odp->interval_tree.rb);
73 if (!rb) 81 if (!rb)
@@ -79,19 +87,19 @@ static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
79not_found: 87not_found:
80 odp = NULL; 88 odp = NULL;
81end: 89end:
82 up_read(&ctx->umem_rwsem); 90 up_read(&per_mm->umem_rwsem);
83 return odp; 91 return odp;
84} 92}
85 93
86static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx, 94static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
87 u64 start, u64 length,
88 struct mlx5_ib_mr *parent) 95 struct mlx5_ib_mr *parent)
89{ 96{
97 struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
90 struct ib_umem_odp *odp; 98 struct ib_umem_odp *odp;
91 struct rb_node *rb; 99 struct rb_node *rb;
92 100
93 down_read(&ctx->umem_rwsem); 101 down_read(&per_mm->umem_rwsem);
94 odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length); 102 odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
95 if (!odp) 103 if (!odp)
96 goto end; 104 goto end;
97 105
@@ -102,13 +110,13 @@ static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
102 if (!rb) 110 if (!rb)
103 goto not_found; 111 goto not_found;
104 odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); 112 odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
105 if (ib_umem_start(odp->umem) > start + length) 113 if (ib_umem_start(&odp->umem) > start + length)
106 goto not_found; 114 goto not_found;
107 } 115 }
108not_found: 116not_found:
109 odp = NULL; 117 odp = NULL;
110end: 118end:
111 up_read(&ctx->umem_rwsem); 119 up_read(&per_mm->umem_rwsem);
112 return odp; 120 return odp;
113} 121}
114 122
@@ -116,7 +124,6 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
116 size_t nentries, struct mlx5_ib_mr *mr, int flags) 124 size_t nentries, struct mlx5_ib_mr *mr, int flags)
117{ 125{
118 struct ib_pd *pd = mr->ibmr.pd; 126 struct ib_pd *pd = mr->ibmr.pd;
119 struct ib_ucontext *ctx = pd->uobject->context;
120 struct mlx5_ib_dev *dev = to_mdev(pd->device); 127 struct mlx5_ib_dev *dev = to_mdev(pd->device);
121 struct ib_umem_odp *odp; 128 struct ib_umem_odp *odp;
122 unsigned long va; 129 unsigned long va;
@@ -131,13 +138,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
131 return; 138 return;
132 } 139 }
133 140
134 odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE, 141 odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
135 nentries * MLX5_IMR_MTT_SIZE, mr); 142 nentries * MLX5_IMR_MTT_SIZE, mr);
136 143
137 for (i = 0; i < nentries; i++, pklm++) { 144 for (i = 0; i < nentries; i++, pklm++) {
138 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); 145 pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
139 va = (offset + i) * MLX5_IMR_MTT_SIZE; 146 va = (offset + i) * MLX5_IMR_MTT_SIZE;
140 if (odp && odp->umem->address == va) { 147 if (odp && odp->umem.address == va) {
141 struct mlx5_ib_mr *mtt = odp->private; 148 struct mlx5_ib_mr *mtt = odp->private;
142 149
143 pklm->key = cpu_to_be32(mtt->ibmr.lkey); 150 pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -153,13 +160,13 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
153static void mr_leaf_free_action(struct work_struct *work) 160static void mr_leaf_free_action(struct work_struct *work)
154{ 161{
155 struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); 162 struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
156 int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT; 163 int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
157 struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; 164 struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
158 165
159 mr->parent = NULL; 166 mr->parent = NULL;
160 synchronize_srcu(&mr->dev->mr_srcu); 167 synchronize_srcu(&mr->dev->mr_srcu);
161 168
162 ib_umem_release(odp->umem); 169 ib_umem_release(&odp->umem);
163 if (imr->live) 170 if (imr->live)
164 mlx5_ib_update_xlt(imr, idx, 1, 0, 171 mlx5_ib_update_xlt(imr, idx, 1, 0,
165 MLX5_IB_UPD_XLT_INDIRECT | 172 MLX5_IB_UPD_XLT_INDIRECT |
@@ -170,22 +177,24 @@ static void mr_leaf_free_action(struct work_struct *work)
170 wake_up(&imr->q_leaf_free); 177 wake_up(&imr->q_leaf_free);
171} 178}
172 179
173void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, 180void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
174 unsigned long end) 181 unsigned long end)
175{ 182{
176 struct mlx5_ib_mr *mr; 183 struct mlx5_ib_mr *mr;
177 const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / 184 const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
178 sizeof(struct mlx5_mtt)) - 1; 185 sizeof(struct mlx5_mtt)) - 1;
179 u64 idx = 0, blk_start_idx = 0; 186 u64 idx = 0, blk_start_idx = 0;
187 struct ib_umem *umem;
180 int in_block = 0; 188 int in_block = 0;
181 u64 addr; 189 u64 addr;
182 190
183 if (!umem || !umem->odp_data) { 191 if (!umem_odp) {
184 pr_err("invalidation called on NULL umem or non-ODP umem\n"); 192 pr_err("invalidation called on NULL umem or non-ODP umem\n");
185 return; 193 return;
186 } 194 }
195 umem = &umem_odp->umem;
187 196
188 mr = umem->odp_data->private; 197 mr = umem_odp->private;
189 198
190 if (!mr || !mr->ibmr.pd) 199 if (!mr || !mr->ibmr.pd)
191 return; 200 return;
@@ -208,7 +217,7 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
208 * estimate the cost of another UMR vs. the cost of bigger 217 * estimate the cost of another UMR vs. the cost of bigger
209 * UMR. 218 * UMR.
210 */ 219 */
211 if (umem->odp_data->dma_list[idx] & 220 if (umem_odp->dma_list[idx] &
212 (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) { 221 (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
213 if (!in_block) { 222 if (!in_block) {
214 blk_start_idx = idx; 223 blk_start_idx = idx;
@@ -237,13 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
237 * needed. 246 * needed.
238 */ 247 */
239 248
240 ib_umem_odp_unmap_dma_pages(umem, start, end); 249 ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
241 250
242 if (unlikely(!umem->npages && mr->parent && 251 if (unlikely(!umem->npages && mr->parent &&
243 !umem->odp_data->dying)) { 252 !umem_odp->dying)) {
244 WRITE_ONCE(umem->odp_data->dying, 1); 253 WRITE_ONCE(umem_odp->dying, 1);
245 atomic_inc(&mr->parent->num_leaf_free); 254 atomic_inc(&mr->parent->num_leaf_free);
246 schedule_work(&umem->odp_data->work); 255 schedule_work(&umem_odp->work);
247 } 256 }
248} 257}
249 258
@@ -366,16 +375,15 @@ fail:
366static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, 375static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
367 u64 io_virt, size_t bcnt) 376 u64 io_virt, size_t bcnt)
368{ 377{
369 struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
370 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); 378 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
371 struct ib_umem_odp *odp, *result = NULL; 379 struct ib_umem_odp *odp, *result = NULL;
380 struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
372 u64 addr = io_virt & MLX5_IMR_MTT_MASK; 381 u64 addr = io_virt & MLX5_IMR_MTT_MASK;
373 int nentries = 0, start_idx = 0, ret; 382 int nentries = 0, start_idx = 0, ret;
374 struct mlx5_ib_mr *mtt; 383 struct mlx5_ib_mr *mtt;
375 struct ib_umem *umem;
376 384
377 mutex_lock(&mr->umem->odp_data->umem_mutex); 385 mutex_lock(&odp_mr->umem_mutex);
378 odp = odp_lookup(ctx, addr, 1, mr); 386 odp = odp_lookup(addr, 1, mr);
379 387
380 mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", 388 mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
381 io_virt, bcnt, addr, odp); 389 io_virt, bcnt, addr, odp);
@@ -385,22 +393,23 @@ next_mr:
385 if (nentries) 393 if (nentries)
386 nentries++; 394 nentries++;
387 } else { 395 } else {
388 umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE); 396 odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
389 if (IS_ERR(umem)) { 397 MLX5_IMR_MTT_SIZE);
390 mutex_unlock(&mr->umem->odp_data->umem_mutex); 398 if (IS_ERR(odp)) {
391 return ERR_CAST(umem); 399 mutex_unlock(&odp_mr->umem_mutex);
400 return ERR_CAST(odp);
392 } 401 }
393 402
394 mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags); 403 mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
404 mr->access_flags);
395 if (IS_ERR(mtt)) { 405 if (IS_ERR(mtt)) {
396 mutex_unlock(&mr->umem->odp_data->umem_mutex); 406 mutex_unlock(&odp_mr->umem_mutex);
397 ib_umem_release(umem); 407 ib_umem_release(&odp->umem);
398 return ERR_CAST(mtt); 408 return ERR_CAST(mtt);
399 } 409 }
400 410
401 odp = umem->odp_data;
402 odp->private = mtt; 411 odp->private = mtt;
403 mtt->umem = umem; 412 mtt->umem = &odp->umem;
404 mtt->mmkey.iova = addr; 413 mtt->mmkey.iova = addr;
405 mtt->parent = mr; 414 mtt->parent = mr;
406 INIT_WORK(&odp->work, mr_leaf_free_action); 415 INIT_WORK(&odp->work, mr_leaf_free_action);
@@ -417,7 +426,7 @@ next_mr:
417 addr += MLX5_IMR_MTT_SIZE; 426 addr += MLX5_IMR_MTT_SIZE;
418 if (unlikely(addr < io_virt + bcnt)) { 427 if (unlikely(addr < io_virt + bcnt)) {
419 odp = odp_next(odp); 428 odp = odp_next(odp);
420 if (odp && odp->umem->address != addr) 429 if (odp && odp->umem.address != addr)
421 odp = NULL; 430 odp = NULL;
422 goto next_mr; 431 goto next_mr;
423 } 432 }
@@ -432,7 +441,7 @@ next_mr:
432 } 441 }
433 } 442 }
434 443
435 mutex_unlock(&mr->umem->odp_data->umem_mutex); 444 mutex_unlock(&odp_mr->umem_mutex);
436 return result; 445 return result;
437} 446}
438 447
@@ -460,36 +469,36 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
460 return imr; 469 return imr;
461} 470}
462 471
463static int mr_leaf_free(struct ib_umem *umem, u64 start, 472static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
464 u64 end, void *cookie) 473 void *cookie)
465{ 474{
466 struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie; 475 struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
476 struct ib_umem *umem = &umem_odp->umem;
467 477
468 if (mr->parent != imr) 478 if (mr->parent != imr)
469 return 0; 479 return 0;
470 480
471 ib_umem_odp_unmap_dma_pages(umem, 481 ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
472 ib_umem_start(umem),
473 ib_umem_end(umem)); 482 ib_umem_end(umem));
474 483
475 if (umem->odp_data->dying) 484 if (umem_odp->dying)
476 return 0; 485 return 0;
477 486
478 WRITE_ONCE(umem->odp_data->dying, 1); 487 WRITE_ONCE(umem_odp->dying, 1);
479 atomic_inc(&imr->num_leaf_free); 488 atomic_inc(&imr->num_leaf_free);
480 schedule_work(&umem->odp_data->work); 489 schedule_work(&umem_odp->work);
481 490
482 return 0; 491 return 0;
483} 492}
484 493
485void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) 494void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
486{ 495{
487 struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context; 496 struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
488 497
489 down_read(&ctx->umem_rwsem); 498 down_read(&per_mm->umem_rwsem);
490 rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX, 499 rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
491 mr_leaf_free, true, imr); 500 mr_leaf_free, true, imr);
492 up_read(&ctx->umem_rwsem); 501 up_read(&per_mm->umem_rwsem);
493 502
494 wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); 503 wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
495} 504}
@@ -497,6 +506,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
497static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 506static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
498 u64 io_virt, size_t bcnt, u32 *bytes_mapped) 507 u64 io_virt, size_t bcnt, u32 *bytes_mapped)
499{ 508{
509 struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
500 u64 access_mask = ODP_READ_ALLOWED_BIT; 510 u64 access_mask = ODP_READ_ALLOWED_BIT;
501 int npages = 0, page_shift, np; 511 int npages = 0, page_shift, np;
502 u64 start_idx, page_mask; 512 u64 start_idx, page_mask;
@@ -505,7 +515,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
505 size_t size; 515 size_t size;
506 int ret; 516 int ret;
507 517
508 if (!mr->umem->odp_data->page_list) { 518 if (!odp_mr->page_list) {
509 odp = implicit_mr_get_data(mr, io_virt, bcnt); 519 odp = implicit_mr_get_data(mr, io_virt, bcnt);
510 520
511 if (IS_ERR(odp)) 521 if (IS_ERR(odp))
@@ -513,11 +523,11 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
513 mr = odp->private; 523 mr = odp->private;
514 524
515 } else { 525 } else {
516 odp = mr->umem->odp_data; 526 odp = odp_mr;
517 } 527 }
518 528
519next_mr: 529next_mr:
520 size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt); 530 size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
521 531
522 page_shift = mr->umem->page_shift; 532 page_shift = mr->umem->page_shift;
523 page_mask = ~(BIT(page_shift) - 1); 533 page_mask = ~(BIT(page_shift) - 1);
@@ -533,7 +543,7 @@ next_mr:
533 */ 543 */
534 smp_rmb(); 544 smp_rmb();
535 545
536 ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size, 546 ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
537 access_mask, current_seq); 547 access_mask, current_seq);
538 548
539 if (ret < 0) 549 if (ret < 0)
@@ -542,7 +552,8 @@ next_mr:
542 np = ret; 552 np = ret;
543 553
544 mutex_lock(&odp->umem_mutex); 554 mutex_lock(&odp->umem_mutex);
545 if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) { 555 if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
556 current_seq)) {
546 /* 557 /*
547 * No need to check whether the MTTs really belong to 558 * No need to check whether the MTTs really belong to
548 * this MR, since ib_umem_odp_map_dma_pages already 559 * this MR, since ib_umem_odp_map_dma_pages already
@@ -575,7 +586,7 @@ next_mr:
575 586
576 io_virt += size; 587 io_virt += size;
577 next = odp_next(odp); 588 next = odp_next(odp);
578 if (unlikely(!next || next->umem->address != io_virt)) { 589 if (unlikely(!next || next->umem.address != io_virt)) {
579 mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", 590 mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
580 io_virt, next); 591 io_virt, next);
581 return -EAGAIN; 592 return -EAGAIN;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index daf1eb84cd31..6841c0f9237f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -37,6 +37,7 @@
37#include <linux/mlx5/fs.h> 37#include <linux/mlx5/fs.h>
38#include "mlx5_ib.h" 38#include "mlx5_ib.h"
39#include "ib_rep.h" 39#include "ib_rep.h"
40#include "cmd.h"
40 41
41/* not supported currently */ 42/* not supported currently */
42static int wq_signature; 43static int wq_signature;
@@ -850,6 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
850 goto err_umem; 851 goto err_umem;
851 } 852 }
852 853
854 MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
853 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); 855 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
854 if (ubuffer->umem) 856 if (ubuffer->umem)
855 mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); 857 mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -1051,7 +1053,8 @@ static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
1051 1053
1052static int is_connected(enum ib_qp_type qp_type) 1054static int is_connected(enum ib_qp_type qp_type)
1053{ 1055{
1054 if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) 1056 if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC ||
1057 qp_type == MLX5_IB_QPT_DCI)
1055 return 1; 1058 return 1;
1056 1059
1057 return 0; 1060 return 0;
@@ -1059,11 +1062,13 @@ static int is_connected(enum ib_qp_type qp_type)
1059 1062
1060static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, 1063static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1061 struct mlx5_ib_qp *qp, 1064 struct mlx5_ib_qp *qp,
1062 struct mlx5_ib_sq *sq, u32 tdn) 1065 struct mlx5_ib_sq *sq, u32 tdn,
1066 struct ib_pd *pd)
1063{ 1067{
1064 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; 1068 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
1065 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 1069 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1066 1070
1071 MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
1067 MLX5_SET(tisc, tisc, transport_domain, tdn); 1072 MLX5_SET(tisc, tisc, transport_domain, tdn);
1068 if (qp->flags & MLX5_IB_QP_UNDERLAY) 1073 if (qp->flags & MLX5_IB_QP_UNDERLAY)
1069 MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); 1074 MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
@@ -1072,9 +1077,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1072} 1077}
1073 1078
1074static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, 1079static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
1075 struct mlx5_ib_sq *sq) 1080 struct mlx5_ib_sq *sq, struct ib_pd *pd)
1076{ 1081{
1077 mlx5_core_destroy_tis(dev->mdev, sq->tisn); 1082 mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
1078} 1083}
1079 1084
1080static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, 1085static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
@@ -1114,6 +1119,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
1114 goto err_umem; 1119 goto err_umem;
1115 } 1120 }
1116 1121
1122 MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
1117 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); 1123 sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1118 MLX5_SET(sqc, sqc, flush_in_error_en, 1); 1124 MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1119 if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) 1125 if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
@@ -1188,7 +1194,7 @@ static size_t get_rq_pas_size(void *qpc)
1188 1194
1189static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 1195static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1190 struct mlx5_ib_rq *rq, void *qpin, 1196 struct mlx5_ib_rq *rq, void *qpin,
1191 size_t qpinlen) 1197 size_t qpinlen, struct ib_pd *pd)
1192{ 1198{
1193 struct mlx5_ib_qp *mqp = rq->base.container_mibqp; 1199 struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
1194 __be64 *pas; 1200 __be64 *pas;
@@ -1209,6 +1215,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
1209 if (!in) 1215 if (!in)
1210 return -ENOMEM; 1216 return -ENOMEM;
1211 1217
1218 MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
1212 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 1219 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
1213 if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING)) 1220 if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
1214 MLX5_SET(rqc, rqc, vsd, 1); 1221 MLX5_SET(rqc, rqc, vsd, 1);
@@ -1256,10 +1263,23 @@ static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
1256 MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx)); 1263 MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
1257} 1264}
1258 1265
1266static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1267 struct mlx5_ib_rq *rq,
1268 u32 qp_flags_en,
1269 struct ib_pd *pd)
1270{
1271 if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1272 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1273 mlx5_ib_disable_lb(dev, false, true);
1274 mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
1275}
1276
1259static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, 1277static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1260 struct mlx5_ib_rq *rq, u32 tdn, 1278 struct mlx5_ib_rq *rq, u32 tdn,
1261 bool tunnel_offload_en) 1279 u32 *qp_flags_en,
1280 struct ib_pd *pd)
1262{ 1281{
1282 u8 lb_flag = 0;
1263 u32 *in; 1283 u32 *in;
1264 void *tirc; 1284 void *tirc;
1265 int inlen; 1285 int inlen;
@@ -1270,33 +1290,45 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1270 if (!in) 1290 if (!in)
1271 return -ENOMEM; 1291 return -ENOMEM;
1272 1292
1293 MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1273 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1294 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1274 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); 1295 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
1275 MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn); 1296 MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
1276 MLX5_SET(tirc, tirc, transport_domain, tdn); 1297 MLX5_SET(tirc, tirc, transport_domain, tdn);
1277 if (tunnel_offload_en) 1298 if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1278 MLX5_SET(tirc, tirc, tunneled_offload_en, 1); 1299 MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1279 1300
1280 if (dev->rep) 1301 if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
1281 MLX5_SET(tirc, tirc, self_lb_block, 1302 lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1282 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); 1303
1304 if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
1305 lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1306
1307 if (dev->rep) {
1308 lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1309 *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1310 }
1311
1312 MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1283 1313
1284 err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); 1314 err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
1285 1315
1316 if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1317 err = mlx5_ib_enable_lb(dev, false, true);
1318
1319 if (err)
1320 destroy_raw_packet_qp_tir(dev, rq, 0, pd);
1321 }
1286 kvfree(in); 1322 kvfree(in);
1287 1323
1288 return err; 1324 return err;
1289} 1325}
1290 1326
1291static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
1292 struct mlx5_ib_rq *rq)
1293{
1294 mlx5_core_destroy_tir(dev->mdev, rq->tirn);
1295}
1296
1297static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1327static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1298 u32 *in, size_t inlen, 1328 u32 *in, size_t inlen,
1299 struct ib_pd *pd) 1329 struct ib_pd *pd,
1330 struct ib_udata *udata,
1331 struct mlx5_ib_create_qp_resp *resp)
1300{ 1332{
1301 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; 1333 struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
1302 struct mlx5_ib_sq *sq = &raw_packet_qp->sq; 1334 struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1306,9 +1338,10 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1306 struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext); 1338 struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
1307 int err; 1339 int err;
1308 u32 tdn = mucontext->tdn; 1340 u32 tdn = mucontext->tdn;
1341 u16 uid = to_mpd(pd)->uid;
1309 1342
1310 if (qp->sq.wqe_cnt) { 1343 if (qp->sq.wqe_cnt) {
1311 err = create_raw_packet_qp_tis(dev, qp, sq, tdn); 1344 err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
1312 if (err) 1345 if (err)
1313 return err; 1346 return err;
1314 1347
@@ -1316,6 +1349,13 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1316 if (err) 1349 if (err)
1317 goto err_destroy_tis; 1350 goto err_destroy_tis;
1318 1351
1352 if (uid) {
1353 resp->tisn = sq->tisn;
1354 resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
1355 resp->sqn = sq->base.mqp.qpn;
1356 resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
1357 }
1358
1319 sq->base.container_mibqp = qp; 1359 sq->base.container_mibqp = qp;
1320 sq->base.mqp.event = mlx5_ib_qp_event; 1360 sq->base.mqp.event = mlx5_ib_qp_event;
1321 } 1361 }
@@ -1327,22 +1367,32 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1327 rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; 1367 rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
1328 if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING) 1368 if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
1329 rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; 1369 rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
1330 err = create_raw_packet_qp_rq(dev, rq, in, inlen); 1370 err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
1331 if (err) 1371 if (err)
1332 goto err_destroy_sq; 1372 goto err_destroy_sq;
1333 1373
1334 1374 err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd);
1335 err = create_raw_packet_qp_tir(dev, rq, tdn,
1336 qp->tunnel_offload_en);
1337 if (err) 1375 if (err)
1338 goto err_destroy_rq; 1376 goto err_destroy_rq;
1377
1378 if (uid) {
1379 resp->rqn = rq->base.mqp.qpn;
1380 resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
1381 resp->tirn = rq->tirn;
1382 resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1383 }
1339 } 1384 }
1340 1385
1341 qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn : 1386 qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
1342 rq->base.mqp.qpn; 1387 rq->base.mqp.qpn;
1388 err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
1389 if (err)
1390 goto err_destroy_tir;
1343 1391
1344 return 0; 1392 return 0;
1345 1393
1394err_destroy_tir:
1395 destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
1346err_destroy_rq: 1396err_destroy_rq:
1347 destroy_raw_packet_qp_rq(dev, rq); 1397 destroy_raw_packet_qp_rq(dev, rq);
1348err_destroy_sq: 1398err_destroy_sq:
@@ -1350,7 +1400,7 @@ err_destroy_sq:
1350 return err; 1400 return err;
1351 destroy_raw_packet_qp_sq(dev, sq); 1401 destroy_raw_packet_qp_sq(dev, sq);
1352err_destroy_tis: 1402err_destroy_tis:
1353 destroy_raw_packet_qp_tis(dev, sq); 1403 destroy_raw_packet_qp_tis(dev, sq, pd);
1354 1404
1355 return err; 1405 return err;
1356} 1406}
@@ -1363,13 +1413,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
1363 struct mlx5_ib_rq *rq = &raw_packet_qp->rq; 1413 struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
1364 1414
1365 if (qp->rq.wqe_cnt) { 1415 if (qp->rq.wqe_cnt) {
1366 destroy_raw_packet_qp_tir(dev, rq); 1416 destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
1367 destroy_raw_packet_qp_rq(dev, rq); 1417 destroy_raw_packet_qp_rq(dev, rq);
1368 } 1418 }
1369 1419
1370 if (qp->sq.wqe_cnt) { 1420 if (qp->sq.wqe_cnt) {
1371 destroy_raw_packet_qp_sq(dev, sq); 1421 destroy_raw_packet_qp_sq(dev, sq);
1372 destroy_raw_packet_qp_tis(dev, sq); 1422 destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
1373 } 1423 }
1374} 1424}
1375 1425
@@ -1387,7 +1437,11 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
1387 1437
1388static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) 1438static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
1389{ 1439{
1390 mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn); 1440 if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1441 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
1442 mlx5_ib_disable_lb(dev, false, true);
1443 mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1444 to_mpd(qp->ibqp.pd)->uid);
1391} 1445}
1392 1446
1393static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, 1447static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
@@ -1410,6 +1464,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1410 u32 tdn = mucontext->tdn; 1464 u32 tdn = mucontext->tdn;
1411 struct mlx5_ib_create_qp_rss ucmd = {}; 1465 struct mlx5_ib_create_qp_rss ucmd = {};
1412 size_t required_cmd_sz; 1466 size_t required_cmd_sz;
1467 u8 lb_flag = 0;
1413 1468
1414 if (init_attr->qp_type != IB_QPT_RAW_PACKET) 1469 if (init_attr->qp_type != IB_QPT_RAW_PACKET)
1415 return -EOPNOTSUPP; 1470 return -EOPNOTSUPP;
@@ -1444,7 +1499,9 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1444 return -EOPNOTSUPP; 1499 return -EOPNOTSUPP;
1445 } 1500 }
1446 1501
1447 if (ucmd.flags & ~MLX5_QP_FLAG_TUNNEL_OFFLOADS) { 1502 if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
1503 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
1504 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
1448 mlx5_ib_dbg(dev, "invalid flags\n"); 1505 mlx5_ib_dbg(dev, "invalid flags\n");
1449 return -EOPNOTSUPP; 1506 return -EOPNOTSUPP;
1450 } 1507 }
@@ -1461,6 +1518,16 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1461 return -EOPNOTSUPP; 1518 return -EOPNOTSUPP;
1462 } 1519 }
1463 1520
1521 if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->rep) {
1522 lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
1523 qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1524 }
1525
1526 if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
1527 lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
1528 qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
1529 }
1530
1464 err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); 1531 err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
1465 if (err) { 1532 if (err) {
1466 mlx5_ib_dbg(dev, "copy failed\n"); 1533 mlx5_ib_dbg(dev, "copy failed\n");
@@ -1472,6 +1539,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1472 if (!in) 1539 if (!in)
1473 return -ENOMEM; 1540 return -ENOMEM;
1474 1541
1542 MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
1475 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1543 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1476 MLX5_SET(tirc, tirc, disp_type, 1544 MLX5_SET(tirc, tirc, disp_type,
1477 MLX5_TIRC_DISP_TYPE_INDIRECT); 1545 MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1484,6 +1552,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1484 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) 1552 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
1485 MLX5_SET(tirc, tirc, tunneled_offload_en, 1); 1553 MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
1486 1554
1555 MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
1556
1487 if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER) 1557 if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
1488 hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner); 1558 hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
1489 else 1559 else
@@ -1580,26 +1650,141 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
1580 MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); 1650 MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
1581 1651
1582create_tir: 1652create_tir:
1583 if (dev->rep)
1584 MLX5_SET(tirc, tirc, self_lb_block,
1585 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
1586
1587 err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); 1653 err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
1588 1654
1655 if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
1656 err = mlx5_ib_enable_lb(dev, false, true);
1657
1658 if (err)
1659 mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
1660 to_mpd(pd)->uid);
1661 }
1662
1589 if (err) 1663 if (err)
1590 goto err; 1664 goto err;
1591 1665
1666 if (mucontext->devx_uid) {
1667 resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
1668 resp.tirn = qp->rss_qp.tirn;
1669 }
1670
1671 err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
1672 if (err)
1673 goto err_copy;
1674
1592 kvfree(in); 1675 kvfree(in);
1593 /* qpn is reserved for that QP */ 1676 /* qpn is reserved for that QP */
1594 qp->trans_qp.base.mqp.qpn = 0; 1677 qp->trans_qp.base.mqp.qpn = 0;
1595 qp->flags |= MLX5_IB_QP_RSS; 1678 qp->flags |= MLX5_IB_QP_RSS;
1596 return 0; 1679 return 0;
1597 1680
1681err_copy:
1682 mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
1598err: 1683err:
1599 kvfree(in); 1684 kvfree(in);
1600 return err; 1685 return err;
1601} 1686}
1602 1687
1688static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
1689 void *qpc)
1690{
1691 int rcqe_sz;
1692
1693 if (init_attr->qp_type == MLX5_IB_QPT_DCI)
1694 return;
1695
1696 rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
1697
1698 if (rcqe_sz == 128) {
1699 MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
1700 return;
1701 }
1702
1703 if (init_attr->qp_type != MLX5_IB_QPT_DCT)
1704 MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
1705}
1706
1707static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
1708 struct ib_qp_init_attr *init_attr,
1709 struct mlx5_ib_create_qp *ucmd,
1710 void *qpc)
1711{
1712 enum ib_qp_type qpt = init_attr->qp_type;
1713 int scqe_sz;
1714 bool allow_scat_cqe = 0;
1715
1716 if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
1717 return;
1718
1719 if (ucmd)
1720 allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
1721
1722 if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
1723 return;
1724
1725 scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
1726 if (scqe_sz == 128) {
1727 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
1728 return;
1729 }
1730
1731 if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
1732 MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
1733 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
1734}
1735
1736static int atomic_size_to_mode(int size_mask)
1737{
1738 /* driver does not support atomic_size > 256B
1739 * and does not know how to translate bigger sizes
1740 */
1741 int supported_size_mask = size_mask & 0x1ff;
1742 int log_max_size;
1743
1744 if (!supported_size_mask)
1745 return -EOPNOTSUPP;
1746
1747 log_max_size = __fls(supported_size_mask);
1748
1749 if (log_max_size > 3)
1750 return log_max_size;
1751
1752 return MLX5_ATOMIC_MODE_8B;
1753}
1754
1755static int get_atomic_mode(struct mlx5_ib_dev *dev,
1756 enum ib_qp_type qp_type)
1757{
1758 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
1759 u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
1760 int atomic_mode = -EOPNOTSUPP;
1761 int atomic_size_mask;
1762
1763 if (!atomic)
1764 return -EOPNOTSUPP;
1765
1766 if (qp_type == MLX5_IB_QPT_DCT)
1767 atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
1768 else
1769 atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
1770
1771 if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
1772 (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
1773 atomic_mode = atomic_size_to_mode(atomic_size_mask);
1774
1775 if (atomic_mode <= 0 &&
1776 (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
1777 atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
1778 atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
1779
1780 return atomic_mode;
1781}
1782
1783static inline bool check_flags_mask(uint64_t input, uint64_t supported)
1784{
1785 return (input & ~supported) == 0;
1786}
1787
1603static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, 1788static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1604 struct ib_qp_init_attr *init_attr, 1789 struct ib_qp_init_attr *init_attr,
1605 struct ib_udata *udata, struct mlx5_ib_qp *qp) 1790 struct ib_udata *udata, struct mlx5_ib_qp *qp)
@@ -1697,20 +1882,47 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1697 return -EFAULT; 1882 return -EFAULT;
1698 } 1883 }
1699 1884
1885 if (!check_flags_mask(ucmd.flags,
1886 MLX5_QP_FLAG_SIGNATURE |
1887 MLX5_QP_FLAG_SCATTER_CQE |
1888 MLX5_QP_FLAG_TUNNEL_OFFLOADS |
1889 MLX5_QP_FLAG_BFREG_INDEX |
1890 MLX5_QP_FLAG_TYPE_DCT |
1891 MLX5_QP_FLAG_TYPE_DCI |
1892 MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
1893 return -EINVAL;
1894
1700 err = get_qp_user_index(to_mucontext(pd->uobject->context), 1895 err = get_qp_user_index(to_mucontext(pd->uobject->context),
1701 &ucmd, udata->inlen, &uidx); 1896 &ucmd, udata->inlen, &uidx);
1702 if (err) 1897 if (err)
1703 return err; 1898 return err;
1704 1899
1705 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); 1900 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
1706 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); 1901 if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe))
1902 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
1707 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) { 1903 if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
1708 if (init_attr->qp_type != IB_QPT_RAW_PACKET || 1904 if (init_attr->qp_type != IB_QPT_RAW_PACKET ||
1709 !tunnel_offload_supported(mdev)) { 1905 !tunnel_offload_supported(mdev)) {
1710 mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n"); 1906 mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
1711 return -EOPNOTSUPP; 1907 return -EOPNOTSUPP;
1712 } 1908 }
1713 qp->tunnel_offload_en = true; 1909 qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
1910 }
1911
1912 if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
1913 if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
1914 mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
1915 return -EOPNOTSUPP;
1916 }
1917 qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
1918 }
1919
1920 if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
1921 if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
1922 mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
1923 return -EOPNOTSUPP;
1924 }
1925 qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
1714 } 1926 }
1715 1927
1716 if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { 1928 if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
@@ -1811,23 +2023,10 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1811 MLX5_SET(qpc, qpc, cd_slave_receive, 1); 2023 MLX5_SET(qpc, qpc, cd_slave_receive, 1);
1812 2024
1813 if (qp->scat_cqe && is_connected(init_attr->qp_type)) { 2025 if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
1814 int rcqe_sz; 2026 configure_responder_scat_cqe(init_attr, qpc);
1815 int scqe_sz; 2027 configure_requester_scat_cqe(dev, init_attr,
1816 2028 (pd && pd->uobject) ? &ucmd : NULL,
1817 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); 2029 qpc);
1818 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
1819
1820 if (rcqe_sz == 128)
1821 MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
1822 else
1823 MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
1824
1825 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
1826 if (scqe_sz == 128)
1827 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
1828 else
1829 MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
1830 }
1831 } 2030 }
1832 2031
1833 if (qp->rq.wqe_cnt) { 2032 if (qp->rq.wqe_cnt) {
@@ -1911,7 +2110,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
1911 qp->flags & MLX5_IB_QP_UNDERLAY) { 2110 qp->flags & MLX5_IB_QP_UNDERLAY) {
1912 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr; 2111 qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
1913 raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); 2112 raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
1914 err = create_raw_packet_qp(dev, qp, in, inlen, pd); 2113 err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
2114 &resp);
1915 } else { 2115 } else {
1916 err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen); 2116 err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
1917 } 2117 }
@@ -2192,6 +2392,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
2192 goto err_free; 2392 goto err_free;
2193 } 2393 }
2194 2394
2395 MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
2195 dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); 2396 dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
2196 qp->qp_sub_type = MLX5_IB_QPT_DCT; 2397 qp->qp_sub_type = MLX5_IB_QPT_DCT;
2197 MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn); 2398 MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
@@ -2200,6 +2401,9 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
2200 MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key); 2401 MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
2201 MLX5_SET(dctc, dctc, user_index, uidx); 2402 MLX5_SET(dctc, dctc, user_index, uidx);
2202 2403
2404 if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE)
2405 configure_responder_scat_cqe(attr, dctc);
2406
2203 qp->state = IB_QPS_RESET; 2407 qp->state = IB_QPS_RESET;
2204 2408
2205 return &qp->ibqp; 2409 return &qp->ibqp;
@@ -2405,13 +2609,15 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
2405 return 0; 2609 return 0;
2406} 2610}
2407 2611
2408static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, 2612static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
2409 int attr_mask) 2613 const struct ib_qp_attr *attr,
2614 int attr_mask, __be32 *hw_access_flags)
2410{ 2615{
2411 u32 hw_access_flags = 0;
2412 u8 dest_rd_atomic; 2616 u8 dest_rd_atomic;
2413 u32 access_flags; 2617 u32 access_flags;
2414 2618
2619 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
2620
2415 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 2621 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
2416 dest_rd_atomic = attr->max_dest_rd_atomic; 2622 dest_rd_atomic = attr->max_dest_rd_atomic;
2417 else 2623 else
@@ -2426,13 +2632,25 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
2426 access_flags &= IB_ACCESS_REMOTE_WRITE; 2632 access_flags &= IB_ACCESS_REMOTE_WRITE;
2427 2633
2428 if (access_flags & IB_ACCESS_REMOTE_READ) 2634 if (access_flags & IB_ACCESS_REMOTE_READ)
2429 hw_access_flags |= MLX5_QP_BIT_RRE; 2635 *hw_access_flags |= MLX5_QP_BIT_RRE;
2430 if (access_flags & IB_ACCESS_REMOTE_ATOMIC) 2636 if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
2431 hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); 2637 qp->ibqp.qp_type == IB_QPT_RC) {
2638 int atomic_mode;
2639
2640 atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
2641 if (atomic_mode < 0)
2642 return -EOPNOTSUPP;
2643
2644 *hw_access_flags |= MLX5_QP_BIT_RAE;
2645 *hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET;
2646 }
2647
2432 if (access_flags & IB_ACCESS_REMOTE_WRITE) 2648 if (access_flags & IB_ACCESS_REMOTE_WRITE)
2433 hw_access_flags |= MLX5_QP_BIT_RWE; 2649 *hw_access_flags |= MLX5_QP_BIT_RWE;
2650
2651 *hw_access_flags = cpu_to_be32(*hw_access_flags);
2434 2652
2435 return cpu_to_be32(hw_access_flags); 2653 return 0;
2436} 2654}
2437 2655
2438enum { 2656enum {
@@ -2458,7 +2676,8 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
2458} 2676}
2459 2677
2460static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, 2678static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
2461 struct mlx5_ib_sq *sq, u8 sl) 2679 struct mlx5_ib_sq *sq, u8 sl,
2680 struct ib_pd *pd)
2462{ 2681{
2463 void *in; 2682 void *in;
2464 void *tisc; 2683 void *tisc;
@@ -2471,6 +2690,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
2471 return -ENOMEM; 2690 return -ENOMEM;
2472 2691
2473 MLX5_SET(modify_tis_in, in, bitmask.prio, 1); 2692 MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
2693 MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
2474 2694
2475 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); 2695 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
2476 MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1)); 2696 MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
@@ -2483,7 +2703,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
2483} 2703}
2484 2704
2485static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, 2705static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
2486 struct mlx5_ib_sq *sq, u8 tx_affinity) 2706 struct mlx5_ib_sq *sq, u8 tx_affinity,
2707 struct ib_pd *pd)
2487{ 2708{
2488 void *in; 2709 void *in;
2489 void *tisc; 2710 void *tisc;
@@ -2496,6 +2717,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
2496 return -ENOMEM; 2717 return -ENOMEM;
2497 2718
2498 MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); 2719 MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
2720 MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
2499 2721
2500 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); 2722 tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
2501 MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); 2723 MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
@@ -2580,7 +2802,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2580 if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) 2802 if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
2581 return modify_raw_packet_eth_prio(dev->mdev, 2803 return modify_raw_packet_eth_prio(dev->mdev,
2582 &qp->raw_packet_qp.sq, 2804 &qp->raw_packet_qp.sq,
2583 sl & 0xf); 2805 sl & 0xf, qp->ibqp.pd);
2584 2806
2585 return 0; 2807 return 0;
2586} 2808}
@@ -2728,9 +2950,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
2728 return result; 2950 return result;
2729} 2951}
2730 2952
2731static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, 2953static int modify_raw_packet_qp_rq(
2732 struct mlx5_ib_rq *rq, int new_state, 2954 struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
2733 const struct mlx5_modify_raw_qp_param *raw_qp_param) 2955 const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
2734{ 2956{
2735 void *in; 2957 void *in;
2736 void *rqc; 2958 void *rqc;
@@ -2743,6 +2965,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
2743 return -ENOMEM; 2965 return -ENOMEM;
2744 2966
2745 MLX5_SET(modify_rq_in, in, rq_state, rq->state); 2967 MLX5_SET(modify_rq_in, in, rq_state, rq->state);
2968 MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
2746 2969
2747 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); 2970 rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
2748 MLX5_SET(rqc, rqc, state, new_state); 2971 MLX5_SET(rqc, rqc, state, new_state);
@@ -2753,8 +2976,9 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
2753 MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); 2976 MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
2754 MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); 2977 MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
2755 } else 2978 } else
2756 pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", 2979 dev_info_once(
2757 dev->ib_dev.name); 2980 &dev->ib_dev.dev,
2981 "RAW PACKET QP counters are not supported on current FW\n");
2758 } 2982 }
2759 2983
2760 err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); 2984 err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
@@ -2768,10 +2992,9 @@ out:
2768 return err; 2992 return err;
2769} 2993}
2770 2994
2771static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, 2995static int modify_raw_packet_qp_sq(
2772 struct mlx5_ib_sq *sq, 2996 struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
2773 int new_state, 2997 const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
2774 const struct mlx5_modify_raw_qp_param *raw_qp_param)
2775{ 2998{
2776 struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; 2999 struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
2777 struct mlx5_rate_limit old_rl = ibqp->rl; 3000 struct mlx5_rate_limit old_rl = ibqp->rl;
@@ -2788,6 +3011,7 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
2788 if (!in) 3011 if (!in)
2789 return -ENOMEM; 3012 return -ENOMEM;
2790 3013
3014 MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
2791 MLX5_SET(modify_sq_in, in, sq_state, sq->state); 3015 MLX5_SET(modify_sq_in, in, sq_state, sq->state);
2792 3016
2793 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 3017 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2890,7 +3114,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2890 } 3114 }
2891 3115
2892 if (modify_rq) { 3116 if (modify_rq) {
2893 err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); 3117 err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
3118 qp->ibqp.pd);
2894 if (err) 3119 if (err)
2895 return err; 3120 return err;
2896 } 3121 }
@@ -2898,17 +3123,50 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
2898 if (modify_sq) { 3123 if (modify_sq) {
2899 if (tx_affinity) { 3124 if (tx_affinity) {
2900 err = modify_raw_packet_tx_affinity(dev->mdev, sq, 3125 err = modify_raw_packet_tx_affinity(dev->mdev, sq,
2901 tx_affinity); 3126 tx_affinity,
3127 qp->ibqp.pd);
2902 if (err) 3128 if (err)
2903 return err; 3129 return err;
2904 } 3130 }
2905 3131
2906 return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); 3132 return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
3133 raw_qp_param, qp->ibqp.pd);
2907 } 3134 }
2908 3135
2909 return 0; 3136 return 0;
2910} 3137}
2911 3138
3139static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
3140 struct mlx5_ib_pd *pd,
3141 struct mlx5_ib_qp_base *qp_base,
3142 u8 port_num)
3143{
3144 struct mlx5_ib_ucontext *ucontext = NULL;
3145 unsigned int tx_port_affinity;
3146
3147 if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
3148 ucontext = to_mucontext(pd->ibpd.uobject->context);
3149
3150 if (ucontext) {
3151 tx_port_affinity = (unsigned int)atomic_add_return(
3152 1, &ucontext->tx_port_affinity) %
3153 MLX5_MAX_PORTS +
3154 1;
3155 mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
3156 tx_port_affinity, qp_base->mqp.qpn, ucontext);
3157 } else {
3158 tx_port_affinity =
3159 (unsigned int)atomic_add_return(
3160 1, &dev->roce[port_num].tx_port_affinity) %
3161 MLX5_MAX_PORTS +
3162 1;
3163 mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
3164 tx_port_affinity, qp_base->mqp.qpn);
3165 }
3166
3167 return tx_port_affinity;
3168}
3169
2912static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, 3170static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2913 const struct ib_qp_attr *attr, int attr_mask, 3171 const struct ib_qp_attr *attr, int attr_mask,
2914 enum ib_qp_state cur_state, enum ib_qp_state new_state, 3172 enum ib_qp_state cur_state, enum ib_qp_state new_state,
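get_tx_affinity(), added above, factors the LAG tx-port selection out of __mlx5_ib_modify_qp(): when the QP belongs to a user context the round-robin counter now lives in that ucontext, otherwise the per-port RoCE counter is used, and either way the result is a 1-based port index. A small user-space model of the arithmetic, assuming MLX5_MAX_PORTS == 2 as in the driver:

/* Minimal model of the round-robin; "counter" stands in for the driver's
 * atomic_t (per ucontext or per port). */
#include <stdio.h>

#define MLX5_MAX_PORTS 2

static unsigned int counter;

static unsigned int next_tx_affinity(void)
{
        /* atomic_add_return(1, &ctr) % MLX5_MAX_PORTS + 1 in the kernel */
        return ++counter % MLX5_MAX_PORTS + 1;
}

int main(void)
{
        for (int i = 0; i < 4; i++)
                printf("QP %d -> tx port affinity %u\n", i, next_tx_affinity());
        return 0;       /* prints 2, 1, 2, 1 for a two-port LAG */
}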
@@ -2974,6 +3232,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
2974 if (!context) 3232 if (!context)
2975 return -ENOMEM; 3233 return -ENOMEM;
2976 3234
3235 pd = get_pd(qp);
2977 context->flags = cpu_to_be32(mlx5_st << 16); 3236 context->flags = cpu_to_be32(mlx5_st << 16);
2978 3237
2979 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { 3238 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
@@ -3002,9 +3261,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
3002 (ibqp->qp_type == IB_QPT_XRC_TGT)) { 3261 (ibqp->qp_type == IB_QPT_XRC_TGT)) {
3003 if (mlx5_lag_is_active(dev->mdev)) { 3262 if (mlx5_lag_is_active(dev->mdev)) {
3004 u8 p = mlx5_core_native_port_num(dev->mdev); 3263 u8 p = mlx5_core_native_port_num(dev->mdev);
3005 tx_affinity = (unsigned int)atomic_add_return(1, 3264 tx_affinity = get_tx_affinity(dev, pd, base, p);
3006 &dev->roce[p].next_port) %
3007 MLX5_MAX_PORTS + 1;
3008 context->flags |= cpu_to_be32(tx_affinity << 24); 3265 context->flags |= cpu_to_be32(tx_affinity << 24);
3009 } 3266 }
3010 } 3267 }
@@ -3062,7 +3319,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
3062 goto out; 3319 goto out;
3063 } 3320 }
3064 3321
3065 pd = get_pd(qp);
3066 get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq, 3322 get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
3067 &send_cq, &recv_cq); 3323 &send_cq, &recv_cq);
3068 3324
@@ -3092,8 +3348,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
3092 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); 3348 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
3093 } 3349 }
3094 3350
3095 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) 3351 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
3096 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); 3352 __be32 access_flags = 0;
3353
3354 err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags);
3355 if (err)
3356 goto out;
3357
3358 context->params2 |= access_flags;
3359 }
3097 3360
3098 if (attr_mask & IB_QP_MIN_RNR_TIMER) 3361 if (attr_mask & IB_QP_MIN_RNR_TIMER)
3099 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); 3362 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
@@ -3243,7 +3506,9 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
3243 int req = IB_QP_STATE; 3506 int req = IB_QP_STATE;
3244 int opt = 0; 3507 int opt = 0;
3245 3508
3246 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 3509 if (new_state == IB_QPS_RESET) {
3510 return is_valid_mask(attr_mask, req, opt);
3511 } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
3247 req |= IB_QP_PKEY_INDEX | IB_QP_PORT; 3512 req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
3248 return is_valid_mask(attr_mask, req, opt); 3513 return is_valid_mask(attr_mask, req, opt);
3249 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { 3514 } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
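The new first branch in modify_dci_qp_is_ok() lets a DCI QP be moved to RESET from any state with nothing but IB_QP_STATE set, where previously only the enumerated forward transitions were accepted. Below is a user-space model of the mask test, under the assumption that is_valid_mask() (defined outside this hunk) requires every bit in req and tolerates only bits in req|opt; the IB_QP_* values are placeholders:

#include <stdbool.h>
#include <stdio.h>

#define IB_QP_STATE      (1 << 0)       /* placeholder bit values */
#define IB_QP_PKEY_INDEX (1 << 1)
#define IB_QP_PORT       (1 << 2)

static bool is_valid_mask_model(int mask, int req, int opt)
{
        return (mask & req) == req && !(mask & ~(req | opt));
}

int main(void)
{
        /* any-state -> RESET: only IB_QP_STATE may (and must) be set */
        printf("%d\n", is_valid_mask_model(IB_QP_STATE, IB_QP_STATE, 0));               /* 1 */
        printf("%d\n", is_valid_mask_model(IB_QP_STATE | IB_QP_PORT, IB_QP_STATE, 0));  /* 0 */
        return 0;
}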
@@ -3307,10 +3572,14 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3307 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) 3572 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
3308 MLX5_SET(dctc, dctc, rwe, 1); 3573 MLX5_SET(dctc, dctc, rwe, 1);
3309 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) { 3574 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
3310 if (!mlx5_ib_dc_atomic_is_supported(dev)) 3575 int atomic_mode;
3576
3577 atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
3578 if (atomic_mode < 0)
3311 return -EOPNOTSUPP; 3579 return -EOPNOTSUPP;
3580
3581 MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
3312 MLX5_SET(dctc, dctc, rae, 1); 3582 MLX5_SET(dctc, dctc, rae, 1);
3313 MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
3314 } 3583 }
3315 MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); 3584 MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
3316 MLX5_SET(dctc, dctc, port, attr->port_num); 3585 MLX5_SET(dctc, dctc, port, attr->port_num);
@@ -3367,7 +3636,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3367 size_t required_cmd_sz; 3636 size_t required_cmd_sz;
3368 int err = -EINVAL; 3637 int err = -EINVAL;
3369 int port; 3638 int port;
3370 enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
3371 3639
3372 if (ibqp->rwq_ind_tbl) 3640 if (ibqp->rwq_ind_tbl)
3373 return -ENOSYS; 3641 return -ENOSYS;
@@ -3413,7 +3681,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3413 3681
3414 if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { 3682 if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
3415 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 3683 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
3416 ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
3417 } 3684 }
3418 3685
3419 if (qp->flags & MLX5_IB_QP_UNDERLAY) { 3686 if (qp->flags & MLX5_IB_QP_UNDERLAY) {
@@ -3424,7 +3691,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3424 } 3691 }
3425 } else if (qp_type != MLX5_IB_QPT_REG_UMR && 3692 } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
3426 qp_type != MLX5_IB_QPT_DCI && 3693 qp_type != MLX5_IB_QPT_DCI &&
3427 !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) { 3694 !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
3695 attr_mask)) {
3428 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n", 3696 mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
3429 cur_state, new_state, ibqp->qp_type, attr_mask); 3697 cur_state, new_state, ibqp->qp_type, attr_mask);
3430 goto out; 3698 goto out;
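Here and in the mthca, ocrdma and qedr hunks further down, ib_modify_qp_is_ok() loses its link-layer argument, which is also what lets this function drop the ll local and the get_link_layer() call above. The prototype below is paraphrased from the call sites in this patch, so treat it as illustrative rather than the authoritative header:

/* paraphrased new shape of the core helper */
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
                        enum ib_qp_type type, enum ib_qp_attr_mask mask);

/* typical driver call site after this series */
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
        return -EINVAL;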
@@ -4371,6 +4639,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
4371 u8 next_fence = 0; 4639 u8 next_fence = 0;
4372 u8 fence; 4640 u8 fence;
4373 4641
4642 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
4643 !drain)) {
4644 *bad_wr = wr;
4645 return -EIO;
4646 }
4647
4374 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 4648 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4375 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr); 4649 return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
4376 4650
@@ -4380,13 +4654,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
4380 4654
4381 spin_lock_irqsave(&qp->sq.lock, flags); 4655 spin_lock_irqsave(&qp->sq.lock, flags);
4382 4656
4383 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
4384 err = -EIO;
4385 *bad_wr = wr;
4386 nreq = 0;
4387 goto out;
4388 }
4389
4390 for (nreq = 0; wr; nreq++, wr = wr->next) { 4657 for (nreq = 0; wr; nreq++, wr = wr->next) {
4391 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { 4658 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
4392 mlx5_ib_warn(dev, "\n"); 4659 mlx5_ib_warn(dev, "\n");
@@ -4700,18 +4967,17 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
4700 int ind; 4967 int ind;
4701 int i; 4968 int i;
4702 4969
4970 if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
4971 !drain)) {
4972 *bad_wr = wr;
4973 return -EIO;
4974 }
4975
4703 if (unlikely(ibqp->qp_type == IB_QPT_GSI)) 4976 if (unlikely(ibqp->qp_type == IB_QPT_GSI))
4704 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr); 4977 return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
4705 4978
4706 spin_lock_irqsave(&qp->rq.lock, flags); 4979 spin_lock_irqsave(&qp->rq.lock, flags);
4707 4980
4708 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
4709 err = -EIO;
4710 *bad_wr = wr;
4711 nreq = 0;
4712 goto out;
4713 }
4714
4715 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 4981 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
4716 4982
4717 for (nreq = 0; wr; nreq++, wr = wr->next) { 4983 for (nreq = 0; wr; nreq++, wr = wr->next) {
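Both post hunks above move the internal-error test in front of the GSI dispatch and the queue lock: a device in MLX5_DEVICE_STATE_INTERNAL_ERROR that is not being drained now fails fast with -EIO and *bad_wr pointing at the first unposted work request. A hedged sketch of the shared shape for the send side; post_check_device_state() is a hypothetical helper (the driver open-codes the test), and the receive side mirrors it with struct ib_recv_wr:

static int post_check_device_state(struct mlx5_core_dev *mdev,
                                   const struct ib_send_wr *wr,
                                   const struct ib_send_wr **bad_wr,
                                   bool drain)
{
        if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain)) {
                *bad_wr = wr;   /* report the first unposted WR back to the caller */
                return -EIO;
        }
        return 0;               /* safe to take the queue lock and post */
}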
@@ -5175,6 +5441,7 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
5175 struct mlx5_ib_dev *dev = to_mdev(ibdev); 5441 struct mlx5_ib_dev *dev = to_mdev(ibdev);
5176 struct mlx5_ib_xrcd *xrcd; 5442 struct mlx5_ib_xrcd *xrcd;
5177 int err; 5443 int err;
5444 u16 uid;
5178 5445
5179 if (!MLX5_CAP_GEN(dev->mdev, xrc)) 5446 if (!MLX5_CAP_GEN(dev->mdev, xrc))
5180 return ERR_PTR(-ENOSYS); 5447 return ERR_PTR(-ENOSYS);
@@ -5183,12 +5450,14 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
5183 if (!xrcd) 5450 if (!xrcd)
5184 return ERR_PTR(-ENOMEM); 5451 return ERR_PTR(-ENOMEM);
5185 5452
5186 err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn); 5453 uid = context ? to_mucontext(context)->devx_uid : 0;
5454 err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
5187 if (err) { 5455 if (err) {
5188 kfree(xrcd); 5456 kfree(xrcd);
5189 return ERR_PTR(-ENOMEM); 5457 return ERR_PTR(-ENOMEM);
5190 } 5458 }
5191 5459
5460 xrcd->uid = uid;
5192 return &xrcd->ibxrcd; 5461 return &xrcd->ibxrcd;
5193} 5462}
5194 5463
@@ -5196,9 +5465,10 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
5196{ 5465{
5197 struct mlx5_ib_dev *dev = to_mdev(xrcd->device); 5466 struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
5198 u32 xrcdn = to_mxrcd(xrcd)->xrcdn; 5467 u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
5468 u16 uid = to_mxrcd(xrcd)->uid;
5199 int err; 5469 int err;
5200 5470
5201 err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn); 5471 err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
5202 if (err) 5472 if (err)
5203 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); 5473 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
5204 5474
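XRCDs have no PD to carry a uid, so the alloc path derives one from the caller's ucontext devx_uid (0 for in-kernel users), records it in the mlx5 xrcd and replays it on dealloc, keeping create and destroy under the same firmware uid. A hedged fragment stitched together from the two hunks above:

u16 uid = context ? to_mucontext(context)->devx_uid : 0;

err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
/* ... error handling as in the hunk ... */
xrcd->uid = uid;                               /* remembered for dealloc */

/* later, in mlx5_ib_dealloc_xrcd(): */
mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);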
@@ -5268,6 +5538,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
5268 if (!in) 5538 if (!in)
5269 return -ENOMEM; 5539 return -ENOMEM;
5270 5540
5541 MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
5271 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); 5542 rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
5272 MLX5_SET(rqc, rqc, mem_rq_type, 5543 MLX5_SET(rqc, rqc, mem_rq_type,
5273 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); 5544 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
@@ -5443,8 +5714,7 @@ static int prepare_user_rq(struct ib_pd *pd,
5443 err = create_user_rq(dev, pd, rwq, &ucmd); 5714 err = create_user_rq(dev, pd, rwq, &ucmd);
5444 if (err) { 5715 if (err) {
5445 mlx5_ib_dbg(dev, "err %d\n", err); 5716 mlx5_ib_dbg(dev, "err %d\n", err);
5446 if (err) 5717 return err;
5447 return err;
5448 } 5718 }
5449 5719
5450 rwq->user_index = ucmd.user_index; 5720 rwq->user_index = ucmd.user_index;
@@ -5573,6 +5843,9 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
5573 for (i = 0; i < sz; i++) 5843 for (i = 0; i < sz; i++)
5574 MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num); 5844 MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
5575 5845
5846 rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
5847 MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
5848
5576 err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); 5849 err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
5577 kvfree(in); 5850 kvfree(in);
5578 5851
@@ -5591,7 +5864,7 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
5591 return &rwq_ind_tbl->ib_rwq_ind_tbl; 5864 return &rwq_ind_tbl->ib_rwq_ind_tbl;
5592 5865
5593err_copy: 5866err_copy:
5594 mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); 5867 mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5595err: 5868err:
5596 kfree(rwq_ind_tbl); 5869 kfree(rwq_ind_tbl);
5597 return ERR_PTR(err); 5870 return ERR_PTR(err);
@@ -5602,7 +5875,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
5602 struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); 5875 struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
5603 struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); 5876 struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
5604 5877
5605 mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn); 5878 mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
5606 5879
5607 kfree(rwq_ind_tbl); 5880 kfree(rwq_ind_tbl);
5608 return 0; 5881 return 0;
@@ -5653,6 +5926,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
5653 if (wq_state == IB_WQS_ERR) 5926 if (wq_state == IB_WQS_ERR)
5654 wq_state = MLX5_RQC_STATE_ERR; 5927 wq_state = MLX5_RQC_STATE_ERR;
5655 MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); 5928 MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
5929 MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
5656 MLX5_SET(rqc, rqc, state, wq_state); 5930 MLX5_SET(rqc, rqc, state, wq_state);
5657 5931
5658 if (wq_attr_mask & IB_WQ_FLAGS) { 5932 if (wq_attr_mask & IB_WQ_FLAGS) {
@@ -5684,8 +5958,9 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
5684 MLX5_SET(rqc, rqc, counter_set_id, 5958 MLX5_SET(rqc, rqc, counter_set_id,
5685 dev->port->cnts.set_id); 5959 dev->port->cnts.set_id);
5686 } else 5960 } else
5687 pr_info_once("%s: Receive WQ counters are not supported on current FW\n", 5961 dev_info_once(
5688 dev->ib_dev.name); 5962 &dev->ib_dev.dev,
5963 "Receive WQ counters are not supported on current FW\n");
5689 } 5964 }
5690 5965
5691 err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); 5966 err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
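This hunk closes out the mlx5 QP file with the same logging conversion seen earlier in it: messages that used to interpolate dev->ib_dev.name through pr_*() now go through dev_*() against &dev->ib_dev.dev, in line with the series-wide switch to dev_name(). The nes, ocrdma (debugfs directory name) and qedr (DP_NAME) hunks further down make the equivalent change. A hedged fragment, where ibdev stands for any struct ib_device:

/* device-based logging: the ib device name is prefixed automatically */
dev_info_once(&ibdev->dev,
              "RAW PACKET QP counters are not supported on current FW\n");

/* where a bare string is still needed, dev_name() replaces ibdev->name */
pr_debug("registered RDMA device %s\n", dev_name(&ibdev->dev));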
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d359fecf7a5b..d012e7dbcc38 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -144,6 +144,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
144 144
145 in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; 145 in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
146 in->page_offset = offset; 146 in->page_offset = offset;
147 in->uid = to_mpd(pd)->uid;
147 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && 148 if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
148 in->type != IB_SRQT_BASIC) 149 in->type != IB_SRQT_BASIC)
149 in->user_index = uidx; 150 in->user_index = uidx;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 093f7755c843..2e5dc0a67cfc 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -58,8 +58,9 @@ static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
58 58
59 ret = ib_query_port(&dev->ib_dev, port_num, tprops); 59 ret = ib_query_port(&dev->ib_dev, port_num, tprops);
60 if (ret) { 60 if (ret) {
61 printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n", 61 dev_warn(&dev->ib_dev.dev,
62 ret, dev->ib_dev.name, port_num); 62 "ib_query_port failed (%d) forport %d\n", ret,
63 port_num);
63 goto out; 64 goto out;
64 } 65 }
65 66
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index f3e80dec1334..92c49bff22bc 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
986 goto err_free_dev; 986 goto err_free_dev;
987 } 987 }
988 988
989 if (mthca_cmd_init(mdev)) { 989 err = mthca_cmd_init(mdev);
990 if (err) {
990 mthca_err(mdev, "Failed to init command interface, aborting.\n"); 991 mthca_err(mdev, "Failed to init command interface, aborting.\n");
991 goto err_free_dev; 992 goto err_free_dev;
992 } 993 }
@@ -1014,8 +1015,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
1014 1015
1015 err = mthca_setup_hca(mdev); 1016 err = mthca_setup_hca(mdev);
1016 if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) { 1017 if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
1017 if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) 1018 pci_free_irq_vectors(pdev);
1018 pci_free_irq_vectors(pdev);
1019 mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X; 1019 mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
1020 1020
1021 err = mthca_setup_hca(mdev); 1021 err = mthca_setup_hca(mdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0d3473b4596e..691c6f048938 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1076,16 +1076,17 @@ static int mthca_unmap_fmr(struct list_head *fmr_list)
1076 return err; 1076 return err;
1077} 1077}
1078 1078
1079static ssize_t show_rev(struct device *device, struct device_attribute *attr, 1079static ssize_t hw_rev_show(struct device *device,
1080 char *buf) 1080 struct device_attribute *attr, char *buf)
1081{ 1081{
1082 struct mthca_dev *dev = 1082 struct mthca_dev *dev =
1083 container_of(device, struct mthca_dev, ib_dev.dev); 1083 container_of(device, struct mthca_dev, ib_dev.dev);
1084 return sprintf(buf, "%x\n", dev->rev_id); 1084 return sprintf(buf, "%x\n", dev->rev_id);
1085} 1085}
1086static DEVICE_ATTR_RO(hw_rev);
1086 1087
1087static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1088static ssize_t hca_type_show(struct device *device,
1088 char *buf) 1089 struct device_attribute *attr, char *buf)
1089{ 1090{
1090 struct mthca_dev *dev = 1091 struct mthca_dev *dev =
1091 container_of(device, struct mthca_dev, ib_dev.dev); 1092 container_of(device, struct mthca_dev, ib_dev.dev);
@@ -1103,23 +1104,26 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1103 return sprintf(buf, "unknown\n"); 1104 return sprintf(buf, "unknown\n");
1104 } 1105 }
1105} 1106}
1107static DEVICE_ATTR_RO(hca_type);
1106 1108
1107static ssize_t show_board(struct device *device, struct device_attribute *attr, 1109static ssize_t board_id_show(struct device *device,
1108 char *buf) 1110 struct device_attribute *attr, char *buf)
1109{ 1111{
1110 struct mthca_dev *dev = 1112 struct mthca_dev *dev =
1111 container_of(device, struct mthca_dev, ib_dev.dev); 1113 container_of(device, struct mthca_dev, ib_dev.dev);
1112 return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); 1114 return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
1113} 1115}
1116static DEVICE_ATTR_RO(board_id);
1114 1117
1115static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1118static struct attribute *mthca_dev_attributes[] = {
1116static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1119 &dev_attr_hw_rev.attr,
1117static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1120 &dev_attr_hca_type.attr,
1121 &dev_attr_board_id.attr,
1122 NULL
1123};
1118 1124
1119static struct device_attribute *mthca_dev_attributes[] = { 1125static const struct attribute_group mthca_attr_group = {
1120 &dev_attr_hw_rev, 1126 .attrs = mthca_dev_attributes,
1121 &dev_attr_hca_type,
1122 &dev_attr_board_id
1123}; 1127};
1124 1128
1125static int mthca_init_node_data(struct mthca_dev *dev) 1129static int mthca_init_node_data(struct mthca_dev *dev)
@@ -1192,13 +1196,11 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
1192int mthca_register_device(struct mthca_dev *dev) 1196int mthca_register_device(struct mthca_dev *dev)
1193{ 1197{
1194 int ret; 1198 int ret;
1195 int i;
1196 1199
1197 ret = mthca_init_node_data(dev); 1200 ret = mthca_init_node_data(dev);
1198 if (ret) 1201 if (ret)
1199 return ret; 1202 return ret;
1200 1203
1201 strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
1202 dev->ib_dev.owner = THIS_MODULE; 1204 dev->ib_dev.owner = THIS_MODULE;
1203 1205
1204 dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION; 1206 dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
@@ -1296,20 +1298,12 @@ int mthca_register_device(struct mthca_dev *dev)
1296 1298
1297 mutex_init(&dev->cap_mask_mutex); 1299 mutex_init(&dev->cap_mask_mutex);
1298 1300
1301 rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
1299 dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA; 1302 dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
1300 ret = ib_register_device(&dev->ib_dev, NULL); 1303 ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL);
1301 if (ret) 1304 if (ret)
1302 return ret; 1305 return ret;
1303 1306
1304 for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
1305 ret = device_create_file(&dev->ib_dev.dev,
1306 mthca_dev_attributes[i]);
1307 if (ret) {
1308 ib_unregister_device(&dev->ib_dev);
1309 return ret;
1310 }
1311 }
1312
1313 mthca_start_catas_poll(dev); 1307 mthca_start_catas_poll(dev);
1314 1308
1315 return 0; 1309 return 0;
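The mthca hunks above show the sysfs conversion that nes, ocrdma and qedr repeat later in this patch: per-attribute show functions become DEVICE_ATTR_RO() entries collected into an attribute_group, the group is handed to the core with rdma_set_device_sysfs_group() before ib_register_device() (which now also takes the "name%d" pattern), and the hand-rolled device_create_file() loops with their unwind paths disappear. A condensed sketch, with a made-up demo_dev standing in for the driver's private structure:

struct demo_dev {                       /* stand-in for the driver's dev struct */
        struct ib_device ibdev;
        u32 rev_id;
};

static ssize_t hw_rev_show(struct device *device,
                           struct device_attribute *attr, char *buf)
{
        struct demo_dev *dev = container_of(device, struct demo_dev, ibdev.dev);

        return sprintf(buf, "%x\n", dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);

static struct attribute *demo_attributes[] = {
        &dev_attr_hw_rev.attr,
        NULL
};

static const struct attribute_group demo_attr_group = {
        .attrs = demo_attributes,
};

static int demo_register(struct demo_dev *dev)
{
        /* the core now creates and removes the files for us */
        rdma_set_device_sysfs_group(&dev->ibdev, &demo_attr_group);
        dev->ibdev.driver_id = RDMA_DRIVER_MTHCA;   /* per-driver enum value */
        return ib_register_device(&dev->ibdev, "demo%d", NULL);
}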
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 3d37f2373d63..9d178ee3c96a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -872,8 +872,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
872 872
873 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 873 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
874 874
875 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 875 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
876 IB_LINK_LAYER_UNSPECIFIED)) { 876 attr_mask)) {
877 mthca_dbg(dev, "Bad QP transition (transport %d) " 877 mthca_dbg(dev, "Bad QP transition (transport %d) "
878 "%d->%d with attr 0x%08x\n", 878 "%d->%d with attr 0x%08x\n",
879 qp->transport, cur_state, new_state, 879 qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 42b68aa999fc..e00add6d78ec 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -456,9 +456,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
456 void __iomem *mmio_regs = NULL; 456 void __iomem *mmio_regs = NULL;
457 u8 hw_rev; 457 u8 hw_rev;
458 458
459 assert(pcidev != NULL);
460 assert(ent != NULL);
461
462 printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n", 459 printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
463 DRV_VERSION, pci_name(pcidev)); 460 DRV_VERSION, pci_name(pcidev));
464 461
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bedaa02749fb..a895fe980d10 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -149,18 +149,9 @@ do { \
149 printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ 149 printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
150} while (0) 150} while (0)
151 151
152#define assert(expr) \
153do { \
154 if (!(expr)) { \
155 printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
156 #expr, __FILE__, __func__, __LINE__); \
157 } \
158} while (0)
159
160#define NES_EVENT_TIMEOUT 1200000 152#define NES_EVENT_TIMEOUT 1200000
161#else 153#else
162#define nes_debug(level, fmt, args...) no_printk(fmt, ##args) 154#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
163#define assert(expr) do {} while (0)
164 155
165#define NES_EVENT_TIMEOUT 100000 156#define NES_EVENT_TIMEOUT 100000
166#endif 157#endif
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index bd0675d8f298..5517e392bc01 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1443,7 +1443,7 @@ static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_inde
1443 mdelay(1); 1443 mdelay(1);
1444 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); 1444 nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
1445 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); 1445 temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
1446 } while ((temp_phy_data2 == temp_phy_data)); 1446 } while (temp_phy_data2 == temp_phy_data);
1447 1447
1448 /* wait for tracking */ 1448 /* wait for tracking */
1449 counter = 0; 1449 counter = 0;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 61014e251555..16f33454c198 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -146,8 +146,6 @@ static int nes_netdev_open(struct net_device *netdev)
146 struct list_head *list_pos, *list_temp; 146 struct list_head *list_pos, *list_temp;
147 unsigned long flags; 147 unsigned long flags;
148 148
149 assert(nesdev != NULL);
150
151 if (nesvnic->netdev_open == 1) 149 if (nesvnic->netdev_open == 1)
152 return 0; 150 return 0;
153 151
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 6940c7215961..92d1cadd4cfd 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -687,7 +687,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
687 } 687 }
688 688
689 nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n", 689 nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
690 nespd, nesvnic->nesibdev->ibdev.name); 690 nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
691 691
692 nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd; 692 nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
693 693
@@ -2556,8 +2556,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
2556/** 2556/**
2557 * show_rev 2557 * show_rev
2558 */ 2558 */
2559static ssize_t show_rev(struct device *dev, struct device_attribute *attr, 2559static ssize_t hw_rev_show(struct device *dev,
2560 char *buf) 2560 struct device_attribute *attr, char *buf)
2561{ 2561{
2562 struct nes_ib_device *nesibdev = 2562 struct nes_ib_device *nesibdev =
2563 container_of(dev, struct nes_ib_device, ibdev.dev); 2563 container_of(dev, struct nes_ib_device, ibdev.dev);
@@ -2566,40 +2566,40 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
2566 nes_debug(NES_DBG_INIT, "\n"); 2566 nes_debug(NES_DBG_INIT, "\n");
2567 return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev); 2567 return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
2568} 2568}
2569 2569static DEVICE_ATTR_RO(hw_rev);
2570 2570
2571/** 2571/**
2572 * show_hca 2572 * show_hca
2573 */ 2573 */
2574static ssize_t show_hca(struct device *dev, struct device_attribute *attr, 2574static ssize_t hca_type_show(struct device *dev,
2575 char *buf) 2575 struct device_attribute *attr, char *buf)
2576{ 2576{
2577 nes_debug(NES_DBG_INIT, "\n"); 2577 nes_debug(NES_DBG_INIT, "\n");
2578 return sprintf(buf, "NES020\n"); 2578 return sprintf(buf, "NES020\n");
2579} 2579}
2580 2580static DEVICE_ATTR_RO(hca_type);
2581 2581
2582/** 2582/**
2583 * show_board 2583 * show_board
2584 */ 2584 */
2585static ssize_t show_board(struct device *dev, struct device_attribute *attr, 2585static ssize_t board_id_show(struct device *dev,
2586 char *buf) 2586 struct device_attribute *attr, char *buf)
2587{ 2587{
2588 nes_debug(NES_DBG_INIT, "\n"); 2588 nes_debug(NES_DBG_INIT, "\n");
2589 return sprintf(buf, "%.*s\n", 32, "NES020 Board ID"); 2589 return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
2590} 2590}
2591static DEVICE_ATTR_RO(board_id);
2591 2592
2592 2593static struct attribute *nes_dev_attributes[] = {
2593static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2594 &dev_attr_hw_rev.attr,
2594static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2595 &dev_attr_hca_type.attr,
2595static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2596 &dev_attr_board_id.attr,
2596 2597 NULL
2597static struct device_attribute *nes_dev_attributes[] = {
2598 &dev_attr_hw_rev,
2599 &dev_attr_hca_type,
2600 &dev_attr_board_id
2601}; 2598};
2602 2599
2600static const struct attribute_group nes_attr_group = {
2601 .attrs = nes_dev_attributes,
2602};
2603 2603
2604/** 2604/**
2605 * nes_query_qp 2605 * nes_query_qp
@@ -3640,7 +3640,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3640 if (nesibdev == NULL) { 3640 if (nesibdev == NULL) {
3641 return NULL; 3641 return NULL;
3642 } 3642 }
3643 strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
3644 nesibdev->ibdev.owner = THIS_MODULE; 3643 nesibdev->ibdev.owner = THIS_MODULE;
3645 3644
3646 nesibdev->ibdev.node_type = RDMA_NODE_RNIC; 3645 nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -3795,10 +3794,11 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
3795 struct nes_vnic *nesvnic = nesibdev->nesvnic; 3794 struct nes_vnic *nesvnic = nesibdev->nesvnic;
3796 struct nes_device *nesdev = nesvnic->nesdev; 3795 struct nes_device *nesdev = nesvnic->nesdev;
3797 struct nes_adapter *nesadapter = nesdev->nesadapter; 3796 struct nes_adapter *nesadapter = nesdev->nesadapter;
3798 int i, ret; 3797 int ret;
3799 3798
3799 rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
3800 nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES; 3800 nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
3801 ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); 3801 ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL);
3802 if (ret) { 3802 if (ret) {
3803 return ret; 3803 return ret;
3804 } 3804 }
@@ -3809,19 +3809,6 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
3809 nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count; 3809 nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
3810 nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count; 3810 nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
3811 3811
3812 for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
3813 ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
3814 if (ret) {
3815 while (i > 0) {
3816 i--;
3817 device_remove_file(&nesibdev->ibdev.dev,
3818 nes_dev_attributes[i]);
3819 }
3820 ib_unregister_device(&nesibdev->ibdev);
3821 return ret;
3822 }
3823 }
3824
3825 nesvnic->of_device_registered = 1; 3812 nesvnic->of_device_registered = 1;
3826 3813
3827 return 0; 3814 return 0;
@@ -3834,15 +3821,9 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
3834static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev) 3821static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
3835{ 3822{
3836 struct nes_vnic *nesvnic = nesibdev->nesvnic; 3823 struct nes_vnic *nesvnic = nesibdev->nesvnic;
3837 int i;
3838 3824
3839 for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) { 3825 if (nesvnic->of_device_registered)
3840 device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
3841 }
3842
3843 if (nesvnic->of_device_registered) {
3844 ib_unregister_device(&nesibdev->ibdev); 3826 ib_unregister_device(&nesibdev->ibdev);
3845 }
3846 3827
3847 nesvnic->of_device_registered = 0; 3828 nesvnic->of_device_registered = 0;
3848} 3829}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index e578281471af..241a57a07485 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -792,7 +792,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
792 qp->srq->ibsrq. 792 qp->srq->ibsrq.
793 srq_context); 793 srq_context);
794 } else if (dev_event) { 794 } else if (dev_event) {
795 pr_err("%s: Fatal event received\n", dev->ibdev.name); 795 dev_err(&dev->ibdev.dev, "Fatal event received\n");
796 ib_dispatch_event(&ib_evt); 796 ib_dispatch_event(&ib_evt);
797 } 797 }
798 798
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7832ee3e0c84..873cc7f6fe61 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -114,9 +114,37 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
114 snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]); 114 snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]);
115} 115}
116 116
117/* OCRDMA sysfs interface */
118static ssize_t hw_rev_show(struct device *device,
119 struct device_attribute *attr, char *buf)
120{
121 struct ocrdma_dev *dev = dev_get_drvdata(device);
122
123 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
124}
125static DEVICE_ATTR_RO(hw_rev);
126
127static ssize_t hca_type_show(struct device *device,
128 struct device_attribute *attr, char *buf)
129{
130 struct ocrdma_dev *dev = dev_get_drvdata(device);
131
132 return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
133}
134static DEVICE_ATTR_RO(hca_type);
135
136static struct attribute *ocrdma_attributes[] = {
137 &dev_attr_hw_rev.attr,
138 &dev_attr_hca_type.attr,
139 NULL
140};
141
142static const struct attribute_group ocrdma_attr_group = {
143 .attrs = ocrdma_attributes,
144};
145
117static int ocrdma_register_device(struct ocrdma_dev *dev) 146static int ocrdma_register_device(struct ocrdma_dev *dev)
118{ 147{
119 strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
120 ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); 148 ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
121 BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); 149 BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
122 memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, 150 memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
@@ -213,8 +241,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
213 dev->ibdev.destroy_srq = ocrdma_destroy_srq; 241 dev->ibdev.destroy_srq = ocrdma_destroy_srq;
214 dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; 242 dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
215 } 243 }
244 rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
216 dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; 245 dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
217 return ib_register_device(&dev->ibdev, NULL); 246 return ib_register_device(&dev->ibdev, "ocrdma%d", NULL);
218} 247}
219 248
220static int ocrdma_alloc_resources(struct ocrdma_dev *dev) 249static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
@@ -260,42 +289,9 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
260 kfree(dev->cq_tbl); 289 kfree(dev->cq_tbl);
261} 290}
262 291
263/* OCRDMA sysfs interface */
264static ssize_t show_rev(struct device *device, struct device_attribute *attr,
265 char *buf)
266{
267 struct ocrdma_dev *dev = dev_get_drvdata(device);
268
269 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
270}
271
272static ssize_t show_hca_type(struct device *device,
273 struct device_attribute *attr, char *buf)
274{
275 struct ocrdma_dev *dev = dev_get_drvdata(device);
276
277 return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
278}
279
280static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
281static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
282
283static struct device_attribute *ocrdma_attributes[] = {
284 &dev_attr_hw_rev,
285 &dev_attr_hca_type
286};
287
288static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
289{
290 int i;
291
292 for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
293 device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
294}
295
296static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) 292static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
297{ 293{
298 int status = 0, i; 294 int status = 0;
299 u8 lstate = 0; 295 u8 lstate = 0;
300 struct ocrdma_dev *dev; 296 struct ocrdma_dev *dev;
301 297
@@ -331,9 +327,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
331 if (!status) 327 if (!status)
332 ocrdma_update_link_state(dev, lstate); 328 ocrdma_update_link_state(dev, lstate);
333 329
334 for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
335 if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
336 goto sysfs_err;
337 /* Init stats */ 330 /* Init stats */
338 ocrdma_add_port_stats(dev); 331 ocrdma_add_port_stats(dev);
339 /* Interrupt Moderation */ 332 /* Interrupt Moderation */
@@ -348,8 +341,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
348 dev_name(&dev->nic_info.pdev->dev), dev->id); 341 dev_name(&dev->nic_info.pdev->dev), dev->id);
349 return dev; 342 return dev;
350 343
351sysfs_err:
352 ocrdma_remove_sysfiles(dev);
353alloc_err: 344alloc_err:
354 ocrdma_free_resources(dev); 345 ocrdma_free_resources(dev);
355 ocrdma_cleanup_hw(dev); 346 ocrdma_cleanup_hw(dev);
@@ -376,7 +367,6 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
376 * of the registered clients. 367 * of the registered clients.
377 */ 368 */
378 cancel_delayed_work_sync(&dev->eqd_work); 369 cancel_delayed_work_sync(&dev->eqd_work);
379 ocrdma_remove_sysfiles(dev);
380 ib_unregister_device(&dev->ibdev); 370 ib_unregister_device(&dev->ibdev);
381 371
382 ocrdma_rem_port_stats(dev); 372 ocrdma_rem_port_stats(dev);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 24d20a4aa262..290d776edf48 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -764,7 +764,8 @@ void ocrdma_add_port_stats(struct ocrdma_dev *dev)
764 return; 764 return;
765 765
766 /* Create post stats base dir */ 766 /* Create post stats base dir */
767 dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); 767 dev->dir =
768 debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
768 if (!dev->dir) 769 if (!dev->dir)
769 goto err; 770 goto err;
770 771
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c158ca9fde6d..06d2a7f3304c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1480,8 +1480,7 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1480 new_qps = old_qps; 1480 new_qps = old_qps;
1481 spin_unlock_irqrestore(&qp->q_lock, flags); 1481 spin_unlock_irqrestore(&qp->q_lock, flags);
1482 1482
1483 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask, 1483 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1484 IB_LINK_LAYER_ETHERNET)) {
1485 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" 1484 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1486 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", 1485 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1487 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, 1486 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index a0af6d424aed..8d6ff9df49fe 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -133,6 +133,33 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
133 return 0; 133 return 0;
134} 134}
135 135
136/* QEDR sysfs interface */
137static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
138 char *buf)
139{
140 struct qedr_dev *dev = dev_get_drvdata(device);
141
142 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
143}
144static DEVICE_ATTR_RO(hw_rev);
145
146static ssize_t hca_type_show(struct device *device,
147 struct device_attribute *attr, char *buf)
148{
149 return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
150}
151static DEVICE_ATTR_RO(hca_type);
152
153static struct attribute *qedr_attributes[] = {
154 &dev_attr_hw_rev.attr,
155 &dev_attr_hca_type.attr,
156 NULL
157};
158
159static const struct attribute_group qedr_attr_group = {
160 .attrs = qedr_attributes,
161};
162
136static int qedr_iw_register_device(struct qedr_dev *dev) 163static int qedr_iw_register_device(struct qedr_dev *dev)
137{ 164{
138 dev->ibdev.node_type = RDMA_NODE_RNIC; 165 dev->ibdev.node_type = RDMA_NODE_RNIC;
@@ -170,8 +197,6 @@ static int qedr_register_device(struct qedr_dev *dev)
170{ 197{
171 int rc; 198 int rc;
172 199
173 strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
174
175 dev->ibdev.node_guid = dev->attr.node_guid; 200 dev->ibdev.node_guid = dev->attr.node_guid;
176 memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); 201 memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
177 dev->ibdev.owner = THIS_MODULE; 202 dev->ibdev.owner = THIS_MODULE;
@@ -262,9 +287,9 @@ static int qedr_register_device(struct qedr_dev *dev)
262 287
263 dev->ibdev.get_link_layer = qedr_link_layer; 288 dev->ibdev.get_link_layer = qedr_link_layer;
264 dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; 289 dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
265 290 rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
266 dev->ibdev.driver_id = RDMA_DRIVER_QEDR; 291 dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
267 return ib_register_device(&dev->ibdev, NULL); 292 return ib_register_device(&dev->ibdev, "qedr%d", NULL);
268} 293}
269 294
270/* This function allocates fast-path status block memory */ 295/* This function allocates fast-path status block memory */
@@ -404,37 +429,6 @@ err1:
404 return rc; 429 return rc;
405} 430}
406 431
407/* QEDR sysfs interface */
408static ssize_t show_rev(struct device *device, struct device_attribute *attr,
409 char *buf)
410{
411 struct qedr_dev *dev = dev_get_drvdata(device);
412
413 return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
414}
415
416static ssize_t show_hca_type(struct device *device,
417 struct device_attribute *attr, char *buf)
418{
419 return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
420}
421
422static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
423static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
424
425static struct device_attribute *qedr_attributes[] = {
426 &dev_attr_hw_rev,
427 &dev_attr_hca_type
428};
429
430static void qedr_remove_sysfiles(struct qedr_dev *dev)
431{
432 int i;
433
434 for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
435 device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
436}
437
438static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev) 432static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
439{ 433{
440 int rc = pci_enable_atomic_ops_to_root(pdev, 434 int rc = pci_enable_atomic_ops_to_root(pdev,
@@ -855,7 +849,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
855{ 849{
856 struct qed_dev_rdma_info dev_info; 850 struct qed_dev_rdma_info dev_info;
857 struct qedr_dev *dev; 851 struct qedr_dev *dev;
858 int rc = 0, i; 852 int rc = 0;
859 853
860 dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); 854 dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
861 if (!dev) { 855 if (!dev) {
@@ -914,18 +908,12 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
914 goto reg_err; 908 goto reg_err;
915 } 909 }
916 910
917 for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
918 if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
919 goto sysfs_err;
920
921 if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state)) 911 if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
922 qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE); 912 qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
923 913
924 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n"); 914 DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
925 return dev; 915 return dev;
926 916
927sysfs_err:
928 ib_unregister_device(&dev->ibdev);
929reg_err: 917reg_err:
930 qedr_sync_free_irqs(dev); 918 qedr_sync_free_irqs(dev);
931irq_err: 919irq_err:
@@ -944,7 +932,6 @@ static void qedr_remove(struct qedr_dev *dev)
944 /* First unregister with stack to stop all the active traffic 932 /* First unregister with stack to stop all the active traffic
945 * of the registered clients. 933 * of the registered clients.
946 */ 934 */
947 qedr_remove_sysfiles(dev);
948 ib_unregister_device(&dev->ibdev); 935 ib_unregister_device(&dev->ibdev);
949 936
950 qedr_stop_hw(dev); 937 qedr_stop_hw(dev);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a2d708dceb8d..53bbe6b4e6e6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -43,7 +43,7 @@
43#include "qedr_hsi_rdma.h" 43#include "qedr_hsi_rdma.h"
44 44
45#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" 45#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
46#define DP_NAME(dev) ((dev)->ibdev.name) 46#define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev)
47#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP) 47#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP)
48#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE) 48#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE)
49 49
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 85578887421b..e1ac2fd60bb1 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -519,9 +519,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
519 } 519 }
520 520
521 if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) 521 if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
522 packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; 522 packet->tx_dest = QED_LL2_TX_DEST_LB;
523 else 523 else
524 packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; 524 packet->tx_dest = QED_LL2_TX_DEST_NW;
525 525
526 packet->roce_mode = roce_mode; 526 packet->roce_mode = roce_mode;
527 memcpy(packet->header.vaddr, ud_header_buffer, header_size); 527 memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 8cc3df24e04e..82ee4b4a7084 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1447,7 +1447,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
1447 u64 pbl_base_addr, phy_prod_pair_addr; 1447 u64 pbl_base_addr, phy_prod_pair_addr;
1448 struct ib_ucontext *ib_ctx = NULL; 1448 struct ib_ucontext *ib_ctx = NULL;
1449 struct qedr_srq_hwq_info *hw_srq; 1449 struct qedr_srq_hwq_info *hw_srq;
1450 struct qedr_ucontext *ctx = NULL;
1451 u32 page_cnt, page_size; 1450 u32 page_cnt, page_size;
1452 struct qedr_srq *srq; 1451 struct qedr_srq *srq;
1453 int rc = 0; 1452 int rc = 0;
@@ -1473,7 +1472,6 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
1473 1472
1474 if (udata && ibpd->uobject && ibpd->uobject->context) { 1473 if (udata && ibpd->uobject && ibpd->uobject->context) {
1475 ib_ctx = ibpd->uobject->context; 1474 ib_ctx = ibpd->uobject->context;
1476 ctx = get_qedr_ucontext(ib_ctx);
1477 1475
1478 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { 1476 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1479 DP_ERR(dev, 1477 DP_ERR(dev,
@@ -2240,8 +2238,7 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2240 2238
2241 if (rdma_protocol_roce(&dev->ibdev, 1)) { 2239 if (rdma_protocol_roce(&dev->ibdev, 1)) {
2242 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state, 2240 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2243 ibqp->qp_type, attr_mask, 2241 ibqp->qp_type, attr_mask)) {
2244 IB_LINK_LAYER_ETHERNET)) {
2245 DP_ERR(dev, 2242 DP_ERR(dev,
2246 "modify qp: invalid attribute mask=0x%x specified for\n" 2243 "modify qp: invalid attribute mask=0x%x specified for\n"
2247 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", 2244 "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 3461df002f81..83d2349188db 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1390,13 +1390,13 @@ static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
1390 */ 1390 */
1391 1391
1392extern const char ib_qib_version[]; 1392extern const char ib_qib_version[];
1393extern const struct attribute_group qib_attr_group;
1393 1394
1394int qib_device_create(struct qib_devdata *); 1395int qib_device_create(struct qib_devdata *);
1395void qib_device_remove(struct qib_devdata *); 1396void qib_device_remove(struct qib_devdata *);
1396 1397
1397int qib_create_port_files(struct ib_device *ibdev, u8 port_num, 1398int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
1398 struct kobject *kobj); 1399 struct kobject *kobj);
1399int qib_verbs_register_sysfs(struct qib_devdata *);
1400void qib_verbs_unregister_sysfs(struct qib_devdata *); 1400void qib_verbs_unregister_sysfs(struct qib_devdata *);
1401/* Hook for sysfs read of QSFP */ 1401/* Hook for sysfs read of QSFP */
1402extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len); 1402extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 344e401915f7..a81905df2d0f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -378,25 +378,22 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
378 * qib_check_send_wqe - validate wr/wqe 378 * qib_check_send_wqe - validate wr/wqe
379 * @qp - The qp 379 * @qp - The qp
380 * @wqe - The built wqe 380 * @wqe - The built wqe
381 * @call_send - Determine if the send should be posted or scheduled
381 * 382 *
382 * validate wr/wqe. This is called 383 * Returns 0 on success, -EINVAL on failure
383 * prior to inserting the wqe into
384 * the ring but after the wqe has been
385 * setup.
386 *
387 * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure
388 */ 384 */
389int qib_check_send_wqe(struct rvt_qp *qp, 385int qib_check_send_wqe(struct rvt_qp *qp,
390 struct rvt_swqe *wqe) 386 struct rvt_swqe *wqe, bool *call_send)
391{ 387{
392 struct rvt_ah *ah; 388 struct rvt_ah *ah;
393 int ret = 0;
394 389
395 switch (qp->ibqp.qp_type) { 390 switch (qp->ibqp.qp_type) {
396 case IB_QPT_RC: 391 case IB_QPT_RC:
397 case IB_QPT_UC: 392 case IB_QPT_UC:
398 if (wqe->length > 0x80000000U) 393 if (wqe->length > 0x80000000U)
399 return -EINVAL; 394 return -EINVAL;
395 if (wqe->length > qp->pmtu)
396 *call_send = false;
400 break; 397 break;
401 case IB_QPT_SMI: 398 case IB_QPT_SMI:
402 case IB_QPT_GSI: 399 case IB_QPT_GSI:
@@ -405,12 +402,12 @@ int qib_check_send_wqe(struct rvt_qp *qp,
405 if (wqe->length > (1 << ah->log_pmtu)) 402 if (wqe->length > (1 << ah->log_pmtu))
406 return -EINVAL; 403 return -EINVAL;
407 /* progress hint */ 404 /* progress hint */
408 ret = 1; 405 *call_send = true;
409 break; 406 break;
410 default: 407 default:
411 break; 408 break;
412 } 409 }
413 return ret; 410 return 0;
414} 411}
415 412
416#ifdef CONFIG_DEBUG_FS 413#ifdef CONFIG_DEBUG_FS
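qib_check_send_wqe() now reports validation and the progress hint separately: the return value is only 0 or -EINVAL, while *call_send tells the caller whether the send should be pushed immediately (small SMI/GSI/UD packets) or scheduled (RC/UC payloads larger than the PMTU). A hedged sketch of a caller under the new contract; do_send_now() and schedule_send() are hypothetical stand-ins for the rdmavt progress paths:

bool call_send = false;
int err = qib_check_send_wqe(qp, wqe, &call_send);

if (err)
        return err;             /* bad WQE, reject the post */
if (call_send)
        do_send_now(qp);        /* small packet: make progress inline */
else
        schedule_send(qp);      /* large payload: defer to the send engine */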
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index f35fdeb14347..6fa002940451 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -254,7 +254,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
254 goto bail; 254 goto bail;
255 } 255 }
256 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 256 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
257 qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 257 rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
258 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 258 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
259 /* will get called again */ 259 /* will get called again */
260 goto done; 260 goto done;
@@ -838,7 +838,7 @@ void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
838 qib_migrate_qp(qp); 838 qib_migrate_qp(qp);
839 qp->s_retry = qp->s_retry_cnt; 839 qp->s_retry = qp->s_retry_cnt;
840 } else if (qp->s_last == qp->s_acked) { 840 } else if (qp->s_last == qp->s_acked) {
841 qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 841 rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
842 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 842 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
843 return; 843 return;
844 } else /* XXX need to handle delayed completion */ 844 } else /* XXX need to handle delayed completion */
@@ -1221,7 +1221,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
1221 ibp->rvp.n_other_naks++; 1221 ibp->rvp.n_other_naks++;
1222class_b: 1222class_b:
1223 if (qp->s_last == qp->s_acked) { 1223 if (qp->s_last == qp->s_acked) {
1224 qib_send_complete(qp, wqe, status); 1224 rvt_send_complete(qp, wqe, status);
1225 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1225 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1226 } 1226 }
1227 break; 1227 break;
@@ -1425,7 +1425,8 @@ read_middle:
1425 qp->s_rdma_read_len -= pmtu; 1425 qp->s_rdma_read_len -= pmtu;
1426 update_last_psn(qp, psn); 1426 update_last_psn(qp, psn);
1427 spin_unlock_irqrestore(&qp->s_lock, flags); 1427 spin_unlock_irqrestore(&qp->s_lock, flags);
1428 qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); 1428 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1429 data, pmtu, false, false);
1429 goto bail; 1430 goto bail;
1430 1431
1431 case OP(RDMA_READ_RESPONSE_ONLY): 1432 case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1471,7 +1472,8 @@ read_last:
1471 if (unlikely(tlen != qp->s_rdma_read_len)) 1472 if (unlikely(tlen != qp->s_rdma_read_len))
1472 goto ack_len_err; 1473 goto ack_len_err;
1473 aeth = be32_to_cpu(ohdr->u.aeth); 1474 aeth = be32_to_cpu(ohdr->u.aeth);
1474 qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); 1475 rvt_copy_sge(qp, &qp->s_rdma_read_sge,
1476 data, tlen, false, false);
1475 WARN_ON(qp->s_rdma_read_sge.num_sge); 1477 WARN_ON(qp->s_rdma_read_sge.num_sge);
1476 (void) do_rc_ack(qp, aeth, psn, 1478 (void) do_rc_ack(qp, aeth, psn,
1477 OP(RDMA_READ_RESPONSE_LAST), 0, rcd); 1479 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1490,7 +1492,7 @@ ack_len_err:
1490 status = IB_WC_LOC_LEN_ERR; 1492 status = IB_WC_LOC_LEN_ERR;
1491ack_err: 1493ack_err:
1492 if (qp->s_last == qp->s_acked) { 1494 if (qp->s_last == qp->s_acked) {
1493 qib_send_complete(qp, wqe, status); 1495 rvt_send_complete(qp, wqe, status);
1494 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1496 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1495 } 1497 }
1496ack_done: 1498ack_done:
@@ -1844,7 +1846,7 @@ send_middle:
1844 qp->r_rcv_len += pmtu; 1846 qp->r_rcv_len += pmtu;
1845 if (unlikely(qp->r_rcv_len > qp->r_len)) 1847 if (unlikely(qp->r_rcv_len > qp->r_len))
1846 goto nack_inv; 1848 goto nack_inv;
1847 qib_copy_sge(&qp->r_sge, data, pmtu, 1); 1849 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
1848 break; 1850 break;
1849 1851
1850 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 1852 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -1890,7 +1892,7 @@ send_last:
1890 wc.byte_len = tlen + qp->r_rcv_len; 1892 wc.byte_len = tlen + qp->r_rcv_len;
1891 if (unlikely(wc.byte_len > qp->r_len)) 1893 if (unlikely(wc.byte_len > qp->r_len))
1892 goto nack_inv; 1894 goto nack_inv;
1893 qib_copy_sge(&qp->r_sge, data, tlen, 1); 1895 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
1894 rvt_put_ss(&qp->r_sge); 1896 rvt_put_ss(&qp->r_sge);
1895 qp->r_msn++; 1897 qp->r_msn++;
1896 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 1898 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
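
The qib_rc.c hunks above replace the driver-local qib_send_complete() and
qib_copy_sge() with the rdmavt helpers rvt_send_complete() and rvt_copy_sge();
the copy helper now takes the QP plus explicit bool release/copy_last flags
instead of an int. A short sketch of the call shapes used above (the wrapper
function is illustrative only):

static void example_rc_data_paths(struct rvt_qp *qp, struct rvt_swqe *wqe,
				  void *data, u32 pmtu)
{
	/* Receiver copy that releases MR references as segments complete. */
	rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);

	/* RDMA READ response data keeps its MR references (release = false). */
	rvt_copy_sge(qp, &qp->s_rdma_read_sge, data, pmtu, false, false);

	/* Retire the WQE once the request completes or is flushed. */
	rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
}
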
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index f8a7de795beb..1fa21938f310 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -171,307 +171,6 @@ err:
171} 171}
172 172
173/** 173/**
174 * qib_ruc_loopback - handle UC and RC loopback requests
175 * @sqp: the sending QP
176 *
177 * This is called from qib_do_send() to
178 * forward a WQE addressed to the same HCA.
179 * Note that although we are single threaded due to the tasklet, we still
180 * have to protect against post_send(). We don't have to worry about
181 * receive interrupts since this is a connected protocol and all packets
182 * will pass through here.
183 */
184static void qib_ruc_loopback(struct rvt_qp *sqp)
185{
186 struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
187 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
188 struct qib_devdata *dd = ppd->dd;
189 struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
190 struct rvt_qp *qp;
191 struct rvt_swqe *wqe;
192 struct rvt_sge *sge;
193 unsigned long flags;
194 struct ib_wc wc;
195 u64 sdata;
196 atomic64_t *maddr;
197 enum ib_wc_status send_status;
198 int release;
199 int ret;
200
201 rcu_read_lock();
202 /*
203 * Note that we check the responder QP state after
204 * checking the requester's state.
205 */
206 qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
207 if (!qp)
208 goto done;
209
210 spin_lock_irqsave(&sqp->s_lock, flags);
211
212 /* Return if we are already busy processing a work request. */
213 if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
214 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
215 goto unlock;
216
217 sqp->s_flags |= RVT_S_BUSY;
218
219again:
220 if (sqp->s_last == READ_ONCE(sqp->s_head))
221 goto clr_busy;
222 wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
223
224 /* Return if it is not OK to start a new work request. */
225 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
226 if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
227 goto clr_busy;
228 /* We are in the error state, flush the work request. */
229 send_status = IB_WC_WR_FLUSH_ERR;
230 goto flush_send;
231 }
232
233 /*
234 * We can rely on the entry not changing without the s_lock
235 * being held until we update s_last.
236 * We increment s_cur to indicate s_last is in progress.
237 */
238 if (sqp->s_last == sqp->s_cur) {
239 if (++sqp->s_cur >= sqp->s_size)
240 sqp->s_cur = 0;
241 }
242 spin_unlock_irqrestore(&sqp->s_lock, flags);
243
244 if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
245 qp->ibqp.qp_type != sqp->ibqp.qp_type) {
246 ibp->rvp.n_pkt_drops++;
247 /*
248 * For RC, the requester would timeout and retry so
249 * shortcut the timeouts and just signal too many retries.
250 */
251 if (sqp->ibqp.qp_type == IB_QPT_RC)
252 send_status = IB_WC_RETRY_EXC_ERR;
253 else
254 send_status = IB_WC_SUCCESS;
255 goto serr;
256 }
257
258 memset(&wc, 0, sizeof(wc));
259 send_status = IB_WC_SUCCESS;
260
261 release = 1;
262 sqp->s_sge.sge = wqe->sg_list[0];
263 sqp->s_sge.sg_list = wqe->sg_list + 1;
264 sqp->s_sge.num_sge = wqe->wr.num_sge;
265 sqp->s_len = wqe->length;
266 switch (wqe->wr.opcode) {
267 case IB_WR_SEND_WITH_IMM:
268 wc.wc_flags = IB_WC_WITH_IMM;
269 wc.ex.imm_data = wqe->wr.ex.imm_data;
270 /* FALLTHROUGH */
271 case IB_WR_SEND:
272 ret = rvt_get_rwqe(qp, false);
273 if (ret < 0)
274 goto op_err;
275 if (!ret)
276 goto rnr_nak;
277 break;
278
279 case IB_WR_RDMA_WRITE_WITH_IMM:
280 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
281 goto inv_err;
282 wc.wc_flags = IB_WC_WITH_IMM;
283 wc.ex.imm_data = wqe->wr.ex.imm_data;
284 ret = rvt_get_rwqe(qp, true);
285 if (ret < 0)
286 goto op_err;
287 if (!ret)
288 goto rnr_nak;
289 /* FALLTHROUGH */
290 case IB_WR_RDMA_WRITE:
291 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
292 goto inv_err;
293 if (wqe->length == 0)
294 break;
295 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
296 wqe->rdma_wr.remote_addr,
297 wqe->rdma_wr.rkey,
298 IB_ACCESS_REMOTE_WRITE)))
299 goto acc_err;
300 qp->r_sge.sg_list = NULL;
301 qp->r_sge.num_sge = 1;
302 qp->r_sge.total_len = wqe->length;
303 break;
304
305 case IB_WR_RDMA_READ:
306 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
307 goto inv_err;
308 if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
309 wqe->rdma_wr.remote_addr,
310 wqe->rdma_wr.rkey,
311 IB_ACCESS_REMOTE_READ)))
312 goto acc_err;
313 release = 0;
314 sqp->s_sge.sg_list = NULL;
315 sqp->s_sge.num_sge = 1;
316 qp->r_sge.sge = wqe->sg_list[0];
317 qp->r_sge.sg_list = wqe->sg_list + 1;
318 qp->r_sge.num_sge = wqe->wr.num_sge;
319 qp->r_sge.total_len = wqe->length;
320 break;
321
322 case IB_WR_ATOMIC_CMP_AND_SWP:
323 case IB_WR_ATOMIC_FETCH_AND_ADD:
324 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
325 goto inv_err;
326 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
327 wqe->atomic_wr.remote_addr,
328 wqe->atomic_wr.rkey,
329 IB_ACCESS_REMOTE_ATOMIC)))
330 goto acc_err;
331 /* Perform atomic OP and save result. */
332 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
333 sdata = wqe->atomic_wr.compare_add;
334 *(u64 *) sqp->s_sge.sge.vaddr =
335 (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
336 (u64) atomic64_add_return(sdata, maddr) - sdata :
337 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
338 sdata, wqe->atomic_wr.swap);
339 rvt_put_mr(qp->r_sge.sge.mr);
340 qp->r_sge.num_sge = 0;
341 goto send_comp;
342
343 default:
344 send_status = IB_WC_LOC_QP_OP_ERR;
345 goto serr;
346 }
347
348 sge = &sqp->s_sge.sge;
349 while (sqp->s_len) {
350 u32 len = sqp->s_len;
351
352 if (len > sge->length)
353 len = sge->length;
354 if (len > sge->sge_length)
355 len = sge->sge_length;
356 BUG_ON(len == 0);
357 qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
358 sge->vaddr += len;
359 sge->length -= len;
360 sge->sge_length -= len;
361 if (sge->sge_length == 0) {
362 if (!release)
363 rvt_put_mr(sge->mr);
364 if (--sqp->s_sge.num_sge)
365 *sge = *sqp->s_sge.sg_list++;
366 } else if (sge->length == 0 && sge->mr->lkey) {
367 if (++sge->n >= RVT_SEGSZ) {
368 if (++sge->m >= sge->mr->mapsz)
369 break;
370 sge->n = 0;
371 }
372 sge->vaddr =
373 sge->mr->map[sge->m]->segs[sge->n].vaddr;
374 sge->length =
375 sge->mr->map[sge->m]->segs[sge->n].length;
376 }
377 sqp->s_len -= len;
378 }
379 if (release)
380 rvt_put_ss(&qp->r_sge);
381
382 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
383 goto send_comp;
384
385 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
386 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
387 else
388 wc.opcode = IB_WC_RECV;
389 wc.wr_id = qp->r_wr_id;
390 wc.status = IB_WC_SUCCESS;
391 wc.byte_len = wqe->length;
392 wc.qp = &qp->ibqp;
393 wc.src_qp = qp->remote_qpn;
394 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
395 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
396 wc.port_num = 1;
397 /* Signal completion event if the solicited bit is set. */
398 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
399 wqe->wr.send_flags & IB_SEND_SOLICITED);
400
401send_comp:
402 spin_lock_irqsave(&sqp->s_lock, flags);
403 ibp->rvp.n_loop_pkts++;
404flush_send:
405 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
406 qib_send_complete(sqp, wqe, send_status);
407 goto again;
408
409rnr_nak:
410 /* Handle RNR NAK */
411 if (qp->ibqp.qp_type == IB_QPT_UC)
412 goto send_comp;
413 ibp->rvp.n_rnr_naks++;
414 /*
415 * Note: we don't need the s_lock held since the BUSY flag
416 * makes this single threaded.
417 */
418 if (sqp->s_rnr_retry == 0) {
419 send_status = IB_WC_RNR_RETRY_EXC_ERR;
420 goto serr;
421 }
422 if (sqp->s_rnr_retry_cnt < 7)
423 sqp->s_rnr_retry--;
424 spin_lock_irqsave(&sqp->s_lock, flags);
425 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
426 goto clr_busy;
427 rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
428 IB_AETH_CREDIT_SHIFT);
429 goto clr_busy;
430
431op_err:
432 send_status = IB_WC_REM_OP_ERR;
433 wc.status = IB_WC_LOC_QP_OP_ERR;
434 goto err;
435
436inv_err:
437 send_status = IB_WC_REM_INV_REQ_ERR;
438 wc.status = IB_WC_LOC_QP_OP_ERR;
439 goto err;
440
441acc_err:
442 send_status = IB_WC_REM_ACCESS_ERR;
443 wc.status = IB_WC_LOC_PROT_ERR;
444err:
445 /* responder goes to error state */
446 rvt_rc_error(qp, wc.status);
447
448serr:
449 spin_lock_irqsave(&sqp->s_lock, flags);
450 qib_send_complete(sqp, wqe, send_status);
451 if (sqp->ibqp.qp_type == IB_QPT_RC) {
452 int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
453
454 sqp->s_flags &= ~RVT_S_BUSY;
455 spin_unlock_irqrestore(&sqp->s_lock, flags);
456 if (lastwqe) {
457 struct ib_event ev;
458
459 ev.device = sqp->ibqp.device;
460 ev.element.qp = &sqp->ibqp;
461 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
462 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
463 }
464 goto done;
465 }
466clr_busy:
467 sqp->s_flags &= ~RVT_S_BUSY;
468unlock:
469 spin_unlock_irqrestore(&sqp->s_lock, flags);
470done:
471 rcu_read_unlock();
472}
473
474/**
475 * qib_make_grh - construct a GRH header 174 * qib_make_grh - construct a GRH header
476 * @ibp: a pointer to the IB port 175 * @ibp: a pointer to the IB port
477 * @hdr: a pointer to the GRH header being constructed 176 * @hdr: a pointer to the GRH header being constructed
@@ -573,7 +272,7 @@ void qib_do_send(struct rvt_qp *qp)
573 qp->ibqp.qp_type == IB_QPT_UC) && 272 qp->ibqp.qp_type == IB_QPT_UC) &&
574 (rdma_ah_get_dlid(&qp->remote_ah_attr) & 273 (rdma_ah_get_dlid(&qp->remote_ah_attr) &
575 ~((1 << ppd->lmc) - 1)) == ppd->lid) { 274 ~((1 << ppd->lmc) - 1)) == ppd->lid) {
576 qib_ruc_loopback(qp); 275 rvt_ruc_loopback(qp);
577 return; 276 return;
578 } 277 }
579 278
@@ -613,42 +312,3 @@ void qib_do_send(struct rvt_qp *qp)
613 312
614 spin_unlock_irqrestore(&qp->s_lock, flags); 313 spin_unlock_irqrestore(&qp->s_lock, flags);
615} 314}
616
617/*
618 * This should be called with s_lock held.
619 */
620void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
621 enum ib_wc_status status)
622{
623 u32 old_last, last;
624
625 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
626 return;
627
628 last = qp->s_last;
629 old_last = last;
630 if (++last >= qp->s_size)
631 last = 0;
632 qp->s_last = last;
633 /* See post_send() */
634 barrier();
635 rvt_put_swqe(wqe);
636 if (qp->ibqp.qp_type == IB_QPT_UD ||
637 qp->ibqp.qp_type == IB_QPT_SMI ||
638 qp->ibqp.qp_type == IB_QPT_GSI)
639 atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
640
641 rvt_qp_swqe_complete(qp,
642 wqe,
643 ib_qib_wc_opcode[wqe->wr.opcode],
644 status);
645
646 if (qp->s_acked == old_last)
647 qp->s_acked = last;
648 if (qp->s_cur == old_last)
649 qp->s_cur = last;
650 if (qp->s_tail == old_last)
651 qp->s_tail = last;
652 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
653 qp->s_draining = 0;
654}
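
With the qib_ruc.c hunk above, the ~300-line qib_ruc_loopback() and the driver's
qib_send_complete() move into rdmavt; qib_do_send() keeps only the check for a
destination LID that matches the local port and then delegates to
rvt_ruc_loopback(). A condensed sketch of the surviving dispatch, mirroring the
hunk (example_do_send() is illustrative; ppd comes from the port data as in the
driver):

static void example_do_send(struct rvt_qp *qp, struct qib_pportdata *ppd)
{
	/* Connected QPs addressed to our own LID short-circuit through rdmavt. */
	if ((qp->ibqp.qp_type == IB_QPT_RC ||
	     qp->ibqp.qp_type == IB_QPT_UC) &&
	    (rdma_ah_get_dlid(&qp->remote_ah_attr) &
	     ~((1 << ppd->lmc) - 1)) == ppd->lid) {
		rvt_ruc_loopback(qp);	/* shared path replacing qib_ruc_loopback() */
		return;
	}
	/* ...otherwise build headers and hand the packet to the wire as before. */
}
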
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index d0723d4aef5c..757d4c9d713d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -651,7 +651,7 @@ unmap:
651 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) 651 if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
652 rvt_error_qp(qp, IB_WC_GENERAL_ERR); 652 rvt_error_qp(qp, IB_WC_GENERAL_ERR);
653 } else if (qp->s_wqe) 653 } else if (qp->s_wqe)
654 qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); 654 rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
655 spin_unlock(&qp->s_lock); 655 spin_unlock(&qp->s_lock);
656 spin_unlock(&qp->r_lock); 656 spin_unlock(&qp->r_lock);
657 /* return zero to process the next send work request */ 657 /* return zero to process the next send work request */
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ca2638d8f35e..1cf4ca3f23e3 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -551,17 +551,18 @@ static struct kobj_type qib_diagc_ktype = {
551 * Start of per-unit (or driver, in some cases, but replicated 551 * Start of per-unit (or driver, in some cases, but replicated
552 * per unit) functions (these get a device *) 552 * per unit) functions (these get a device *)
553 */ 553 */
554static ssize_t show_rev(struct device *device, struct device_attribute *attr, 554static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
555 char *buf) 555 char *buf)
556{ 556{
557 struct qib_ibdev *dev = 557 struct qib_ibdev *dev =
558 container_of(device, struct qib_ibdev, rdi.ibdev.dev); 558 container_of(device, struct qib_ibdev, rdi.ibdev.dev);
559 559
560 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); 560 return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
561} 561}
562static DEVICE_ATTR_RO(hw_rev);
562 563
563static ssize_t show_hca(struct device *device, struct device_attribute *attr, 564static ssize_t hca_type_show(struct device *device,
564 char *buf) 565 struct device_attribute *attr, char *buf)
565{ 566{
566 struct qib_ibdev *dev = 567 struct qib_ibdev *dev =
567 container_of(device, struct qib_ibdev, rdi.ibdev.dev); 568 container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -574,15 +575,18 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
574 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); 575 ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
575 return ret; 576 return ret;
576} 577}
578static DEVICE_ATTR_RO(hca_type);
579static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
577 580
578static ssize_t show_version(struct device *device, 581static ssize_t version_show(struct device *device,
579 struct device_attribute *attr, char *buf) 582 struct device_attribute *attr, char *buf)
580{ 583{
581 /* The string printed here is already newline-terminated. */ 584 /* The string printed here is already newline-terminated. */
582 return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); 585 return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
583} 586}
587static DEVICE_ATTR_RO(version);
584 588
585static ssize_t show_boardversion(struct device *device, 589static ssize_t boardversion_show(struct device *device,
586 struct device_attribute *attr, char *buf) 590 struct device_attribute *attr, char *buf)
587{ 591{
588 struct qib_ibdev *dev = 592 struct qib_ibdev *dev =
@@ -592,9 +596,9 @@ static ssize_t show_boardversion(struct device *device,
592 /* The string printed here is already newline-terminated. */ 596 /* The string printed here is already newline-terminated. */
593 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); 597 return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
594} 598}
599static DEVICE_ATTR_RO(boardversion);
595 600
596 601static ssize_t localbus_info_show(struct device *device,
597static ssize_t show_localbus_info(struct device *device,
598 struct device_attribute *attr, char *buf) 602 struct device_attribute *attr, char *buf)
599{ 603{
600 struct qib_ibdev *dev = 604 struct qib_ibdev *dev =
@@ -604,9 +608,9 @@ static ssize_t show_localbus_info(struct device *device,
604 /* The string printed here is already newline-terminated. */ 608 /* The string printed here is already newline-terminated. */
605 return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); 609 return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
606} 610}
611static DEVICE_ATTR_RO(localbus_info);
607 612
608 613static ssize_t nctxts_show(struct device *device,
609static ssize_t show_nctxts(struct device *device,
610 struct device_attribute *attr, char *buf) 614 struct device_attribute *attr, char *buf)
611{ 615{
612 struct qib_ibdev *dev = 616 struct qib_ibdev *dev =
@@ -620,9 +624,10 @@ static ssize_t show_nctxts(struct device *device,
620 (dd->first_user_ctxt > dd->cfgctxts) ? 0 : 624 (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
621 (dd->cfgctxts - dd->first_user_ctxt)); 625 (dd->cfgctxts - dd->first_user_ctxt));
622} 626}
627static DEVICE_ATTR_RO(nctxts);
623 628
624static ssize_t show_nfreectxts(struct device *device, 629static ssize_t nfreectxts_show(struct device *device,
625 struct device_attribute *attr, char *buf) 630 struct device_attribute *attr, char *buf)
626{ 631{
627 struct qib_ibdev *dev = 632 struct qib_ibdev *dev =
628 container_of(device, struct qib_ibdev, rdi.ibdev.dev); 633 container_of(device, struct qib_ibdev, rdi.ibdev.dev);
@@ -631,8 +636,9 @@ static ssize_t show_nfreectxts(struct device *device,
631 /* Return the number of free user ports (contexts) available. */ 636 /* Return the number of free user ports (contexts) available. */
632 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 637 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
633} 638}
639static DEVICE_ATTR_RO(nfreectxts);
634 640
635static ssize_t show_serial(struct device *device, 641static ssize_t serial_show(struct device *device,
636 struct device_attribute *attr, char *buf) 642 struct device_attribute *attr, char *buf)
637{ 643{
638 struct qib_ibdev *dev = 644 struct qib_ibdev *dev =
@@ -644,8 +650,9 @@ static ssize_t show_serial(struct device *device,
644 strcat(buf, "\n"); 650 strcat(buf, "\n");
645 return strlen(buf); 651 return strlen(buf);
646} 652}
653static DEVICE_ATTR_RO(serial);
647 654
648static ssize_t store_chip_reset(struct device *device, 655static ssize_t chip_reset_store(struct device *device,
649 struct device_attribute *attr, const char *buf, 656 struct device_attribute *attr, const char *buf,
650 size_t count) 657 size_t count)
651{ 658{
@@ -663,11 +670,12 @@ static ssize_t store_chip_reset(struct device *device,
663bail: 670bail:
664 return ret < 0 ? ret : count; 671 return ret < 0 ? ret : count;
665} 672}
673static DEVICE_ATTR_WO(chip_reset);
666 674
667/* 675/*
668 * Dump tempsense regs. in decimal, to ease shell-scripts. 676 * Dump tempsense regs. in decimal, to ease shell-scripts.
669 */ 677 */
670static ssize_t show_tempsense(struct device *device, 678static ssize_t tempsense_show(struct device *device,
671 struct device_attribute *attr, char *buf) 679 struct device_attribute *attr, char *buf)
672{ 680{
673 struct qib_ibdev *dev = 681 struct qib_ibdev *dev =
@@ -695,6 +703,7 @@ static ssize_t show_tempsense(struct device *device,
695 *(signed char *)(regvals + 7)); 703 *(signed char *)(regvals + 7));
696 return ret; 704 return ret;
697} 705}
706static DEVICE_ATTR_RO(tempsense);
698 707
699/* 708/*
700 * end of per-unit (or driver, in some cases, but replicated 709 * end of per-unit (or driver, in some cases, but replicated
@@ -702,30 +711,23 @@ static ssize_t show_tempsense(struct device *device,
702 */ 711 */
703 712
704/* start of per-unit file structures and support code */ 713/* start of per-unit file structures and support code */
705static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 714static struct attribute *qib_attributes[] = {
706static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 715 &dev_attr_hw_rev.attr,
707static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); 716 &dev_attr_hca_type.attr,
708static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); 717 &dev_attr_board_id.attr,
709static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); 718 &dev_attr_version.attr,
710static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); 719 &dev_attr_nctxts.attr,
711static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); 720 &dev_attr_nfreectxts.attr,
712static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); 721 &dev_attr_serial.attr,
713static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); 722 &dev_attr_boardversion.attr,
714static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); 723 &dev_attr_tempsense.attr,
715static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); 724 &dev_attr_localbus_info.attr,
716 725 &dev_attr_chip_reset.attr,
717static struct device_attribute *qib_attributes[] = { 726 NULL,
718 &dev_attr_hw_rev, 727};
719 &dev_attr_hca_type, 728
720 &dev_attr_board_id, 729const struct attribute_group qib_attr_group = {
721 &dev_attr_version, 730 .attrs = qib_attributes,
722 &dev_attr_nctxts,
723 &dev_attr_nfreectxts,
724 &dev_attr_serial,
725 &dev_attr_boardversion,
726 &dev_attr_tempsense,
727 &dev_attr_localbus_info,
728 &dev_attr_chip_reset,
729}; 731};
730 732
731int qib_create_port_files(struct ib_device *ibdev, u8 port_num, 733int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -827,27 +829,6 @@ bail:
827} 829}
828 830
829/* 831/*
830 * Register and create our files in /sys/class/infiniband.
831 */
832int qib_verbs_register_sysfs(struct qib_devdata *dd)
833{
834 struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
835 int i, ret;
836
837 for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
838 ret = device_create_file(&dev->dev, qib_attributes[i]);
839 if (ret)
840 goto bail;
841 }
842
843 return 0;
844bail:
845 for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i)
846 device_remove_file(&dev->dev, qib_attributes[i]);
847 return ret;
848}
849
850/*
851 * Unregister and remove our files in /sys/class/infiniband. 832 * Unregister and remove our files in /sys/class/infiniband.
852 */ 833 */
853void qib_verbs_unregister_sysfs(struct qib_devdata *dd) 834void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
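
The qib_sysfs.c hunk above converts the hand-rolled DEVICE_ATTR()/device_create_file()
registration into DEVICE_ATTR_RO()/_WO() declarations collected in a single
attribute_group (board_id simply reuses hca_type_show() via an explicit
DEVICE_ATTR()); the loop-based qib_verbs_register_sysfs() goes away because the
RDMA core now attaches the group at registration time. A condensed sketch of the
pattern with a hypothetical "foo" attribute:

static ssize_t foo_show(struct device *device,
			struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", 42);	/* report some device value */
}
static DEVICE_ATTR_RO(foo);	/* defines dev_attr_foo bound to foo_show() */

static struct attribute *example_attributes[] = {
	&dev_attr_foo.attr,
	NULL,
};

const struct attribute_group example_attr_group = {
	.attrs = example_attributes,
};

/* Handed to the core before registration, as the qib_verbs.c hunk below does:
 *	rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &example_attr_group);
 */
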
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 3e54bc11e0ae..30c70ad0f4bf 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -68,7 +68,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
68 goto bail; 68 goto bail;
69 } 69 }
70 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 70 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
71 qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 71 rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
72 goto done; 72 goto done;
73 } 73 }
74 74
@@ -359,7 +359,7 @@ send_first:
359 qp->r_rcv_len += pmtu; 359 qp->r_rcv_len += pmtu;
360 if (unlikely(qp->r_rcv_len > qp->r_len)) 360 if (unlikely(qp->r_rcv_len > qp->r_len))
361 goto rewind; 361 goto rewind;
362 qib_copy_sge(&qp->r_sge, data, pmtu, 0); 362 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
363 break; 363 break;
364 364
365 case OP(SEND_LAST_WITH_IMMEDIATE): 365 case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -385,7 +385,7 @@ send_last:
385 if (unlikely(wc.byte_len > qp->r_len)) 385 if (unlikely(wc.byte_len > qp->r_len))
386 goto rewind; 386 goto rewind;
387 wc.opcode = IB_WC_RECV; 387 wc.opcode = IB_WC_RECV;
388 qib_copy_sge(&qp->r_sge, data, tlen, 0); 388 rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
389 rvt_put_ss(&qp->s_rdma_read_sge); 389 rvt_put_ss(&qp->s_rdma_read_sge);
390last_imm: 390last_imm:
391 wc.wr_id = qp->r_wr_id; 391 wc.wr_id = qp->r_wr_id;
@@ -449,7 +449,7 @@ rdma_first:
449 qp->r_rcv_len += pmtu; 449 qp->r_rcv_len += pmtu;
450 if (unlikely(qp->r_rcv_len > qp->r_len)) 450 if (unlikely(qp->r_rcv_len > qp->r_len))
451 goto drop; 451 goto drop;
452 qib_copy_sge(&qp->r_sge, data, pmtu, 1); 452 rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
453 break; 453 break;
454 454
455 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 455 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -479,7 +479,7 @@ rdma_last_imm:
479 } 479 }
480 wc.byte_len = qp->r_len; 480 wc.byte_len = qp->r_len;
481 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 481 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
482 qib_copy_sge(&qp->r_sge, data, tlen, 1); 482 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
483 rvt_put_ss(&qp->r_sge); 483 rvt_put_ss(&qp->r_sge);
484 goto last_imm; 484 goto last_imm;
485 485
@@ -495,7 +495,7 @@ rdma_last:
495 tlen -= (hdrsize + pad + 4); 495 tlen -= (hdrsize + pad + 4);
496 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 496 if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
497 goto drop; 497 goto drop;
498 qib_copy_sge(&qp->r_sge, data, tlen, 1); 498 rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
499 rvt_put_ss(&qp->r_sge); 499 rvt_put_ss(&qp->r_sge);
500 break; 500 break;
501 501
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f8d029a2390f..4d4c31ea4e2d 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -162,8 +162,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
162 const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr); 162 const struct ib_global_route *grd = rdma_ah_read_grh(ah_attr);
163 163
164 qib_make_grh(ibp, &grh, grd, 0, 0); 164 qib_make_grh(ibp, &grh, grd, 0, 0);
165 qib_copy_sge(&qp->r_sge, &grh, 165 rvt_copy_sge(qp, &qp->r_sge, &grh,
166 sizeof(grh), 1); 166 sizeof(grh), true, false);
167 wc.wc_flags |= IB_WC_GRH; 167 wc.wc_flags |= IB_WC_GRH;
168 } else 168 } else
169 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 169 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -179,7 +179,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
179 if (len > sge->sge_length) 179 if (len > sge->sge_length)
180 len = sge->sge_length; 180 len = sge->sge_length;
181 BUG_ON(len == 0); 181 BUG_ON(len == 0);
182 qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1); 182 rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
183 sge->vaddr += len; 183 sge->vaddr += len;
184 sge->length -= len; 184 sge->length -= len;
185 sge->sge_length -= len; 185 sge->sge_length -= len;
@@ -260,7 +260,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
260 goto bail; 260 goto bail;
261 } 261 }
262 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 262 wqe = rvt_get_swqe_ptr(qp, qp->s_last);
263 qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 263 rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
264 goto done; 264 goto done;
265 } 265 }
266 266
@@ -304,7 +304,7 @@ int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
304 qib_ud_loopback(qp, wqe); 304 qib_ud_loopback(qp, wqe);
305 spin_lock_irqsave(&qp->s_lock, tflags); 305 spin_lock_irqsave(&qp->s_lock, tflags);
306 *flags = tflags; 306 *flags = tflags;
307 qib_send_complete(qp, wqe, IB_WC_SUCCESS); 307 rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
308 goto done; 308 goto done;
309 } 309 }
310 } 310 }
@@ -551,12 +551,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
551 goto drop; 551 goto drop;
552 } 552 }
553 if (has_grh) { 553 if (has_grh) {
554 qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, 554 rvt_copy_sge(qp, &qp->r_sge, &hdr->u.l.grh,
555 sizeof(struct ib_grh), 1); 555 sizeof(struct ib_grh), true, false);
556 wc.wc_flags |= IB_WC_GRH; 556 wc.wc_flags |= IB_WC_GRH;
557 } else 557 } else
558 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); 558 rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
559 qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); 559 rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
560 true, false);
560 rvt_put_ss(&qp->r_sge); 561 rvt_put_ss(&qp->r_sge);
561 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 562 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
562 return; 563 return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 41babbc0db58..4b0f5761a646 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -131,27 +131,6 @@ const enum ib_wc_opcode ib_qib_wc_opcode[] = {
131 */ 131 */
132__be64 ib_qib_sys_image_guid; 132__be64 ib_qib_sys_image_guid;
133 133
134/**
135 * qib_copy_sge - copy data to SGE memory
136 * @ss: the SGE state
137 * @data: the data to copy
138 * @length: the length of the data
139 */
140void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
141{
142 struct rvt_sge *sge = &ss->sge;
143
144 while (length) {
145 u32 len = rvt_get_sge_length(sge, length);
146
147 WARN_ON_ONCE(len == 0);
148 memcpy(sge->vaddr, data, len);
149 rvt_update_sge(ss, len, release);
150 data += len;
151 length -= len;
152 }
153}
154
155/* 134/*
156 * Count the number of DMA descriptors needed to send length bytes of data. 135 * Count the number of DMA descriptors needed to send length bytes of data.
157 * Don't modify the qib_sge_state to get the count. 136 * Don't modify the qib_sge_state to get the count.
@@ -752,7 +731,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
752 731
753 spin_lock(&qp->s_lock); 732 spin_lock(&qp->s_lock);
754 if (tx->wqe) 733 if (tx->wqe)
755 qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); 734 rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
756 else if (qp->ibqp.qp_type == IB_QPT_RC) { 735 else if (qp->ibqp.qp_type == IB_QPT_RC) {
757 struct ib_header *hdr; 736 struct ib_header *hdr;
758 737
@@ -1025,7 +1004,7 @@ done:
1025 } 1004 }
1026 if (qp->s_wqe) { 1005 if (qp->s_wqe) {
1027 spin_lock_irqsave(&qp->s_lock, flags); 1006 spin_lock_irqsave(&qp->s_lock, flags);
1028 qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1007 rvt_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1029 spin_unlock_irqrestore(&qp->s_lock, flags); 1008 spin_unlock_irqrestore(&qp->s_lock, flags);
1030 } else if (qp->ibqp.qp_type == IB_QPT_RC) { 1009 } else if (qp->ibqp.qp_type == IB_QPT_RC) {
1031 spin_lock_irqsave(&qp->s_lock, flags); 1010 spin_lock_irqsave(&qp->s_lock, flags);
@@ -1512,6 +1491,9 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
1512 rdi->dparms.props.max_mcast_grp; 1491 rdi->dparms.props.max_mcast_grp;
1513 /* post send table */ 1492 /* post send table */
1514 dd->verbs_dev.rdi.post_parms = qib_post_parms; 1493 dd->verbs_dev.rdi.post_parms = qib_post_parms;
1494
1495 /* opcode translation table */
1496 dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
1515} 1497}
1516 1498
1517/** 1499/**
@@ -1588,7 +1570,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
1588 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files; 1570 dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
1589 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; 1571 dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
1590 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; 1572 dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
1591 dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; 1573 dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
1592 dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; 1574 dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
1593 dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; 1575 dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
1594 dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; 1576 dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
@@ -1631,6 +1613,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
1631 dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id; 1613 dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
1632 dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; 1614 dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
1633 dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; 1615 dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
1616 dd->verbs_dev.rdi.dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
1634 1617
1635 qib_fill_device_attr(dd); 1618 qib_fill_device_attr(dd);
1636 1619
@@ -1642,19 +1625,14 @@ int qib_register_ib_device(struct qib_devdata *dd)
1642 i, 1625 i,
1643 dd->rcd[ctxt]->pkeys); 1626 dd->rcd[ctxt]->pkeys);
1644 } 1627 }
1628 rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
1645 1629
1646 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); 1630 ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
1647 if (ret) 1631 if (ret)
1648 goto err_tx; 1632 goto err_tx;
1649 1633
1650 ret = qib_verbs_register_sysfs(dd);
1651 if (ret)
1652 goto err_class;
1653
1654 return ret; 1634 return ret;
1655 1635
1656err_class:
1657 rvt_unregister_device(&dd->verbs_dev.rdi);
1658err_tx: 1636err_tx:
1659 while (!list_empty(&dev->txreq_free)) { 1637 while (!list_empty(&dev->txreq_free)) {
1660 struct list_head *l = dev->txreq_free.next; 1638 struct list_head *l = dev->txreq_free.next;
@@ -1716,14 +1694,14 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
1716 * It is only used in post send, which doesn't hold 1694 * It is only used in post send, which doesn't hold
1717 * the s_lock. 1695 * the s_lock.
1718 */ 1696 */
1719void _qib_schedule_send(struct rvt_qp *qp) 1697bool _qib_schedule_send(struct rvt_qp *qp)
1720{ 1698{
1721 struct qib_ibport *ibp = 1699 struct qib_ibport *ibp =
1722 to_iport(qp->ibqp.device, qp->port_num); 1700 to_iport(qp->ibqp.device, qp->port_num);
1723 struct qib_pportdata *ppd = ppd_from_ibp(ibp); 1701 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1724 struct qib_qp_priv *priv = qp->priv; 1702 struct qib_qp_priv *priv = qp->priv;
1725 1703
1726 queue_work(ppd->qib_wq, &priv->s_work); 1704 return queue_work(ppd->qib_wq, &priv->s_work);
1727} 1705}
1728 1706
1729/** 1707/**
@@ -1733,8 +1711,9 @@ void _qib_schedule_send(struct rvt_qp *qp)
1733 * This schedules qp progress. The s_lock 1711 * This schedules qp progress. The s_lock
1734 * should be held. 1712 * should be held.
1735 */ 1713 */
1736void qib_schedule_send(struct rvt_qp *qp) 1714bool qib_schedule_send(struct rvt_qp *qp)
1737{ 1715{
1738 if (qib_send_ok(qp)) 1716 if (qib_send_ok(qp))
1739 _qib_schedule_send(qp); 1717 return _qib_schedule_send(qp);
1718 return false;
1740} 1719}
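
The qib_verbs.c hunk above drops qib_copy_sge(), points rdmavt at the shared
opcode table (rdi.wc_opcode) and the memcpy SGE copy mode, attaches the sysfs
group with rdma_set_device_sysfs_group() ahead of rvt_register_device(), and
makes the schedule_send hooks return bool so callers can tell whether work was
actually queued. A minimal sketch of the bool-returning pair, following the hunk
(queue_work() already returns false when the work item was pending):

bool example_schedule_send_worker(struct rvt_qp *qp)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	struct qib_qp_priv *priv = qp->priv;

	/* false here means s_work was already queued, i.e. nothing new scheduled. */
	return queue_work(ppd->qib_wq, &priv->s_work);
}

bool example_schedule_send(struct rvt_qp *qp)
{
	if (qib_send_ok(qp))
		return example_schedule_send_worker(qp);
	return false;
}
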
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 666613eef88f..a4426c24b0d1 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -223,8 +223,8 @@ static inline int qib_send_ok(struct rvt_qp *qp)
223 !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); 223 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
224} 224}
225 225
226void _qib_schedule_send(struct rvt_qp *qp); 226bool _qib_schedule_send(struct rvt_qp *qp);
227void qib_schedule_send(struct rvt_qp *qp); 227bool qib_schedule_send(struct rvt_qp *qp);
228 228
229static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) 229static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
230{ 230{
@@ -292,9 +292,6 @@ void qib_put_txreq(struct qib_verbs_txreq *tx);
292int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, 292int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
293 u32 hdrwords, struct rvt_sge_state *ss, u32 len); 293 u32 hdrwords, struct rvt_sge_state *ss, u32 len);
294 294
295void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
296 int release);
297
298void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, 295void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
299 int has_grh, void *data, u32 tlen, struct rvt_qp *qp); 296 int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
300 297
@@ -303,7 +300,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
303 300
304int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr); 301int qib_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
305 302
306int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); 303int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
304 bool *call_send);
307 305
308struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); 306struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
309 307
@@ -333,9 +331,6 @@ void _qib_do_send(struct work_struct *work);
333 331
334void qib_do_send(struct rvt_qp *qp); 332void qib_do_send(struct rvt_qp *qp);
335 333
336void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
337 enum ib_wc_status status);
338
339void qib_send_rc_ack(struct rvt_qp *qp); 334void qib_send_rc_ack(struct rvt_qp *qp);
340 335
341int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags); 336int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags);
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 92dc66cc2d50..a3115709fb03 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -165,6 +165,5 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
165 165
166void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) 166void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
167{ 167{
168 if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) 168 debugfs_remove(qp_flow->dbgfs_dentry);
169 debugfs_remove(qp_flow->dbgfs_dentry);
170} 169}
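
The usnic_debugfs.c hunk above drops the IS_ERR_OR_NULL() guard because
debugfs_remove() already ignores NULL and error-pointer dentries, so the wrapper
can call it unconditionally:

void example_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
{
	/* debugfs_remove() is a no-op for a NULL or ERR_PTR dentry. */
	debugfs_remove(qp_flow->dbgfs_dentry);
}
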
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index f0538a460328..73bd00f8d2c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -76,7 +76,7 @@ static LIST_HEAD(usnic_ib_ibdev_list);
76static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz) 76static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
77{ 77{
78 struct usnic_ib_vf *vf = obj; 78 struct usnic_ib_vf *vf = obj;
79 return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name); 79 return scnprintf(buf, buf_sz, "PF: %s ", dev_name(&vf->pf->ib_dev.dev));
80} 80}
81/* End callback dump funcs */ 81/* End callback dump funcs */
82 82
@@ -138,7 +138,7 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
138 netdev = us_ibdev->netdev; 138 netdev = us_ibdev->netdev;
139 switch (event) { 139 switch (event) {
140 case NETDEV_REBOOT: 140 case NETDEV_REBOOT:
141 usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name); 141 usnic_info("PF Reset on %s\n", dev_name(&us_ibdev->ib_dev.dev));
142 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 142 usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
143 ib_event.event = IB_EVENT_PORT_ERR; 143 ib_event.event = IB_EVENT_PORT_ERR;
144 ib_event.device = &us_ibdev->ib_dev; 144 ib_event.device = &us_ibdev->ib_dev;
@@ -151,7 +151,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
151 if (!us_ibdev->ufdev->link_up && 151 if (!us_ibdev->ufdev->link_up &&
152 netif_carrier_ok(netdev)) { 152 netif_carrier_ok(netdev)) {
153 usnic_fwd_carrier_up(us_ibdev->ufdev); 153 usnic_fwd_carrier_up(us_ibdev->ufdev);
154 usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); 154 usnic_info("Link UP on %s\n",
155 dev_name(&us_ibdev->ib_dev.dev));
155 ib_event.event = IB_EVENT_PORT_ACTIVE; 156 ib_event.event = IB_EVENT_PORT_ACTIVE;
156 ib_event.device = &us_ibdev->ib_dev; 157 ib_event.device = &us_ibdev->ib_dev;
157 ib_event.element.port_num = 1; 158 ib_event.element.port_num = 1;
@@ -159,7 +160,8 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
159 } else if (us_ibdev->ufdev->link_up && 160 } else if (us_ibdev->ufdev->link_up &&
160 !netif_carrier_ok(netdev)) { 161 !netif_carrier_ok(netdev)) {
161 usnic_fwd_carrier_down(us_ibdev->ufdev); 162 usnic_fwd_carrier_down(us_ibdev->ufdev);
162 usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); 163 usnic_info("Link DOWN on %s\n",
164 dev_name(&us_ibdev->ib_dev.dev));
163 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 165 usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
164 ib_event.event = IB_EVENT_PORT_ERR; 166 ib_event.event = IB_EVENT_PORT_ERR;
165 ib_event.device = &us_ibdev->ib_dev; 167 ib_event.device = &us_ibdev->ib_dev;
@@ -168,17 +170,17 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
168 } else { 170 } else {
169 usnic_dbg("Ignoring %s on %s\n", 171 usnic_dbg("Ignoring %s on %s\n",
170 netdev_cmd_to_name(event), 172 netdev_cmd_to_name(event),
171 us_ibdev->ib_dev.name); 173 dev_name(&us_ibdev->ib_dev.dev));
172 } 174 }
173 break; 175 break;
174 case NETDEV_CHANGEADDR: 176 case NETDEV_CHANGEADDR:
175 if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, 177 if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
176 sizeof(us_ibdev->ufdev->mac))) { 178 sizeof(us_ibdev->ufdev->mac))) {
177 usnic_dbg("Ignoring addr change on %s\n", 179 usnic_dbg("Ignoring addr change on %s\n",
178 us_ibdev->ib_dev.name); 180 dev_name(&us_ibdev->ib_dev.dev));
179 } else { 181 } else {
180 usnic_info(" %s old mac: %pM new mac: %pM\n", 182 usnic_info(" %s old mac: %pM new mac: %pM\n",
181 us_ibdev->ib_dev.name, 183 dev_name(&us_ibdev->ib_dev.dev),
182 us_ibdev->ufdev->mac, 184 us_ibdev->ufdev->mac,
183 netdev->dev_addr); 185 netdev->dev_addr);
184 usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr); 186 usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
@@ -193,19 +195,19 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
193 case NETDEV_CHANGEMTU: 195 case NETDEV_CHANGEMTU:
194 if (us_ibdev->ufdev->mtu != netdev->mtu) { 196 if (us_ibdev->ufdev->mtu != netdev->mtu) {
195 usnic_info("MTU Change on %s old: %u new: %u\n", 197 usnic_info("MTU Change on %s old: %u new: %u\n",
196 us_ibdev->ib_dev.name, 198 dev_name(&us_ibdev->ib_dev.dev),
197 us_ibdev->ufdev->mtu, netdev->mtu); 199 us_ibdev->ufdev->mtu, netdev->mtu);
198 usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu); 200 usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
199 usnic_ib_qp_grp_modify_active_to_err(us_ibdev); 201 usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
200 } else { 202 } else {
201 usnic_dbg("Ignoring MTU change on %s\n", 203 usnic_dbg("Ignoring MTU change on %s\n",
202 us_ibdev->ib_dev.name); 204 dev_name(&us_ibdev->ib_dev.dev));
203 } 205 }
204 break; 206 break;
205 default: 207 default:
206 usnic_dbg("Ignoring event %s on %s", 208 usnic_dbg("Ignoring event %s on %s",
207 netdev_cmd_to_name(event), 209 netdev_cmd_to_name(event),
208 us_ibdev->ib_dev.name); 210 dev_name(&us_ibdev->ib_dev.dev));
209 } 211 }
210 mutex_unlock(&us_ibdev->usdev_lock); 212 mutex_unlock(&us_ibdev->usdev_lock);
211} 213}
@@ -267,7 +269,7 @@ static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
267 default: 269 default:
268 usnic_info("Ignoring event %s on %s", 270 usnic_info("Ignoring event %s on %s",
269 netdev_cmd_to_name(event), 271 netdev_cmd_to_name(event),
270 us_ibdev->ib_dev.name); 272 dev_name(&us_ibdev->ib_dev.dev));
271 } 273 }
272 mutex_unlock(&us_ibdev->usdev_lock); 274 mutex_unlock(&us_ibdev->usdev_lock);
273 275
@@ -364,7 +366,6 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
364 us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; 366 us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
365 us_ibdev->ib_dev.dev.parent = &dev->dev; 367 us_ibdev->ib_dev.dev.parent = &dev->dev;
366 us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; 368 us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
367 strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
368 369
369 us_ibdev->ib_dev.uverbs_cmd_mask = 370 us_ibdev->ib_dev.uverbs_cmd_mask =
370 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 371 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -416,7 +417,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
416 417
417 418
418 us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; 419 us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
419 if (ib_register_device(&us_ibdev->ib_dev, NULL)) 420 rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
421
422 if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
420 goto err_fwd_dealloc; 423 goto err_fwd_dealloc;
421 424
422 usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); 425 usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
@@ -437,9 +440,9 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
437 kref_init(&us_ibdev->vf_cnt); 440 kref_init(&us_ibdev->vf_cnt);
438 441
439 usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", 442 usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
440 us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), 443 dev_name(&us_ibdev->ib_dev.dev),
441 us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up, 444 netdev_name(us_ibdev->netdev), us_ibdev->ufdev->mac,
442 us_ibdev->ufdev->mtu); 445 us_ibdev->ufdev->link_up, us_ibdev->ufdev->mtu);
443 return us_ibdev; 446 return us_ibdev;
444 447
445err_fwd_dealloc: 448err_fwd_dealloc:
@@ -452,7 +455,7 @@ err_dealloc:
452 455
453static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) 456static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
454{ 457{
455 usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name); 458 usnic_info("Unregistering %s\n", dev_name(&us_ibdev->ib_dev.dev));
456 usnic_ib_sysfs_unregister_usdev(us_ibdev); 459 usnic_ib_sysfs_unregister_usdev(us_ibdev);
457 usnic_fwd_dev_free(us_ibdev->ufdev); 460 usnic_fwd_dev_free(us_ibdev->ufdev);
458 ib_unregister_device(&us_ibdev->ib_dev); 461 ib_unregister_device(&us_ibdev->ib_dev);
@@ -591,7 +594,7 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
591 mutex_unlock(&pf->usdev_lock); 594 mutex_unlock(&pf->usdev_lock);
592 595
593 usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev), 596 usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
594 pf->ib_dev.name); 597 dev_name(&pf->ib_dev.dev));
595 usnic_ib_log_vf(vf); 598 usnic_ib_log_vf(vf);
596 return 0; 599 return 0;
597 600
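
The usnic_ib_main.c hunks above stop formatting ib_dev.name in the driver: the
"usnic_%d" format string is passed to ib_register_device(), the sysfs attribute
group is attached with rdma_set_device_sysfs_group(), and log messages read the
assigned name back through dev_name(&ibdev->dev). A condensed sketch of the
registration sequence from the hunks (example_register() is illustrative; error
unwinding is elided):

static struct usnic_ib_dev *example_register(struct usnic_ib_dev *us_ibdev)
{
	rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);

	/* The core allocates the final "usnic_N" name from the format string. */
	if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
		return NULL;		/* caller unwinds, as err_fwd_dealloc does above */

	usnic_info("Added ibdev: %s netdev: %s\n",
		   dev_name(&us_ibdev->ib_dev.dev),
		   netdev_name(us_ibdev->netdev));
	return us_ibdev;
}
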
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 4210ca14014d..a7e4b2ccfaf8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -46,9 +46,8 @@
46#include "usnic_ib_sysfs.h" 46#include "usnic_ib_sysfs.h"
47#include "usnic_log.h" 47#include "usnic_log.h"
48 48
49static ssize_t usnic_ib_show_board(struct device *device, 49static ssize_t board_id_show(struct device *device,
50 struct device_attribute *attr, 50 struct device_attribute *attr, char *buf)
51 char *buf)
52{ 51{
53 struct usnic_ib_dev *us_ibdev = 52 struct usnic_ib_dev *us_ibdev =
54 container_of(device, struct usnic_ib_dev, ib_dev.dev); 53 container_of(device, struct usnic_ib_dev, ib_dev.dev);
@@ -60,13 +59,13 @@ static ssize_t usnic_ib_show_board(struct device *device,
60 59
61 return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); 60 return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
62} 61}
62static DEVICE_ATTR_RO(board_id);
63 63
64/* 64/*
65 * Report the configuration for this PF 65 * Report the configuration for this PF
66 */ 66 */
67static ssize_t 67static ssize_t
68usnic_ib_show_config(struct device *device, struct device_attribute *attr, 68config_show(struct device *device, struct device_attribute *attr, char *buf)
69 char *buf)
70{ 69{
71 struct usnic_ib_dev *us_ibdev; 70 struct usnic_ib_dev *us_ibdev;
72 char *ptr; 71 char *ptr;
@@ -94,7 +93,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
94 93
95 n = scnprintf(ptr, left, 94 n = scnprintf(ptr, left,
96 "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", 95 "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
97 us_ibdev->ib_dev.name, 96 dev_name(&us_ibdev->ib_dev.dev),
98 busname, 97 busname,
99 PCI_SLOT(us_ibdev->pdev->devfn), 98 PCI_SLOT(us_ibdev->pdev->devfn),
100 PCI_FUNC(us_ibdev->pdev->devfn), 99 PCI_FUNC(us_ibdev->pdev->devfn),
@@ -119,17 +118,17 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
119 UPDATE_PTR_LEFT(n, ptr, left); 118 UPDATE_PTR_LEFT(n, ptr, left);
120 } else { 119 } else {
121 n = scnprintf(ptr, left, "%s: no VFs\n", 120 n = scnprintf(ptr, left, "%s: no VFs\n",
122 us_ibdev->ib_dev.name); 121 dev_name(&us_ibdev->ib_dev.dev));
123 UPDATE_PTR_LEFT(n, ptr, left); 122 UPDATE_PTR_LEFT(n, ptr, left);
124 } 123 }
125 mutex_unlock(&us_ibdev->usdev_lock); 124 mutex_unlock(&us_ibdev->usdev_lock);
126 125
127 return ptr - buf; 126 return ptr - buf;
128} 127}
128static DEVICE_ATTR_RO(config);
129 129
130static ssize_t 130static ssize_t
131usnic_ib_show_iface(struct device *device, struct device_attribute *attr, 131iface_show(struct device *device, struct device_attribute *attr, char *buf)
132 char *buf)
133{ 132{
134 struct usnic_ib_dev *us_ibdev; 133 struct usnic_ib_dev *us_ibdev;
135 134
@@ -138,10 +137,10 @@ usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
138 return scnprintf(buf, PAGE_SIZE, "%s\n", 137 return scnprintf(buf, PAGE_SIZE, "%s\n",
139 netdev_name(us_ibdev->netdev)); 138 netdev_name(us_ibdev->netdev));
140} 139}
140static DEVICE_ATTR_RO(iface);
141 141
142static ssize_t 142static ssize_t
143usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, 143max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
144 char *buf)
145{ 144{
146 struct usnic_ib_dev *us_ibdev; 145 struct usnic_ib_dev *us_ibdev;
147 146
@@ -150,10 +149,10 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
150 return scnprintf(buf, PAGE_SIZE, "%u\n", 149 return scnprintf(buf, PAGE_SIZE, "%u\n",
151 kref_read(&us_ibdev->vf_cnt)); 150 kref_read(&us_ibdev->vf_cnt));
152} 151}
152static DEVICE_ATTR_RO(max_vf);
153 153
154static ssize_t 154static ssize_t
155usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr, 155qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
156 char *buf)
157{ 156{
158 struct usnic_ib_dev *us_ibdev; 157 struct usnic_ib_dev *us_ibdev;
159 int qp_per_vf; 158 int qp_per_vf;
@@ -165,10 +164,10 @@ usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
165 return scnprintf(buf, PAGE_SIZE, 164 return scnprintf(buf, PAGE_SIZE,
166 "%d\n", qp_per_vf); 165 "%d\n", qp_per_vf);
167} 166}
167static DEVICE_ATTR_RO(qp_per_vf);
168 168
169static ssize_t 169static ssize_t
170usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, 170cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
171 char *buf)
172{ 171{
173 struct usnic_ib_dev *us_ibdev; 172 struct usnic_ib_dev *us_ibdev;
174 173
@@ -177,21 +176,20 @@ usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
177 return scnprintf(buf, PAGE_SIZE, "%d\n", 176 return scnprintf(buf, PAGE_SIZE, "%d\n",
178 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); 177 us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
179} 178}
179static DEVICE_ATTR_RO(cq_per_vf);
180
181static struct attribute *usnic_class_attributes[] = {
182 &dev_attr_board_id.attr,
183 &dev_attr_config.attr,
184 &dev_attr_iface.attr,
185 &dev_attr_max_vf.attr,
186 &dev_attr_qp_per_vf.attr,
187 &dev_attr_cq_per_vf.attr,
188 NULL
189};
180 190
181static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); 191const struct attribute_group usnic_attr_group = {
182static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); 192 .attrs = usnic_class_attributes,
183static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
184static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
185static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
186static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
187
188static struct device_attribute *usnic_class_attributes[] = {
189 &dev_attr_board_id,
190 &dev_attr_config,
191 &dev_attr_iface,
192 &dev_attr_max_vf,
193 &dev_attr_qp_per_vf,
194 &dev_attr_cq_per_vf,
195}; 193};
196 194
197struct qpn_attribute { 195struct qpn_attribute {
@@ -278,18 +276,6 @@ static struct kobj_type usnic_ib_qpn_type = {
278 276
279int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) 277int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
280{ 278{
281 int i;
282 int err;
283 for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
284 err = device_create_file(&us_ibdev->ib_dev.dev,
285 usnic_class_attributes[i]);
286 if (err) {
287 usnic_err("Failed to create device file %d for %s eith err %d",
288 i, us_ibdev->ib_dev.name, err);
289 return -EINVAL;
290 }
291 }
292
293 /* create kernel object for looking at individual QPs */ 279 /* create kernel object for looking at individual QPs */
294 kobject_get(&us_ibdev->ib_dev.dev.kobj); 280 kobject_get(&us_ibdev->ib_dev.dev.kobj);
295 us_ibdev->qpn_kobj = kobject_create_and_add("qpn", 281 us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
@@ -304,12 +290,6 @@ int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
304 290
305void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev) 291void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
306{ 292{
307 int i;
308 for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
309 device_remove_file(&us_ibdev->ib_dev.dev,
310 usnic_class_attributes[i]);
311 }
312
313 kobject_put(us_ibdev->qpn_kobj); 293 kobject_put(us_ibdev->qpn_kobj);
314} 294}
315 295
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
index 3d98e16cfeaf..b1f064cec850 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -41,4 +41,6 @@ void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
41void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp); 41void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
42void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp); 42void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
43 43
44extern const struct attribute_group usnic_attr_group;
45
44#endif /* !USNIC_IB_SYSFS_H_ */ 46#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9973ac893635..0b91ff36768a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -159,7 +159,8 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
159 159
160 err = ib_copy_to_udata(udata, &resp, sizeof(resp)); 160 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
161 if (err) { 161 if (err) {
162 usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); 162 usnic_err("Failed to copy udata for %s",
163 dev_name(&us_ibdev->ib_dev.dev));
163 return err; 164 return err;
164 } 165 }
165 166
@@ -197,7 +198,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
197 vnic = vf->vnic; 198 vnic = vf->vnic;
198 if (!usnic_vnic_check_room(vnic, res_spec)) { 199 if (!usnic_vnic_check_room(vnic, res_spec)) {
199 usnic_dbg("Found used vnic %s from %s\n", 200 usnic_dbg("Found used vnic %s from %s\n",
200 us_ibdev->ib_dev.name, 201 dev_name(&us_ibdev->ib_dev.dev),
201 pci_name(usnic_vnic_get_pdev( 202 pci_name(usnic_vnic_get_pdev(
202 vnic))); 203 vnic)));
203 qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, 204 qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev,
@@ -230,7 +231,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
230 spin_unlock(&vf->lock); 231 spin_unlock(&vf->lock);
231 } 232 }
232 233
233 usnic_info("No free qp grp found on %s\n", us_ibdev->ib_dev.name); 234 usnic_info("No free qp grp found on %s\n",
235 dev_name(&us_ibdev->ib_dev.dev));
234 return ERR_PTR(-ENOMEM); 236 return ERR_PTR(-ENOMEM);
235 237
236qp_grp_check: 238qp_grp_check:
@@ -471,7 +473,7 @@ struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
471 } 473 }
472 474
473 usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", 475 usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
474 pd, context, ibdev->name); 476 pd, context, dev_name(&ibdev->dev));
475 return &pd->ibpd; 477 return &pd->ibpd;
476} 478}
477 479
@@ -508,20 +510,20 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
508 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); 510 err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
509 if (err) { 511 if (err) {
510 usnic_err("%s: cannot copy udata for create_qp\n", 512 usnic_err("%s: cannot copy udata for create_qp\n",
511 us_ibdev->ib_dev.name); 513 dev_name(&us_ibdev->ib_dev.dev));
512 return ERR_PTR(-EINVAL); 514 return ERR_PTR(-EINVAL);
513 } 515 }
514 516
515 err = create_qp_validate_user_data(cmd); 517 err = create_qp_validate_user_data(cmd);
516 if (err) { 518 if (err) {
517 usnic_err("%s: Failed to validate user data\n", 519 usnic_err("%s: Failed to validate user data\n",
518 us_ibdev->ib_dev.name); 520 dev_name(&us_ibdev->ib_dev.dev));
519 return ERR_PTR(-EINVAL); 521 return ERR_PTR(-EINVAL);
520 } 522 }
521 523
522 if (init_attr->qp_type != IB_QPT_UD) { 524 if (init_attr->qp_type != IB_QPT_UD) {
523 usnic_err("%s asked to make a non-UD QP: %d\n", 525 usnic_err("%s asked to make a non-UD QP: %d\n",
524 us_ibdev->ib_dev.name, init_attr->qp_type); 526 dev_name(&us_ibdev->ib_dev.dev), init_attr->qp_type);
525 return ERR_PTR(-EINVAL); 527 return ERR_PTR(-EINVAL);
526 } 528 }
527 529
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
index e0a95538c364..82dd810bc000 100644
--- a/drivers/infiniband/hw/usnic/usnic_transport.c
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -121,7 +121,7 @@ void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
121 if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { 121 if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
122 spin_lock(&roce_bitmap_lock); 122 spin_lock(&roce_bitmap_lock);
123 if (!port_num) { 123 if (!port_num) {
124 usnic_err("Unreserved unvalid port num 0 for %s\n", 124 usnic_err("Unreserved invalid port num 0 for %s\n",
125 usnic_transport_to_str(type)); 125 usnic_transport_to_str(type));
126 goto out_roce_custom; 126 goto out_roce_custom;
127 } 127 }
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 9dd39daa602b..49275a548751 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -54,18 +54,6 @@ static struct workqueue_struct *usnic_uiom_wq;
54 ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ 54 ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
55 (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) 55 (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
56 56
57static void usnic_uiom_reg_account(struct work_struct *work)
58{
59 struct usnic_uiom_reg *umem = container_of(work,
60 struct usnic_uiom_reg, work);
61
62 down_write(&umem->mm->mmap_sem);
63 umem->mm->locked_vm -= umem->diff;
64 up_write(&umem->mm->mmap_sem);
65 mmput(umem->mm);
66 kfree(umem);
67}
68
69static int usnic_uiom_dma_fault(struct iommu_domain *domain, 57static int usnic_uiom_dma_fault(struct iommu_domain *domain,
70 struct device *dev, 58 struct device *dev,
71 unsigned long iova, int flags, 59 unsigned long iova, int flags,
@@ -99,8 +87,9 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
99} 87}
100 88
101static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, 89static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
102 int dmasync, struct list_head *chunk_list) 90 int dmasync, struct usnic_uiom_reg *uiomr)
103{ 91{
92 struct list_head *chunk_list = &uiomr->chunk_list;
104 struct page **page_list; 93 struct page **page_list;
105 struct scatterlist *sg; 94 struct scatterlist *sg;
106 struct usnic_uiom_chunk *chunk; 95 struct usnic_uiom_chunk *chunk;
@@ -114,6 +103,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
114 int flags; 103 int flags;
115 dma_addr_t pa; 104 dma_addr_t pa;
116 unsigned int gup_flags; 105 unsigned int gup_flags;
106 struct mm_struct *mm;
117 107
118 /* 108 /*
119 * If the combination of the addr and size requested for this memory 109 * If the combination of the addr and size requested for this memory
@@ -136,7 +126,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
136 126
137 npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; 127 npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
138 128
139 down_write(&current->mm->mmap_sem); 129 uiomr->owning_mm = mm = current->mm;
130 down_write(&mm->mmap_sem);
140 131
141 locked = npages + current->mm->pinned_vm; 132 locked = npages + current->mm->pinned_vm;
142 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 133 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -196,10 +187,12 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
196out: 187out:
197 if (ret < 0) 188 if (ret < 0)
198 usnic_uiom_put_pages(chunk_list, 0); 189 usnic_uiom_put_pages(chunk_list, 0);
199 else 190 else {
200 current->mm->pinned_vm = locked; 191 mm->pinned_vm = locked;
192 mmgrab(uiomr->owning_mm);
193 }
201 194
202 up_write(&current->mm->mmap_sem); 195 up_write(&mm->mmap_sem);
203 free_page((unsigned long) page_list); 196 free_page((unsigned long) page_list);
204 return ret; 197 return ret;
205} 198}
@@ -379,7 +372,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
379 uiomr->pd = pd; 372 uiomr->pd = pd;
380 373
381 err = usnic_uiom_get_pages(addr, size, writable, dmasync, 374 err = usnic_uiom_get_pages(addr, size, writable, dmasync,
382 &uiomr->chunk_list); 375 uiomr);
383 if (err) { 376 if (err) {
384 usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", 377 usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
385 vpn_start, vpn_last, err); 378 vpn_start, vpn_last, err);
@@ -426,29 +419,39 @@ out_put_intervals:
426out_put_pages: 419out_put_pages:
427 usnic_uiom_put_pages(&uiomr->chunk_list, 0); 420 usnic_uiom_put_pages(&uiomr->chunk_list, 0);
428 spin_unlock(&pd->lock); 421 spin_unlock(&pd->lock);
422 mmdrop(uiomr->owning_mm);
429out_free_uiomr: 423out_free_uiomr:
430 kfree(uiomr); 424 kfree(uiomr);
431 return ERR_PTR(err); 425 return ERR_PTR(err);
432} 426}
433 427
434void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, 428static void __usnic_uiom_release_tail(struct usnic_uiom_reg *uiomr)
435 struct ib_ucontext *ucontext)
436{ 429{
437 struct task_struct *task; 430 mmdrop(uiomr->owning_mm);
438 struct mm_struct *mm; 431 kfree(uiomr);
439 unsigned long diff; 432}
440 433
441 __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); 434static inline size_t usnic_uiom_num_pages(struct usnic_uiom_reg *uiomr)
435{
436 return PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
437}
442 438
443 task = get_pid_task(ucontext->tgid, PIDTYPE_PID); 439static void usnic_uiom_release_defer(struct work_struct *work)
444 if (!task) 440{
445 goto out; 441 struct usnic_uiom_reg *uiomr =
446 mm = get_task_mm(task); 442 container_of(work, struct usnic_uiom_reg, work);
447 put_task_struct(task);
448 if (!mm)
449 goto out;
450 443
451 diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; 444 down_write(&uiomr->owning_mm->mmap_sem);
445 uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
446 up_write(&uiomr->owning_mm->mmap_sem);
447
448 __usnic_uiom_release_tail(uiomr);
449}
450
451void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
452 struct ib_ucontext *context)
453{
454 __usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
452 455
453 /* 456 /*
454 * We may be called with the mm's mmap_sem already held. This 457 * We may be called with the mm's mmap_sem already held. This
@@ -456,25 +459,21 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
456 * the last reference to our file and calls our release 459 * the last reference to our file and calls our release
457 * method. If there are memory regions to destroy, we'll end 460 * method. If there are memory regions to destroy, we'll end
458 * up here and not be able to take the mmap_sem. In that case 461 * up here and not be able to take the mmap_sem. In that case
459 * we defer the vm_locked accounting to the system workqueue. 462 * we defer the vm_locked accounting to a workqueue.
460 */ 463 */
461 if (ucontext->closing) { 464 if (context->closing) {
462 if (!down_write_trylock(&mm->mmap_sem)) { 465 if (!down_write_trylock(&uiomr->owning_mm->mmap_sem)) {
463 INIT_WORK(&uiomr->work, usnic_uiom_reg_account); 466 INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
464 uiomr->mm = mm;
465 uiomr->diff = diff;
466
467 queue_work(usnic_uiom_wq, &uiomr->work); 467 queue_work(usnic_uiom_wq, &uiomr->work);
468 return; 468 return;
469 } 469 }
470 } else 470 } else {
471 down_write(&mm->mmap_sem); 471 down_write(&uiomr->owning_mm->mmap_sem);
472 }
473 uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
474 up_write(&uiomr->owning_mm->mmap_sem);
472 475
473 mm->pinned_vm -= diff; 476 __usnic_uiom_release_tail(uiomr);
474 up_write(&mm->mmap_sem);
475 mmput(mm);
476out:
477 kfree(uiomr);
478} 477}
479 478
480struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) 479struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
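
The usnic_uiom hunks drop the get_pid_task()/get_task_mm() lookup at release time and instead record the owning mm when the pages are pinned, holding it with mmgrab()/mmdrop(). A rough sketch of the resulting lifetime rules, reusing names from the hunks above; this is illustrative only and elides the rlimit checks and error unwinding shown in the real code:

static int pin_pages_sketch(struct usnic_uiom_reg *uiomr, long npages)
{
	struct mm_struct *mm = current->mm;

	uiomr->owning_mm = mm;
	down_write(&mm->mmap_sem);
	mm->pinned_vm += npages;	/* accounting is done under mmap_sem */
	up_write(&mm->mmap_sem);
	mmgrab(mm);			/* keeps the mm_struct alive, not the address space */
	return 0;
}

static void release_sketch(struct usnic_uiom_reg *uiomr, bool closing)
{
	struct mm_struct *mm = uiomr->owning_mm;

	if (closing && !down_write_trylock(&mm->mmap_sem)) {
		/* uverbs teardown may already hold mmap_sem: defer to the workqueue */
		INIT_WORK(&uiomr->work, usnic_uiom_release_defer);
		queue_work(usnic_uiom_wq, &uiomr->work);
		return;
	}
	if (!closing)
		down_write(&mm->mmap_sem);
	mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
	up_write(&mm->mmap_sem);
	mmdrop(mm);			/* pairs with the mmgrab() at pin time */
	kfree(uiomr);
}
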
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 8c096acff123..b86a9731071b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -71,8 +71,7 @@ struct usnic_uiom_reg {
71 int writable; 71 int writable;
72 struct list_head chunk_list; 72 struct list_head chunk_list;
73 struct work_struct work; 73 struct work_struct work;
74 struct mm_struct *mm; 74 struct mm_struct *owning_mm;
75 unsigned long diff;
76}; 75};
77 76
78struct usnic_uiom_chunk { 77struct usnic_uiom_chunk {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5719899f49a..398443f43dc3 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -65,32 +65,36 @@ static struct workqueue_struct *event_wq;
65static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context); 65static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
66static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context); 66static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
67 67
68static ssize_t show_hca(struct device *device, struct device_attribute *attr, 68static ssize_t hca_type_show(struct device *device,
69 char *buf) 69 struct device_attribute *attr, char *buf)
70{ 70{
71 return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION); 71 return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
72} 72}
73static DEVICE_ATTR_RO(hca_type);
73 74
74static ssize_t show_rev(struct device *device, struct device_attribute *attr, 75static ssize_t hw_rev_show(struct device *device,
75 char *buf) 76 struct device_attribute *attr, char *buf)
76{ 77{
77 return sprintf(buf, "%d\n", PVRDMA_REV_ID); 78 return sprintf(buf, "%d\n", PVRDMA_REV_ID);
78} 79}
80static DEVICE_ATTR_RO(hw_rev);
79 81
80static ssize_t show_board(struct device *device, struct device_attribute *attr, 82static ssize_t board_id_show(struct device *device,
81 char *buf) 83 struct device_attribute *attr, char *buf)
82{ 84{
83 return sprintf(buf, "%d\n", PVRDMA_BOARD_ID); 85 return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
84} 86}
87static DEVICE_ATTR_RO(board_id);
85 88
86static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 89static struct attribute *pvrdma_class_attributes[] = {
87static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 90 &dev_attr_hw_rev.attr,
88static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 91 &dev_attr_hca_type.attr,
92 &dev_attr_board_id.attr,
93 NULL,
94};
89 95
90static struct device_attribute *pvrdma_class_attributes[] = { 96static const struct attribute_group pvrdma_attr_group = {
91 &dev_attr_hw_rev, 97 .attrs = pvrdma_class_attributes,
92 &dev_attr_hca_type,
93 &dev_attr_board_id
94}; 98};
95 99
96static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str) 100static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str)
@@ -160,9 +164,7 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
160static int pvrdma_register_device(struct pvrdma_dev *dev) 164static int pvrdma_register_device(struct pvrdma_dev *dev)
161{ 165{
162 int ret = -1; 166 int ret = -1;
163 int i = 0;
164 167
165 strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
166 dev->ib_dev.node_guid = dev->dsr->caps.node_guid; 168 dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
167 dev->sys_image_guid = dev->dsr->caps.sys_image_guid; 169 dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
168 dev->flags = 0; 170 dev->flags = 0;
@@ -266,24 +268,16 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
266 } 268 }
267 dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA; 269 dev->ib_dev.driver_id = RDMA_DRIVER_VMW_PVRDMA;
268 spin_lock_init(&dev->srq_tbl_lock); 270 spin_lock_init(&dev->srq_tbl_lock);
271 rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
269 272
270 ret = ib_register_device(&dev->ib_dev, NULL); 273 ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
271 if (ret) 274 if (ret)
272 goto err_srq_free; 275 goto err_srq_free;
273 276
274 for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
275 ret = device_create_file(&dev->ib_dev.dev,
276 pvrdma_class_attributes[i]);
277 if (ret)
278 goto err_class;
279 }
280
281 dev->ib_active = true; 277 dev->ib_active = true;
282 278
283 return 0; 279 return 0;
284 280
285err_class:
286 ib_unregister_device(&dev->ib_dev);
287err_srq_free: 281err_srq_free:
288 kfree(dev->srq_tbl); 282 kfree(dev->srq_tbl);
289err_qp_free: 283err_qp_free:
@@ -735,7 +729,7 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
735 729
736 default: 730 default:
737 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", 731 dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
738 event, dev->ib_dev.name); 732 event, dev_name(&dev->ib_dev.dev));
739 break; 733 break;
740 } 734 }
741} 735}
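
The registration hunk above also carries the naming change that runs through the series: the driver no longer formats ib_dev.name itself, and log sites read the core-assigned name back through dev_name(). Condensed from the lines in this hunk, with no APIs assumed beyond those shown:

/* Old: the driver owns the name buffer and prints it directly. */
strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
ret = ib_register_device(&dev->ib_dev, NULL);
dev_dbg(&dev->pdev->dev, "event on %s\n", dev->ib_dev.name);

/* New: the core resolves the "%d" during registration; query it afterwards. */
ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
dev_dbg(&dev->pdev->dev, "event on %s\n", dev_name(&dev->ib_dev.dev));
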
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 60083c0363a5..cf22f57a9f0d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -499,7 +499,7 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
499 next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state; 499 next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
500 500
501 if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type, 501 if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
502 attr_mask, IB_LINK_LAYER_ETHERNET)) { 502 attr_mask)) {
503 ret = -EINVAL; 503 ret = -EINVAL;
504 goto out; 504 goto out;
505 } 505 }
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 98e798007f75..7df896a18d38 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_RDMAVT 1config INFINIBAND_RDMAVT
2 tristate "RDMA verbs transport library" 2 tristate "RDMA verbs transport library"
3 depends on 64BIT && ARCH_DMA_ADDR_T_64BIT 3 depends on X86_64 && ARCH_DMA_ADDR_T_64BIT
4 depends on PCI 4 depends on PCI
5 select DMA_VIRT_OPS 5 select DMA_VIRT_OPS
6 ---help--- 6 ---help---
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5ce403c6cddb..1735deb1a9d4 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,6 +118,187 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
118}; 118};
119EXPORT_SYMBOL(ib_rvt_state_ops); 119EXPORT_SYMBOL(ib_rvt_state_ops);
120 120
121/* platform specific: return the last level cache (llc) size, in KiB */
122static int rvt_wss_llc_size(void)
123{
124 /* assume that the boot CPU value is universal for all CPUs */
125 return boot_cpu_data.x86_cache_size;
126}
127
128/* platform specific: cacheless copy */
129static void cacheless_memcpy(void *dst, void *src, size_t n)
130{
131 /*
132 * Use the only available X64 cacheless copy. Add a __user cast
133	 * to quiet sparse. The src argument is already in the kernel so
134 * there are no security issues. The extra fault recovery machinery
135 * is not invoked.
136 */
137 __copy_user_nocache(dst, (void __user *)src, n, 0);
138}
139
140void rvt_wss_exit(struct rvt_dev_info *rdi)
141{
142 struct rvt_wss *wss = rdi->wss;
143
144 if (!wss)
145 return;
146
147 /* coded to handle partially initialized and repeat callers */
148 kfree(wss->entries);
149 wss->entries = NULL;
150 kfree(rdi->wss);
151 rdi->wss = NULL;
152}
153
154/**
155 * rvt_wss_init - Init wss data structures
156 *
157 * Return: 0 on success
158 */
159int rvt_wss_init(struct rvt_dev_info *rdi)
160{
161 unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
162 unsigned int wss_threshold = rdi->dparms.wss_threshold;
163 unsigned int wss_clean_period = rdi->dparms.wss_clean_period;
164 long llc_size;
165 long llc_bits;
166 long table_size;
167 long table_bits;
168 struct rvt_wss *wss;
169 int node = rdi->dparms.node;
170
171 if (sge_copy_mode != RVT_SGE_COPY_ADAPTIVE) {
172 rdi->wss = NULL;
173 return 0;
174 }
175
176 rdi->wss = kzalloc_node(sizeof(*rdi->wss), GFP_KERNEL, node);
177 if (!rdi->wss)
178 return -ENOMEM;
179 wss = rdi->wss;
180
181 /* check for a valid percent range - default to 80 if none or invalid */
182 if (wss_threshold < 1 || wss_threshold > 100)
183 wss_threshold = 80;
184
185 /* reject a wildly large period */
186 if (wss_clean_period > 1000000)
187 wss_clean_period = 256;
188
189 /* reject a zero period */
190 if (wss_clean_period == 0)
191 wss_clean_period = 1;
192
193 /*
194 * Calculate the table size - the next power of 2 larger than the
195 * LLC size. LLC size is in KiB.
196 */
197 llc_size = rvt_wss_llc_size() * 1024;
198 table_size = roundup_pow_of_two(llc_size);
199
200 /* one bit per page in rounded up table */
201 llc_bits = llc_size / PAGE_SIZE;
202 table_bits = table_size / PAGE_SIZE;
203 wss->pages_mask = table_bits - 1;
204 wss->num_entries = table_bits / BITS_PER_LONG;
205
206 wss->threshold = (llc_bits * wss_threshold) / 100;
207 if (wss->threshold == 0)
208 wss->threshold = 1;
209
210 wss->clean_period = wss_clean_period;
211 atomic_set(&wss->clean_counter, wss_clean_period);
212
213 wss->entries = kcalloc_node(wss->num_entries, sizeof(*wss->entries),
214 GFP_KERNEL, node);
215 if (!wss->entries) {
216 rvt_wss_exit(rdi);
217 return -ENOMEM;
218 }
219
220 return 0;
221}
222
223/*
224 * Advance the clean counter. When the clean period has expired,
225 * clean an entry.
226 *
227 * This is implemented in atomics to avoid locking. Because multiple
228 * variables are involved, it can be racy which can lead to slightly
229 * inaccurate information. Since this is only a heuristic, this is
230	 * OK. Any inaccuracies will clean themselves out as the counter
231 * advances. That said, it is unlikely the entry clean operation will
232 * race - the next possible racer will not start until the next clean
233 * period.
234 *
235 * The clean counter is implemented as a decrement to zero. When zero
236 * is reached an entry is cleaned.
237 */
238static void wss_advance_clean_counter(struct rvt_wss *wss)
239{
240 int entry;
241 int weight;
242 unsigned long bits;
243
244 /* become the cleaner if we decrement the counter to zero */
245 if (atomic_dec_and_test(&wss->clean_counter)) {
246 /*
247 * Set, not add, the clean period. This avoids an issue
248 * where the counter could decrement below the clean period.
249 * Doing a set can result in lost decrements, slowing the
250	 * clean advance. Since this is a heuristic, this possible
251 * slowdown is OK.
252 *
253 * An alternative is to loop, advancing the counter by a
254 * clean period until the result is > 0. However, this could
255 * lead to several threads keeping another in the clean loop.
256 * This could be mitigated by limiting the number of times
257 * we stay in the loop.
258 */
259 atomic_set(&wss->clean_counter, wss->clean_period);
260
261 /*
262 * Uniquely grab the entry to clean and move to next.
263 * The current entry is always the lower bits of
264 * wss.clean_entry. The table size, wss.num_entries,
265 * is always a power-of-2.
266 */
267 entry = (atomic_inc_return(&wss->clean_entry) - 1)
268 & (wss->num_entries - 1);
269
270 /* clear the entry and count the bits */
271 bits = xchg(&wss->entries[entry], 0);
272 weight = hweight64((u64)bits);
273 /* only adjust the contended total count if needed */
274 if (weight)
275 atomic_sub(weight, &wss->total_count);
276 }
277}
278
279/*
280 * Insert the given address into the working set array.
281 */
282static void wss_insert(struct rvt_wss *wss, void *address)
283{
284 u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss->pages_mask;
285 u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
286 u32 nr = page & (BITS_PER_LONG - 1);
287
288 if (!test_and_set_bit(nr, &wss->entries[entry]))
289 atomic_inc(&wss->total_count);
290
291 wss_advance_clean_counter(wss);
292}
293
294/*
295 * Is the working set larger than the threshold?
296 */
297static inline bool wss_exceeds_threshold(struct rvt_wss *wss)
298{
299 return atomic_read(&wss->total_count) >= wss->threshold;
300}
301
121static void get_map_page(struct rvt_qpn_table *qpt, 302static void get_map_page(struct rvt_qpn_table *qpt,
122 struct rvt_qpn_map *map) 303 struct rvt_qpn_map *map)
123{ 304{
@@ -1164,11 +1345,8 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1164 int lastwqe = 0; 1345 int lastwqe = 0;
1165 int mig = 0; 1346 int mig = 0;
1166 int pmtu = 0; /* for gcc warning only */ 1347 int pmtu = 0; /* for gcc warning only */
1167 enum rdma_link_layer link;
1168 int opa_ah; 1348 int opa_ah;
1169 1349
1170 link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1171
1172 spin_lock_irq(&qp->r_lock); 1350 spin_lock_irq(&qp->r_lock);
1173 spin_lock(&qp->s_hlock); 1351 spin_lock(&qp->s_hlock);
1174 spin_lock(&qp->s_lock); 1352 spin_lock(&qp->s_lock);
@@ -1179,7 +1357,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1179 opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num); 1357 opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
1180 1358
1181 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, 1359 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1182 attr_mask, link)) 1360 attr_mask))
1183 goto inval; 1361 goto inval;
1184 1362
1185 if (rdi->driver_f.check_modify_qp && 1363 if (rdi->driver_f.check_modify_qp &&
@@ -1718,7 +1896,7 @@ static inline int rvt_qp_is_avail(
1718 */ 1896 */
1719static int rvt_post_one_wr(struct rvt_qp *qp, 1897static int rvt_post_one_wr(struct rvt_qp *qp,
1720 const struct ib_send_wr *wr, 1898 const struct ib_send_wr *wr,
1721 int *call_send) 1899 bool *call_send)
1722{ 1900{
1723 struct rvt_swqe *wqe; 1901 struct rvt_swqe *wqe;
1724 u32 next; 1902 u32 next;
@@ -1823,15 +2001,11 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
1823 wqe->wr.num_sge = j; 2001 wqe->wr.num_sge = j;
1824 } 2002 }
1825 2003
1826 /* general part of wqe valid - allow for driver checks */ 2004 /*
1827 if (rdi->driver_f.check_send_wqe) { 2005 * Calculate and set SWQE PSN values prior to handing it off
1828	 ret = rdi->driver_f.check_send_wqe(qp, wqe); 2006	 * to the driver's check routine. This gives the driver the
1829 if (ret < 0) 2007 * opportunity to adjust PSN values based on internal checks.
1830 goto bail_inval_free; 2008 */
1831 if (ret)
1832 *call_send = ret;
1833 }
1834
1835 log_pmtu = qp->log_pmtu; 2009 log_pmtu = qp->log_pmtu;
1836 if (qp->ibqp.qp_type != IB_QPT_UC && 2010 if (qp->ibqp.qp_type != IB_QPT_UC &&
1837 qp->ibqp.qp_type != IB_QPT_RC) { 2011 qp->ibqp.qp_type != IB_QPT_RC) {
@@ -1856,8 +2030,18 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
1856 (wqe->length ? 2030 (wqe->length ?
1857 ((wqe->length - 1) >> log_pmtu) : 2031 ((wqe->length - 1) >> log_pmtu) :
1858 0); 2032 0);
1859 qp->s_next_psn = wqe->lpsn + 1;
1860 } 2033 }
2034
2035 /* general part of wqe valid - allow for driver checks */
2036 if (rdi->driver_f.setup_wqe) {
2037 ret = rdi->driver_f.setup_wqe(qp, wqe, call_send);
2038 if (ret < 0)
2039 goto bail_inval_free_ref;
2040 }
2041
2042 if (!(rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL))
2043 qp->s_next_psn = wqe->lpsn + 1;
2044
1861 if (unlikely(reserved_op)) { 2045 if (unlikely(reserved_op)) {
1862 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; 2046 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
1863 rvt_qp_wqe_reserve(qp, wqe); 2047 rvt_qp_wqe_reserve(qp, wqe);
@@ -1871,6 +2055,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
1871 2055
1872 return 0; 2056 return 0;
1873 2057
2058bail_inval_free_ref:
2059 if (qp->ibqp.qp_type != IB_QPT_UC &&
2060 qp->ibqp.qp_type != IB_QPT_RC)
2061 atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1874bail_inval_free: 2062bail_inval_free:
1875 /* release mr holds */ 2063 /* release mr holds */
1876 while (j) { 2064 while (j) {
@@ -1897,7 +2085,7 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1897 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); 2085 struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1898 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); 2086 struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1899 unsigned long flags = 0; 2087 unsigned long flags = 0;
1900 int call_send; 2088 bool call_send;
1901 unsigned nreq = 0; 2089 unsigned nreq = 0;
1902 int err = 0; 2090 int err = 0;
1903 2091
@@ -1930,7 +2118,11 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1930bail: 2118bail:
1931 spin_unlock_irqrestore(&qp->s_hlock, flags); 2119 spin_unlock_irqrestore(&qp->s_hlock, flags);
1932 if (nreq) { 2120 if (nreq) {
1933 if (call_send) 2121 /*
2122 * Only call do_send if there is exactly one packet, and the
2123 * driver said it was ok.
2124 */
2125 if (nreq == 1 && call_send)
1934 rdi->driver_f.do_send(qp); 2126 rdi->driver_f.do_send(qp);
1935 else 2127 else
1936 rdi->driver_f.schedule_send_no_lock(qp); 2128 rdi->driver_f.schedule_send_no_lock(qp);
@@ -2465,3 +2657,454 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
2465 rcu_read_unlock(); 2657 rcu_read_unlock();
2466} 2658}
2467EXPORT_SYMBOL(rvt_qp_iter); 2659EXPORT_SYMBOL(rvt_qp_iter);
2660
2661/*
2662 * This should be called with s_lock held.
2663 */
2664void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
2665 enum ib_wc_status status)
2666{
2667 u32 old_last, last;
2668 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2669
2670 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
2671 return;
2672
2673 last = qp->s_last;
2674 old_last = last;
2675 trace_rvt_qp_send_completion(qp, wqe, last);
2676 if (++last >= qp->s_size)
2677 last = 0;
2678 trace_rvt_qp_send_completion(qp, wqe, last);
2679 qp->s_last = last;
2680 /* See post_send() */
2681 barrier();
2682 rvt_put_swqe(wqe);
2683 if (qp->ibqp.qp_type == IB_QPT_UD ||
2684 qp->ibqp.qp_type == IB_QPT_SMI ||
2685 qp->ibqp.qp_type == IB_QPT_GSI)
2686 atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
2687
2688 rvt_qp_swqe_complete(qp,
2689 wqe,
2690 rdi->wc_opcode[wqe->wr.opcode],
2691 status);
2692
2693 if (qp->s_acked == old_last)
2694 qp->s_acked = last;
2695 if (qp->s_cur == old_last)
2696 qp->s_cur = last;
2697 if (qp->s_tail == old_last)
2698 qp->s_tail = last;
2699 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
2700 qp->s_draining = 0;
2701}
2702EXPORT_SYMBOL(rvt_send_complete);
2703
2704/**
2705 * rvt_copy_sge - copy data to SGE memory
2706 * @qp: associated QP
2707 * @ss: the SGE state
2708 * @data: the data to copy
2709 * @length: the length of the data
2710 * @release: boolean to release MR
2711 * @copy_last: do a separate copy of the last 8 bytes
2712 */
2713void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
2714 void *data, u32 length,
2715 bool release, bool copy_last)
2716{
2717 struct rvt_sge *sge = &ss->sge;
2718 int i;
2719 bool in_last = false;
2720 bool cacheless_copy = false;
2721 struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2722 struct rvt_wss *wss = rdi->wss;
2723 unsigned int sge_copy_mode = rdi->dparms.sge_copy_mode;
2724
2725 if (sge_copy_mode == RVT_SGE_COPY_CACHELESS) {
2726 cacheless_copy = length >= PAGE_SIZE;
2727 } else if (sge_copy_mode == RVT_SGE_COPY_ADAPTIVE) {
2728 if (length >= PAGE_SIZE) {
2729 /*
2730 * NOTE: this *assumes*:
2731 * o The first vaddr is the dest.
2732 * o If multiple pages, then vaddr is sequential.
2733 */
2734 wss_insert(wss, sge->vaddr);
2735 if (length >= (2 * PAGE_SIZE))
2736 wss_insert(wss, (sge->vaddr + PAGE_SIZE));
2737
2738 cacheless_copy = wss_exceeds_threshold(wss);
2739 } else {
2740 wss_advance_clean_counter(wss);
2741 }
2742 }
2743
2744 if (copy_last) {
2745 if (length > 8) {
2746 length -= 8;
2747 } else {
2748 copy_last = false;
2749 in_last = true;
2750 }
2751 }
2752
2753again:
2754 while (length) {
2755 u32 len = rvt_get_sge_length(sge, length);
2756
2757 WARN_ON_ONCE(len == 0);
2758 if (unlikely(in_last)) {
2759 /* enforce byte transfer ordering */
2760 for (i = 0; i < len; i++)
2761 ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
2762 } else if (cacheless_copy) {
2763 cacheless_memcpy(sge->vaddr, data, len);
2764 } else {
2765 memcpy(sge->vaddr, data, len);
2766 }
2767 rvt_update_sge(ss, len, release);
2768 data += len;
2769 length -= len;
2770 }
2771
2772 if (copy_last) {
2773 copy_last = false;
2774 in_last = true;
2775 length = 8;
2776 goto again;
2777 }
2778}
2779EXPORT_SYMBOL(rvt_copy_sge);
2780
2781/**
2782 * ruc_loopback - handle UC and RC loopback requests
2783 * @sqp: the sending QP
2784 *
2785 * This is called from rvt_do_send() to forward a WQE addressed to the same HFI
2786 * Note that although we are single threaded due to the send engine, we still
2787 * have to protect against post_send(). We don't have to worry about
2788 * receive interrupts since this is a connected protocol and all packets
2789 * will pass through here.
2790 */
2791void rvt_ruc_loopback(struct rvt_qp *sqp)
2792{
2793 struct rvt_ibport *rvp = NULL;
2794 struct rvt_dev_info *rdi = ib_to_rvt(sqp->ibqp.device);
2795 struct rvt_qp *qp;
2796 struct rvt_swqe *wqe;
2797 struct rvt_sge *sge;
2798 unsigned long flags;
2799 struct ib_wc wc;
2800 u64 sdata;
2801 atomic64_t *maddr;
2802 enum ib_wc_status send_status;
2803 bool release;
2804 int ret;
2805 bool copy_last = false;
2806 int local_ops = 0;
2807
2808 rcu_read_lock();
2809 rvp = rdi->ports[sqp->port_num - 1];
2810
2811 /*
2812 * Note that we check the responder QP state after
2813 * checking the requester's state.
2814 */
2815
2816 qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), rvp,
2817 sqp->remote_qpn);
2818
2819 spin_lock_irqsave(&sqp->s_lock, flags);
2820
2821 /* Return if we are already busy processing a work request. */
2822 if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
2823 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
2824 goto unlock;
2825
2826 sqp->s_flags |= RVT_S_BUSY;
2827
2828again:
2829 if (sqp->s_last == READ_ONCE(sqp->s_head))
2830 goto clr_busy;
2831 wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
2832
2833 /* Return if it is not OK to start a new work request. */
2834 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
2835 if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
2836 goto clr_busy;
2837 /* We are in the error state, flush the work request. */
2838 send_status = IB_WC_WR_FLUSH_ERR;
2839 goto flush_send;
2840 }
2841
2842 /*
2843 * We can rely on the entry not changing without the s_lock
2844 * being held until we update s_last.
2845 * We increment s_cur to indicate s_last is in progress.
2846 */
2847 if (sqp->s_last == sqp->s_cur) {
2848 if (++sqp->s_cur >= sqp->s_size)
2849 sqp->s_cur = 0;
2850 }
2851 spin_unlock_irqrestore(&sqp->s_lock, flags);
2852
2853 if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
2854 qp->ibqp.qp_type != sqp->ibqp.qp_type) {
2855 rvp->n_pkt_drops++;
2856 /*
2857 * For RC, the requester would timeout and retry so
2858 * shortcut the timeouts and just signal too many retries.
2859 */
2860 if (sqp->ibqp.qp_type == IB_QPT_RC)
2861 send_status = IB_WC_RETRY_EXC_ERR;
2862 else
2863 send_status = IB_WC_SUCCESS;
2864 goto serr;
2865 }
2866
2867 memset(&wc, 0, sizeof(wc));
2868 send_status = IB_WC_SUCCESS;
2869
2870 release = true;
2871 sqp->s_sge.sge = wqe->sg_list[0];
2872 sqp->s_sge.sg_list = wqe->sg_list + 1;
2873 sqp->s_sge.num_sge = wqe->wr.num_sge;
2874 sqp->s_len = wqe->length;
2875 switch (wqe->wr.opcode) {
2876 case IB_WR_REG_MR:
2877 goto send_comp;
2878
2879 case IB_WR_LOCAL_INV:
2880 if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
2881 if (rvt_invalidate_rkey(sqp,
2882 wqe->wr.ex.invalidate_rkey))
2883 send_status = IB_WC_LOC_PROT_ERR;
2884 local_ops = 1;
2885 }
2886 goto send_comp;
2887
2888 case IB_WR_SEND_WITH_INV:
2889 if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
2890 wc.wc_flags = IB_WC_WITH_INVALIDATE;
2891 wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
2892 }
2893 goto send;
2894
2895 case IB_WR_SEND_WITH_IMM:
2896 wc.wc_flags = IB_WC_WITH_IMM;
2897 wc.ex.imm_data = wqe->wr.ex.imm_data;
2898 /* FALLTHROUGH */
2899 case IB_WR_SEND:
2900send:
2901 ret = rvt_get_rwqe(qp, false);
2902 if (ret < 0)
2903 goto op_err;
2904 if (!ret)
2905 goto rnr_nak;
2906 break;
2907
2908 case IB_WR_RDMA_WRITE_WITH_IMM:
2909 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2910 goto inv_err;
2911 wc.wc_flags = IB_WC_WITH_IMM;
2912 wc.ex.imm_data = wqe->wr.ex.imm_data;
2913 ret = rvt_get_rwqe(qp, true);
2914 if (ret < 0)
2915 goto op_err;
2916 if (!ret)
2917 goto rnr_nak;
2918 /* skip copy_last set and qp_access_flags recheck */
2919 goto do_write;
2920 case IB_WR_RDMA_WRITE:
2921 copy_last = rvt_is_user_qp(qp);
2922 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2923 goto inv_err;
2924do_write:
2925 if (wqe->length == 0)
2926 break;
2927 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
2928 wqe->rdma_wr.remote_addr,
2929 wqe->rdma_wr.rkey,
2930 IB_ACCESS_REMOTE_WRITE)))
2931 goto acc_err;
2932 qp->r_sge.sg_list = NULL;
2933 qp->r_sge.num_sge = 1;
2934 qp->r_sge.total_len = wqe->length;
2935 break;
2936
2937 case IB_WR_RDMA_READ:
2938 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2939 goto inv_err;
2940 if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
2941 wqe->rdma_wr.remote_addr,
2942 wqe->rdma_wr.rkey,
2943 IB_ACCESS_REMOTE_READ)))
2944 goto acc_err;
2945 release = false;
2946 sqp->s_sge.sg_list = NULL;
2947 sqp->s_sge.num_sge = 1;
2948 qp->r_sge.sge = wqe->sg_list[0];
2949 qp->r_sge.sg_list = wqe->sg_list + 1;
2950 qp->r_sge.num_sge = wqe->wr.num_sge;
2951 qp->r_sge.total_len = wqe->length;
2952 break;
2953
2954 case IB_WR_ATOMIC_CMP_AND_SWP:
2955 case IB_WR_ATOMIC_FETCH_AND_ADD:
2956 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2957 goto inv_err;
2958 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2959 wqe->atomic_wr.remote_addr,
2960 wqe->atomic_wr.rkey,
2961 IB_ACCESS_REMOTE_ATOMIC)))
2962 goto acc_err;
2963 /* Perform atomic OP and save result. */
2964 maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
2965 sdata = wqe->atomic_wr.compare_add;
2966 *(u64 *)sqp->s_sge.sge.vaddr =
2967 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
2968 (u64)atomic64_add_return(sdata, maddr) - sdata :
2969 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
2970 sdata, wqe->atomic_wr.swap);
2971 rvt_put_mr(qp->r_sge.sge.mr);
2972 qp->r_sge.num_sge = 0;
2973 goto send_comp;
2974
2975 default:
2976 send_status = IB_WC_LOC_QP_OP_ERR;
2977 goto serr;
2978 }
2979
2980 sge = &sqp->s_sge.sge;
2981 while (sqp->s_len) {
2982 u32 len = sqp->s_len;
2983
2984 if (len > sge->length)
2985 len = sge->length;
2986 if (len > sge->sge_length)
2987 len = sge->sge_length;
2988 WARN_ON_ONCE(len == 0);
2989 rvt_copy_sge(qp, &qp->r_sge, sge->vaddr,
2990 len, release, copy_last);
2991 sge->vaddr += len;
2992 sge->length -= len;
2993 sge->sge_length -= len;
2994 if (sge->sge_length == 0) {
2995 if (!release)
2996 rvt_put_mr(sge->mr);
2997 if (--sqp->s_sge.num_sge)
2998 *sge = *sqp->s_sge.sg_list++;
2999 } else if (sge->length == 0 && sge->mr->lkey) {
3000 if (++sge->n >= RVT_SEGSZ) {
3001 if (++sge->m >= sge->mr->mapsz)
3002 break;
3003 sge->n = 0;
3004 }
3005 sge->vaddr =
3006 sge->mr->map[sge->m]->segs[sge->n].vaddr;
3007 sge->length =
3008 sge->mr->map[sge->m]->segs[sge->n].length;
3009 }
3010 sqp->s_len -= len;
3011 }
3012 if (release)
3013 rvt_put_ss(&qp->r_sge);
3014
3015 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
3016 goto send_comp;
3017
3018 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
3019 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
3020 else
3021 wc.opcode = IB_WC_RECV;
3022 wc.wr_id = qp->r_wr_id;
3023 wc.status = IB_WC_SUCCESS;
3024 wc.byte_len = wqe->length;
3025 wc.qp = &qp->ibqp;
3026 wc.src_qp = qp->remote_qpn;
3027 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
3028 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
3029 wc.port_num = 1;
3030 /* Signal completion event if the solicited bit is set. */
3031 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
3032 wqe->wr.send_flags & IB_SEND_SOLICITED);
3033
3034send_comp:
3035 spin_lock_irqsave(&sqp->s_lock, flags);
3036 rvp->n_loop_pkts++;
3037flush_send:
3038 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
3039 rvt_send_complete(sqp, wqe, send_status);
3040 if (local_ops) {
3041 atomic_dec(&sqp->local_ops_pending);
3042 local_ops = 0;
3043 }
3044 goto again;
3045
3046rnr_nak:
3047 /* Handle RNR NAK */
3048 if (qp->ibqp.qp_type == IB_QPT_UC)
3049 goto send_comp;
3050 rvp->n_rnr_naks++;
3051 /*
3052 * Note: we don't need the s_lock held since the BUSY flag
3053 * makes this single threaded.
3054 */
3055 if (sqp->s_rnr_retry == 0) {
3056 send_status = IB_WC_RNR_RETRY_EXC_ERR;
3057 goto serr;
3058 }
3059 if (sqp->s_rnr_retry_cnt < 7)
3060 sqp->s_rnr_retry--;
3061 spin_lock_irqsave(&sqp->s_lock, flags);
3062 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
3063 goto clr_busy;
3064 rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
3065 IB_AETH_CREDIT_SHIFT);
3066 goto clr_busy;
3067
3068op_err:
3069 send_status = IB_WC_REM_OP_ERR;
3070 wc.status = IB_WC_LOC_QP_OP_ERR;
3071 goto err;
3072
3073inv_err:
3074 send_status = IB_WC_REM_INV_REQ_ERR;
3075 wc.status = IB_WC_LOC_QP_OP_ERR;
3076 goto err;
3077
3078acc_err:
3079 send_status = IB_WC_REM_ACCESS_ERR;
3080 wc.status = IB_WC_LOC_PROT_ERR;
3081err:
3082 /* responder goes to error state */
3083 rvt_rc_error(qp, wc.status);
3084
3085serr:
3086 spin_lock_irqsave(&sqp->s_lock, flags);
3087 rvt_send_complete(sqp, wqe, send_status);
3088 if (sqp->ibqp.qp_type == IB_QPT_RC) {
3089 int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
3090
3091 sqp->s_flags &= ~RVT_S_BUSY;
3092 spin_unlock_irqrestore(&sqp->s_lock, flags);
3093 if (lastwqe) {
3094 struct ib_event ev;
3095
3096 ev.device = sqp->ibqp.device;
3097 ev.element.qp = &sqp->ibqp;
3098 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
3099 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
3100 }
3101 goto done;
3102 }
3103clr_busy:
3104 sqp->s_flags &= ~RVT_S_BUSY;
3105unlock:
3106 spin_unlock_irqrestore(&sqp->s_lock, flags);
3107done:
3108 rcu_read_unlock();
3109}
3110EXPORT_SYMBOL(rvt_ruc_loopback);
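
The rvt_wss_* additions near the top of this file size a one-bit-per-page bitmap from the last-level cache and switch rvt_copy_sge() to a cacheless copy once the tracked working set exceeds a percentage of the LLC. The arithmetic is easy to check standalone; the following userspace program mirrors the sizing in rvt_wss_init() with assumed numbers (32 MiB LLC, 4 KiB pages, the 80% default), since the kernel reads the real LLC size from boot_cpu_data:

/* Build with: cc -o wss_calc wss_calc.c */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define BITS_PER_LONG	64UL

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long llc_kib = 32 * 1024;	/* assumed 32 MiB LLC */
	unsigned long wss_threshold = 80;	/* percent, the default above */

	unsigned long llc_size = llc_kib * 1024;
	unsigned long table_size = roundup_pow_of_two(llc_size);
	unsigned long llc_bits = llc_size / PAGE_SIZE;	/* one bit per page */
	unsigned long table_bits = table_size / PAGE_SIZE;

	printf("pages_mask  = %#lx\n", table_bits - 1);
	printf("num_entries = %lu longs\n", table_bits / BITS_PER_LONG);
	printf("threshold   = %lu pages (%lu%% of %lu LLC pages)\n",
	       llc_bits * wss_threshold / 100, wss_threshold, llc_bits);
	return 0;
}

For these numbers the table is 8192 bits (128 longs) and the copy goes cacheless once roughly 6553 distinct pages have been touched within a clean period.
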
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 264811fdc530..6d883972e0b8 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -66,4 +66,6 @@ int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
66 const struct ib_send_wr **bad_wr); 66 const struct ib_send_wr **bad_wr);
67int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, 67int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
68 const struct ib_recv_wr **bad_wr); 68 const struct ib_recv_wr **bad_wr);
69int rvt_wss_init(struct rvt_dev_info *rdi);
70void rvt_wss_exit(struct rvt_dev_info *rdi);
69#endif /* DEF_RVTQP_H */ 71#endif /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index 0ef25fc49f25..d5df352eadb1 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -153,6 +153,48 @@ TRACE_EVENT(
153 ) 153 )
154); 154);
155 155
156TRACE_EVENT(
157 rvt_qp_send_completion,
158 TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
159 TP_ARGS(qp, wqe, idx),
160 TP_STRUCT__entry(
161 RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
162 __field(struct rvt_swqe *, wqe)
163 __field(u64, wr_id)
164 __field(u32, qpn)
165 __field(u32, qpt)
166 __field(u32, length)
167 __field(u32, idx)
168 __field(u32, ssn)
169 __field(enum ib_wr_opcode, opcode)
170 __field(int, send_flags)
171 ),
172 TP_fast_assign(
173 RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
174 __entry->wqe = wqe;
175 __entry->wr_id = wqe->wr.wr_id;
176 __entry->qpn = qp->ibqp.qp_num;
177 __entry->qpt = qp->ibqp.qp_type;
178 __entry->length = wqe->length;
179 __entry->idx = idx;
180 __entry->ssn = wqe->ssn;
181 __entry->opcode = wqe->wr.opcode;
182 __entry->send_flags = wqe->wr.send_flags;
183 ),
184 TP_printk(
185 "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
186 __get_str(dev),
187 __entry->qpn,
188 __entry->qpt,
189 __entry->wqe,
190 __entry->idx,
191 __entry->wr_id,
192 __entry->length,
193 __entry->ssn,
194 __entry->opcode,
195 __entry->send_flags
196 )
197);
156#endif /* __RVT_TRACE_TX_H */ 198#endif /* __RVT_TRACE_TX_H */
157 199
158#undef TRACE_INCLUDE_PATH 200#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 17e4abc067af..723d3daf2eba 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -774,6 +774,13 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
774 goto bail_no_mr; 774 goto bail_no_mr;
775 } 775 }
776 776
777 /* Memory Working Set Size */
778 ret = rvt_wss_init(rdi);
779 if (ret) {
780 rvt_pr_err(rdi, "Error in WSS init.\n");
781 goto bail_mr;
782 }
783
777 /* Completion queues */ 784 /* Completion queues */
778 spin_lock_init(&rdi->n_cqs_lock); 785 spin_lock_init(&rdi->n_cqs_lock);
779 786
@@ -828,10 +835,11 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
828 835
829 rdi->ibdev.driver_id = driver_id; 836 rdi->ibdev.driver_id = driver_id;
830 /* We are now good to announce we exist */ 837 /* We are now good to announce we exist */
831 ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); 838 ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
839 rdi->driver_f.port_callback);
832 if (ret) { 840 if (ret) {
833 rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); 841 rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
834 goto bail_mr; 842 goto bail_wss;
835 } 843 }
836 844
837 rvt_create_mad_agents(rdi); 845 rvt_create_mad_agents(rdi);
@@ -839,6 +847,8 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
839 rvt_pr_info(rdi, "Registration with rdmavt done.\n"); 847 rvt_pr_info(rdi, "Registration with rdmavt done.\n");
840 return ret; 848 return ret;
841 849
850bail_wss:
851 rvt_wss_exit(rdi);
842bail_mr: 852bail_mr:
843 rvt_mr_exit(rdi); 853 rvt_mr_exit(rdi);
844 854
@@ -862,6 +872,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
862 rvt_free_mad_agents(rdi); 872 rvt_free_mad_agents(rdi);
863 873
864 ib_unregister_device(&rdi->ibdev); 874 ib_unregister_device(&rdi->ibdev);
875 rvt_wss_exit(rdi);
865 rvt_mr_exit(rdi); 876 rvt_mr_exit(rdi);
866 rvt_qp_exit(rdi); 877 rvt_qp_exit(rdi);
867} 878}
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 10999fa69281..383e65c7bbc0 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -103,7 +103,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
103 rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM; 103 rxe->attr.max_res_rd_atom = RXE_MAX_RES_RD_ATOM;
104 rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM; 104 rxe->attr.max_qp_init_rd_atom = RXE_MAX_QP_INIT_RD_ATOM;
105 rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM; 105 rxe->attr.max_ee_init_rd_atom = RXE_MAX_EE_INIT_RD_ATOM;
106 rxe->attr.atomic_cap = RXE_ATOMIC_CAP; 106 rxe->attr.atomic_cap = IB_ATOMIC_HCA;
107 rxe->attr.max_ee = RXE_MAX_EE; 107 rxe->attr.max_ee = RXE_MAX_EE;
108 rxe->attr.max_rdd = RXE_MAX_RDD; 108 rxe->attr.max_rdd = RXE_MAX_RDD;
109 rxe->attr.max_mw = RXE_MAX_MW; 109 rxe->attr.max_mw = RXE_MAX_MW;
@@ -128,9 +128,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
128/* initialize port attributes */ 128/* initialize port attributes */
129static int rxe_init_port_param(struct rxe_port *port) 129static int rxe_init_port_param(struct rxe_port *port)
130{ 130{
131 port->attr.state = RXE_PORT_STATE; 131 port->attr.state = IB_PORT_DOWN;
132 port->attr.max_mtu = RXE_PORT_MAX_MTU; 132 port->attr.max_mtu = IB_MTU_4096;
133 port->attr.active_mtu = RXE_PORT_ACTIVE_MTU; 133 port->attr.active_mtu = IB_MTU_256;
134 port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN; 134 port->attr.gid_tbl_len = RXE_PORT_GID_TBL_LEN;
135 port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS; 135 port->attr.port_cap_flags = RXE_PORT_PORT_CAP_FLAGS;
136 port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ; 136 port->attr.max_msg_sz = RXE_PORT_MAX_MSG_SZ;
@@ -147,8 +147,7 @@ static int rxe_init_port_param(struct rxe_port *port)
147 port->attr.active_width = RXE_PORT_ACTIVE_WIDTH; 147 port->attr.active_width = RXE_PORT_ACTIVE_WIDTH;
148 port->attr.active_speed = RXE_PORT_ACTIVE_SPEED; 148 port->attr.active_speed = RXE_PORT_ACTIVE_SPEED;
149 port->attr.phys_state = RXE_PORT_PHYS_STATE; 149 port->attr.phys_state = RXE_PORT_PHYS_STATE;
150 port->mtu_cap = 150 port->mtu_cap = ib_mtu_enum_to_int(IB_MTU_256);
151 ib_mtu_enum_to_int(RXE_PORT_ACTIVE_MTU);
152 port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX); 151 port->subnet_prefix = cpu_to_be64(RXE_PORT_SUBNET_PREFIX);
153 152
154 return 0; 153 return 0;
@@ -300,7 +299,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
300 mtu = eth_mtu_int_to_enum(ndev_mtu); 299 mtu = eth_mtu_int_to_enum(ndev_mtu);
301 300
302 /* Make sure that new MTU in range */ 301 /* Make sure that new MTU in range */
303 mtu = mtu ? min_t(enum ib_mtu, mtu, RXE_PORT_MAX_MTU) : IB_MTU_256; 302 mtu = mtu ? min_t(enum ib_mtu, mtu, IB_MTU_4096) : IB_MTU_256;
304 303
305 port->attr.active_mtu = mtu; 304 port->attr.active_mtu = mtu;
306 port->mtu_cap = ib_mtu_enum_to_int(mtu); 305 port->mtu_cap = ib_mtu_enum_to_int(mtu);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 83311dd07019..ea089cb091ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -191,6 +191,7 @@ static inline void reset_retry_counters(struct rxe_qp *qp)
191{ 191{
192 qp->comp.retry_cnt = qp->attr.retry_cnt; 192 qp->comp.retry_cnt = qp->attr.retry_cnt;
193 qp->comp.rnr_retry = qp->attr.rnr_retry; 193 qp->comp.rnr_retry = qp->attr.rnr_retry;
194 qp->comp.started_retry = 0;
194} 195}
195 196
196static inline enum comp_state check_psn(struct rxe_qp *qp, 197static inline enum comp_state check_psn(struct rxe_qp *qp,
@@ -253,6 +254,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
253 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: 254 case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
254 if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && 255 if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
255 pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { 256 pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
257 /* read retries of partial data may restart from
258 * read response first or response only.
259 */
260 if ((pkt->psn == wqe->first_psn &&
261 pkt->opcode ==
262 IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) ||
263 (wqe->first_psn == wqe->last_psn &&
264 pkt->opcode ==
265 IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY))
266 break;
267
256 return COMPST_ERROR; 268 return COMPST_ERROR;
257 } 269 }
258 break; 270 break;
@@ -499,11 +511,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
499 struct rxe_pkt_info *pkt, 511 struct rxe_pkt_info *pkt,
500 struct rxe_send_wqe *wqe) 512 struct rxe_send_wqe *wqe)
501{ 513{
502 qp->comp.opcode = -1; 514 if (pkt && wqe->state == wqe_state_pending) {
503 515 if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) {
504 if (pkt) { 516 qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK;
505 if (psn_compare(pkt->psn, qp->comp.psn) >= 0) 517 qp->comp.opcode = -1;
506 qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 518 }
507 519
508 if (qp->req.wait_psn) { 520 if (qp->req.wait_psn) {
509 qp->req.wait_psn = 0; 521 qp->req.wait_psn = 0;
@@ -676,6 +688,20 @@ int rxe_completer(void *arg)
676 goto exit; 688 goto exit;
677 } 689 }
678 690
691 /* if we've started a retry, don't start another
692 * retry sequence, unless this is a timeout.
693 */
694 if (qp->comp.started_retry &&
695 !qp->comp.timeout_retry) {
696 if (pkt) {
697 rxe_drop_ref(pkt->qp);
698 kfree_skb(skb);
699 skb = NULL;
700 }
701
702 goto done;
703 }
704
679 if (qp->comp.retry_cnt > 0) { 705 if (qp->comp.retry_cnt > 0) {
680 if (qp->comp.retry_cnt != 7) 706 if (qp->comp.retry_cnt != 7)
681 qp->comp.retry_cnt--; 707 qp->comp.retry_cnt--;
@@ -692,6 +718,7 @@ int rxe_completer(void *arg)
692 rxe_counter_inc(rxe, 718 rxe_counter_inc(rxe,
693 RXE_CNT_COMP_RETRY); 719 RXE_CNT_COMP_RETRY);
694 qp->req.need_retry = 1; 720 qp->req.need_retry = 1;
721 qp->comp.started_retry = 1;
695 rxe_run_task(&qp->req.task, 1); 722 rxe_run_task(&qp->req.task, 1);
696 } 723 }
697 724
@@ -701,7 +728,7 @@ int rxe_completer(void *arg)
701 skb = NULL; 728 skb = NULL;
702 } 729 }
703 730
704 goto exit; 731 goto done;
705 732
706 } else { 733 } else {
707 rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED); 734 rxe_counter_inc(rxe, RXE_CNT_RETRY_EXCEEDED);
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 2ee4b08b00ea..a57276f2cb84 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -30,7 +30,7 @@
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33#include <linux/vmalloc.h>
34#include "rxe.h" 34#include "rxe.h"
35#include "rxe_loc.h" 35#include "rxe_loc.h"
36#include "rxe_queue.h" 36#include "rxe_queue.h"
@@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
97 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, 97 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context,
98 cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); 98 cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
99 if (err) { 99 if (err) {
100 kvfree(cq->queue->buf); 100 vfree(cq->queue->buf);
101 kfree(cq->queue); 101 kfree(cq->queue);
102 return err; 102 return err;
103 } 103 }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 87d14f7ef21b..afd53f57a62b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -144,8 +144,7 @@ void rxe_loopback(struct sk_buff *skb);
144int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb); 144int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
145struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, 145struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
146 int paylen, struct rxe_pkt_info *pkt); 146 int paylen, struct rxe_pkt_info *pkt);
147int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 147int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc);
148 struct sk_buff *skb, u32 *crc);
149enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); 148enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
150const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); 149const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
151struct device *rxe_dma_device(struct rxe_dev *rxe); 150struct device *rxe_dma_device(struct rxe_dev *rxe);
@@ -196,7 +195,7 @@ static inline int qp_mtu(struct rxe_qp *qp)
196 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) 195 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
197 return qp->attr.path_mtu; 196 return qp->attr.path_mtu;
198 else 197 else
199 return RXE_PORT_MAX_MTU; 198 return IB_MTU_4096;
200} 199}
201 200
202static inline int rcv_wqe_size(int max_sge) 201static inline int rcv_wqe_size(int max_sge)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index dff605fdf60f..9d3916b93f23 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -573,33 +573,20 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
573 struct rxe_dev *rxe = to_rdev(pd->ibpd.device); 573 struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
574 int index = key >> 8; 574 int index = key >> 8;
575 575
576 if (index >= RXE_MIN_MR_INDEX && index <= RXE_MAX_MR_INDEX) { 576 mem = rxe_pool_get_index(&rxe->mr_pool, index);
577 mem = rxe_pool_get_index(&rxe->mr_pool, index); 577 if (!mem)
578 if (!mem) 578 return NULL;
579 goto err1; 579
580 } else { 580 if (unlikely((type == lookup_local && mem->lkey != key) ||
581 goto err1; 581 (type == lookup_remote && mem->rkey != key) ||
582 mem->pd != pd ||
583 (access && !(access & mem->access)) ||
584 mem->state != RXE_MEM_STATE_VALID)) {
585 rxe_drop_ref(mem);
586 mem = NULL;
582 } 587 }
583 588
584 if ((type == lookup_local && mem->lkey != key) ||
585 (type == lookup_remote && mem->rkey != key))
586 goto err2;
587
588 if (mem->pd != pd)
589 goto err2;
590
591 if (access && !(access & mem->access))
592 goto err2;
593
594 if (mem->state != RXE_MEM_STATE_VALID)
595 goto err2;
596
597 return mem; 589 return mem;
598
599err2:
600 rxe_drop_ref(mem);
601err1:
602 return NULL;
603} 590}
604 591
605int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, 592int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8094cbaa54a9..40e82e0f6c2d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -72,7 +72,7 @@ struct rxe_dev *get_rxe_by_name(const char *name)
72 72
73 spin_lock_bh(&dev_list_lock); 73 spin_lock_bh(&dev_list_lock);
74 list_for_each_entry(rxe, &rxe_dev_list, list) { 74 list_for_each_entry(rxe, &rxe_dev_list, list) {
75 if (!strcmp(name, rxe->ib_dev.name)) { 75 if (!strcmp(name, dev_name(&rxe->ib_dev.dev))) {
76 found = rxe; 76 found = rxe;
77 break; 77 break;
78 } 78 }
@@ -182,19 +182,11 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
182 182
183#endif 183#endif
184 184
185static struct dst_entry *rxe_find_route(struct rxe_dev *rxe, 185static struct dst_entry *rxe_find_route(struct net_device *ndev,
186 struct rxe_qp *qp, 186 struct rxe_qp *qp,
187 struct rxe_av *av) 187 struct rxe_av *av)
188{ 188{
189 const struct ib_gid_attr *attr;
190 struct dst_entry *dst = NULL; 189 struct dst_entry *dst = NULL;
191 struct net_device *ndev;
192
193 attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
194 av->grh.sgid_index);
195 if (IS_ERR(attr))
196 return NULL;
197 ndev = attr->ndev;
198 190
199 if (qp_type(qp) == IB_QPT_RC) 191 if (qp_type(qp) == IB_QPT_RC)
200 dst = sk_dst_get(qp->sk->sk); 192 dst = sk_dst_get(qp->sk->sk);
@@ -229,7 +221,6 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
229 sk_dst_set(qp->sk->sk, dst); 221 sk_dst_set(qp->sk->sk, dst);
230 } 222 }
231 } 223 }
232 rdma_put_gid_attr(attr);
233 return dst; 224 return dst;
234} 225}
235 226
@@ -377,8 +368,8 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
377 ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); 368 ip6h->payload_len = htons(skb->len - sizeof(*ip6h));
378} 369}
379 370
380static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 371static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
381 struct sk_buff *skb, struct rxe_av *av) 372 struct rxe_av *av)
382{ 373{
383 struct rxe_qp *qp = pkt->qp; 374 struct rxe_qp *qp = pkt->qp;
384 struct dst_entry *dst; 375 struct dst_entry *dst;
@@ -387,7 +378,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
387 struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; 378 struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr;
388 struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; 379 struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr;
389 380
390 dst = rxe_find_route(rxe, qp, av); 381 dst = rxe_find_route(skb->dev, qp, av);
391 if (!dst) { 382 if (!dst) {
392 pr_err("Host not reachable\n"); 383 pr_err("Host not reachable\n");
393 return -EHOSTUNREACH; 384 return -EHOSTUNREACH;
@@ -396,8 +387,8 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
396 if (!memcmp(saddr, daddr, sizeof(*daddr))) 387 if (!memcmp(saddr, daddr, sizeof(*daddr)))
397 pkt->mask |= RXE_LOOPBACK_MASK; 388 pkt->mask |= RXE_LOOPBACK_MASK;
398 389
399 prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 390 prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
400 htons(ROCE_V2_UDP_DPORT)); 391 cpu_to_be16(ROCE_V2_UDP_DPORT));
401 392
402 prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP, 393 prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
403 av->grh.traffic_class, av->grh.hop_limit, df, xnet); 394 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
@@ -406,15 +397,15 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
406 return 0; 397 return 0;
407} 398}
408 399
409static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 400static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
410 struct sk_buff *skb, struct rxe_av *av) 401 struct rxe_av *av)
411{ 402{
412 struct rxe_qp *qp = pkt->qp; 403 struct rxe_qp *qp = pkt->qp;
413 struct dst_entry *dst; 404 struct dst_entry *dst;
414 struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; 405 struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
415 struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; 406 struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
416 407
417 dst = rxe_find_route(rxe, qp, av); 408 dst = rxe_find_route(skb->dev, qp, av);
418 if (!dst) { 409 if (!dst) {
419 pr_err("Host not reachable\n"); 410 pr_err("Host not reachable\n");
420 return -EHOSTUNREACH; 411 return -EHOSTUNREACH;
@@ -423,8 +414,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
423 if (!memcmp(saddr, daddr, sizeof(*daddr))) 414 if (!memcmp(saddr, daddr, sizeof(*daddr)))
424 pkt->mask |= RXE_LOOPBACK_MASK; 415 pkt->mask |= RXE_LOOPBACK_MASK;
425 416
426 prepare_udp_hdr(skb, htons(RXE_ROCE_V2_SPORT), 417 prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
427 htons(ROCE_V2_UDP_DPORT)); 418 cpu_to_be16(ROCE_V2_UDP_DPORT));
428 419
429 prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP, 420 prepare_ipv6_hdr(dst, skb, saddr, daddr, IPPROTO_UDP,
430 av->grh.traffic_class, 421 av->grh.traffic_class,
@@ -434,16 +425,15 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
434 return 0; 425 return 0;
435} 426}
436 427
437int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, 428int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
438 struct sk_buff *skb, u32 *crc)
439{ 429{
440 int err = 0; 430 int err = 0;
441 struct rxe_av *av = rxe_get_av(pkt); 431 struct rxe_av *av = rxe_get_av(pkt);
442 432
443 if (av->network_type == RDMA_NETWORK_IPV4) 433 if (av->network_type == RDMA_NETWORK_IPV4)
444 err = prepare4(rxe, pkt, skb, av); 434 err = prepare4(pkt, skb, av);
445 else if (av->network_type == RDMA_NETWORK_IPV6) 435 else if (av->network_type == RDMA_NETWORK_IPV6)
446 err = prepare6(rxe, pkt, skb, av); 436 err = prepare6(pkt, skb, av);
447 437
448 *crc = rxe_icrc_hdr(pkt, skb); 438 *crc = rxe_icrc_hdr(pkt, skb);
449 439
@@ -501,11 +491,6 @@ void rxe_loopback(struct sk_buff *skb)
501 rxe_rcv(skb); 491 rxe_rcv(skb);
502} 492}
503 493
504static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
505{
506 return rxe->port.port_guid == av->grh.dgid.global.interface_id;
507}
508
509struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, 494struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
510 int paylen, struct rxe_pkt_info *pkt) 495 int paylen, struct rxe_pkt_info *pkt)
511{ 496{
@@ -625,7 +610,7 @@ void rxe_port_up(struct rxe_dev *rxe)
625 port->attr.phys_state = IB_PHYS_STATE_LINK_UP; 610 port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
626 611
627 rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); 612 rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
628 pr_info("set %s active\n", rxe->ib_dev.name); 613 dev_info(&rxe->ib_dev.dev, "set active\n");
629} 614}
630 615
631/* Caller must hold net_info_lock */ 616/* Caller must hold net_info_lock */
@@ -638,7 +623,7 @@ void rxe_port_down(struct rxe_dev *rxe)
638 port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; 623 port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
639 624
640 rxe_port_event(rxe, IB_EVENT_PORT_ERR); 625 rxe_port_event(rxe, IB_EVENT_PORT_ERR);
641 pr_info("set %s down\n", rxe->ib_dev.name); 626 dev_info(&rxe->ib_dev.dev, "set down\n");
642} 627}
643 628
644static int rxe_notify(struct notifier_block *not_blk, 629static int rxe_notify(struct notifier_block *not_blk,
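Two things change in rxe_net.c above: rxe_find_route() now takes the transmit net_device straight from skb->dev instead of re-resolving the GID attribute for every packet, and the UDP source port comes from qp->src_port rather than the fixed RXE_ROCE_V2_SPORT. The switch from htons() to cpu_to_be16() is cosmetic — both produce a big-endian 16-bit value. A small standalone check, with htons() standing in for the kernel's cpu_to_be16():

```c
/* Sketch: RoCEv2 uses UDP destination port 4791; the bytes on the wire
 * are big-endian regardless of which conversion helper is used.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t dport = htons(4791);		/* ROCE_V2_UDP_DPORT */
	uint16_t sport = htons(0xc000 + 42);	/* example per-QP source port */
	const uint8_t *d = (const uint8_t *)&dport;
	const uint8_t *s = (const uint8_t *)&sport;

	printf("dst port 4791   -> wire bytes %02x %02x\n", d[0], d[1]);
	printf("src port 0xc02a -> wire bytes %02x %02x\n", s[0], s[1]);
	return 0;
}
```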
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 4555510d86c4..bdea899a58ac 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -90,7 +90,6 @@ enum rxe_device_param {
90 RXE_MAX_RES_RD_ATOM = 0x3f000, 90 RXE_MAX_RES_RD_ATOM = 0x3f000,
91 RXE_MAX_QP_INIT_RD_ATOM = 128, 91 RXE_MAX_QP_INIT_RD_ATOM = 128,
92 RXE_MAX_EE_INIT_RD_ATOM = 0, 92 RXE_MAX_EE_INIT_RD_ATOM = 0,
93 RXE_ATOMIC_CAP = 1,
94 RXE_MAX_EE = 0, 93 RXE_MAX_EE = 0,
95 RXE_MAX_RDD = 0, 94 RXE_MAX_RDD = 0,
96 RXE_MAX_MW = 0, 95 RXE_MAX_MW = 0,
@@ -139,9 +138,6 @@ enum rxe_device_param {
139 138
140/* default/initial rxe port parameters */ 139/* default/initial rxe port parameters */
141enum rxe_port_param { 140enum rxe_port_param {
142 RXE_PORT_STATE = IB_PORT_DOWN,
143 RXE_PORT_MAX_MTU = IB_MTU_4096,
144 RXE_PORT_ACTIVE_MTU = IB_MTU_256,
145 RXE_PORT_GID_TBL_LEN = 1024, 141 RXE_PORT_GID_TBL_LEN = 1024,
146 RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, 142 RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP,
147 RXE_PORT_MAX_MSG_SZ = 0x800000, 143 RXE_PORT_MAX_MSG_SZ = 0x800000,
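With RXE_PORT_STATE, RXE_PORT_MAX_MTU and RXE_PORT_ACTIVE_MTU removed from the port parameters, qp_mtu() earlier in this series now returns IB_MTU_4096 directly for datagram QPs; since the deleted RXE_PORT_MAX_MTU was defined as IB_MTU_4096, the behaviour is unchanged and only the indirection goes away. The IB MTU values are enum codes, not byte counts; a quick sketch of the usual enum-to-bytes mapping (mirroring what ib_mtu_enum_to_int() does in the core, duplicated here only so it compiles standalone):

```c
/* Standalone copy of the IB MTU enum codes and their byte sizes.
 * In the kernel these live in <rdma/ib_verbs.h>.
 */
#include <stdio.h>

enum ib_mtu {
	IB_MTU_256  = 1,
	IB_MTU_512  = 2,
	IB_MTU_1024 = 3,
	IB_MTU_2048 = 4,
	IB_MTU_4096 = 5,
};

static int ib_mtu_enum_to_int(enum ib_mtu mtu)
{
	switch (mtu) {
	case IB_MTU_256:  return 256;
	case IB_MTU_512:  return 512;
	case IB_MTU_1024: return 1024;
	case IB_MTU_2048: return 2048;
	case IB_MTU_4096: return 4096;
	default:          return -1;
	}
}

int main(void)
{
	printf("IB_MTU_4096 is enum code %d, i.e. %d bytes\n",
	       IB_MTU_4096, ib_mtu_enum_to_int(IB_MTU_4096));
	return 0;
}
```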
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b4a8acc7bb7d..36b53fb94a49 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -207,7 +207,7 @@ int rxe_pool_init(
207 207
208 kref_init(&pool->ref_cnt); 208 kref_init(&pool->ref_cnt);
209 209
210 spin_lock_init(&pool->pool_lock); 210 rwlock_init(&pool->pool_lock);
211 211
212 if (rxe_type_info[type].flags & RXE_POOL_INDEX) { 212 if (rxe_type_info[type].flags & RXE_POOL_INDEX) {
213 err = rxe_pool_init_index(pool, 213 err = rxe_pool_init_index(pool,
@@ -222,7 +222,7 @@ int rxe_pool_init(
222 pool->key_size = rxe_type_info[type].key_size; 222 pool->key_size = rxe_type_info[type].key_size;
223 } 223 }
224 224
225 pool->state = rxe_pool_valid; 225 pool->state = RXE_POOL_STATE_VALID;
226 226
227out: 227out:
228 return err; 228 return err;
@@ -232,7 +232,7 @@ static void rxe_pool_release(struct kref *kref)
232{ 232{
233 struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt); 233 struct rxe_pool *pool = container_of(kref, struct rxe_pool, ref_cnt);
234 234
235 pool->state = rxe_pool_invalid; 235 pool->state = RXE_POOL_STATE_INVALID;
236 kfree(pool->table); 236 kfree(pool->table);
237} 237}
238 238
@@ -245,12 +245,12 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
245{ 245{
246 unsigned long flags; 246 unsigned long flags;
247 247
248 spin_lock_irqsave(&pool->pool_lock, flags); 248 write_lock_irqsave(&pool->pool_lock, flags);
249 pool->state = rxe_pool_invalid; 249 pool->state = RXE_POOL_STATE_INVALID;
250 if (atomic_read(&pool->num_elem) > 0) 250 if (atomic_read(&pool->num_elem) > 0)
251 pr_warn("%s pool destroyed with unfree'd elem\n", 251 pr_warn("%s pool destroyed with unfree'd elem\n",
252 pool_name(pool)); 252 pool_name(pool));
253 spin_unlock_irqrestore(&pool->pool_lock, flags); 253 write_unlock_irqrestore(&pool->pool_lock, flags);
254 254
255 rxe_pool_put(pool); 255 rxe_pool_put(pool);
256 256
@@ -336,10 +336,10 @@ void rxe_add_key(void *arg, void *key)
336 struct rxe_pool *pool = elem->pool; 336 struct rxe_pool *pool = elem->pool;
337 unsigned long flags; 337 unsigned long flags;
338 338
339 spin_lock_irqsave(&pool->pool_lock, flags); 339 write_lock_irqsave(&pool->pool_lock, flags);
340 memcpy((u8 *)elem + pool->key_offset, key, pool->key_size); 340 memcpy((u8 *)elem + pool->key_offset, key, pool->key_size);
341 insert_key(pool, elem); 341 insert_key(pool, elem);
342 spin_unlock_irqrestore(&pool->pool_lock, flags); 342 write_unlock_irqrestore(&pool->pool_lock, flags);
343} 343}
344 344
345void rxe_drop_key(void *arg) 345void rxe_drop_key(void *arg)
@@ -348,9 +348,9 @@ void rxe_drop_key(void *arg)
348 struct rxe_pool *pool = elem->pool; 348 struct rxe_pool *pool = elem->pool;
349 unsigned long flags; 349 unsigned long flags;
350 350
351 spin_lock_irqsave(&pool->pool_lock, flags); 351 write_lock_irqsave(&pool->pool_lock, flags);
352 rb_erase(&elem->node, &pool->tree); 352 rb_erase(&elem->node, &pool->tree);
353 spin_unlock_irqrestore(&pool->pool_lock, flags); 353 write_unlock_irqrestore(&pool->pool_lock, flags);
354} 354}
355 355
356void rxe_add_index(void *arg) 356void rxe_add_index(void *arg)
@@ -359,10 +359,10 @@ void rxe_add_index(void *arg)
359 struct rxe_pool *pool = elem->pool; 359 struct rxe_pool *pool = elem->pool;
360 unsigned long flags; 360 unsigned long flags;
361 361
362 spin_lock_irqsave(&pool->pool_lock, flags); 362 write_lock_irqsave(&pool->pool_lock, flags);
363 elem->index = alloc_index(pool); 363 elem->index = alloc_index(pool);
364 insert_index(pool, elem); 364 insert_index(pool, elem);
365 spin_unlock_irqrestore(&pool->pool_lock, flags); 365 write_unlock_irqrestore(&pool->pool_lock, flags);
366} 366}
367 367
368void rxe_drop_index(void *arg) 368void rxe_drop_index(void *arg)
@@ -371,10 +371,10 @@ void rxe_drop_index(void *arg)
371 struct rxe_pool *pool = elem->pool; 371 struct rxe_pool *pool = elem->pool;
372 unsigned long flags; 372 unsigned long flags;
373 373
374 spin_lock_irqsave(&pool->pool_lock, flags); 374 write_lock_irqsave(&pool->pool_lock, flags);
375 clear_bit(elem->index - pool->min_index, pool->table); 375 clear_bit(elem->index - pool->min_index, pool->table);
376 rb_erase(&elem->node, &pool->tree); 376 rb_erase(&elem->node, &pool->tree);
377 spin_unlock_irqrestore(&pool->pool_lock, flags); 377 write_unlock_irqrestore(&pool->pool_lock, flags);
378} 378}
379 379
380void *rxe_alloc(struct rxe_pool *pool) 380void *rxe_alloc(struct rxe_pool *pool)
@@ -384,13 +384,13 @@ void *rxe_alloc(struct rxe_pool *pool)
384 384
385 might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC)); 385 might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
386 386
387 spin_lock_irqsave(&pool->pool_lock, flags); 387 read_lock_irqsave(&pool->pool_lock, flags);
388 if (pool->state != rxe_pool_valid) { 388 if (pool->state != RXE_POOL_STATE_VALID) {
389 spin_unlock_irqrestore(&pool->pool_lock, flags); 389 read_unlock_irqrestore(&pool->pool_lock, flags);
390 return NULL; 390 return NULL;
391 } 391 }
392 kref_get(&pool->ref_cnt); 392 kref_get(&pool->ref_cnt);
393 spin_unlock_irqrestore(&pool->pool_lock, flags); 393 read_unlock_irqrestore(&pool->pool_lock, flags);
394 394
395 kref_get(&pool->rxe->ref_cnt); 395 kref_get(&pool->rxe->ref_cnt);
396 396
@@ -436,9 +436,9 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
436 struct rxe_pool_entry *elem = NULL; 436 struct rxe_pool_entry *elem = NULL;
437 unsigned long flags; 437 unsigned long flags;
438 438
439 spin_lock_irqsave(&pool->pool_lock, flags); 439 read_lock_irqsave(&pool->pool_lock, flags);
440 440
441 if (pool->state != rxe_pool_valid) 441 if (pool->state != RXE_POOL_STATE_VALID)
442 goto out; 442 goto out;
443 443
444 node = pool->tree.rb_node; 444 node = pool->tree.rb_node;
@@ -450,15 +450,14 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
450 node = node->rb_left; 450 node = node->rb_left;
451 else if (elem->index < index) 451 else if (elem->index < index)
452 node = node->rb_right; 452 node = node->rb_right;
453 else 453 else {
454 kref_get(&elem->ref_cnt);
454 break; 455 break;
456 }
455 } 457 }
456 458
457 if (node)
458 kref_get(&elem->ref_cnt);
459
460out: 459out:
461 spin_unlock_irqrestore(&pool->pool_lock, flags); 460 read_unlock_irqrestore(&pool->pool_lock, flags);
462 return node ? elem : NULL; 461 return node ? elem : NULL;
463} 462}
464 463
@@ -469,9 +468,9 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
469 int cmp; 468 int cmp;
470 unsigned long flags; 469 unsigned long flags;
471 470
472 spin_lock_irqsave(&pool->pool_lock, flags); 471 read_lock_irqsave(&pool->pool_lock, flags);
473 472
474 if (pool->state != rxe_pool_valid) 473 if (pool->state != RXE_POOL_STATE_VALID)
475 goto out; 474 goto out;
476 475
477 node = pool->tree.rb_node; 476 node = pool->tree.rb_node;
@@ -494,6 +493,6 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
494 kref_get(&elem->ref_cnt); 493 kref_get(&elem->ref_cnt);
495 494
496out: 495out:
497 spin_unlock_irqrestore(&pool->pool_lock, flags); 496 read_unlock_irqrestore(&pool->pool_lock, flags);
498 return node ? elem : NULL; 497 return node ? elem : NULL;
499} 498}
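rxe_pool.c above converts pool_lock from a spinlock to an rwlock: the lookup paths (rxe_pool_get_index(), rxe_pool_get_key(), the state check in rxe_alloc()) take the read side so they can run concurrently, while adding or dropping keys and indexes takes the write side; the pool state constants are renamed to RXE_POOL_STATE_* in rxe_pool.h just below. A rough userspace analogue of that read-mostly pattern, using pthread rwlocks rather than the kernel's read_lock_irqsave()/write_lock_irqsave():

```c
/* Userspace analogy only: many concurrent lookups, exclusive inserts.
 * Build with: cc -pthread sketch.c
 */
#include <stdio.h>
#include <pthread.h>

static pthread_rwlock_t pool_lock = PTHREAD_RWLOCK_INITIALIZER;
static int pool[16];
static int pool_count;

static int pool_lookup(int value)
{
	int found = 0;

	pthread_rwlock_rdlock(&pool_lock);	/* readers may run in parallel */
	for (int i = 0; i < pool_count; i++)
		if (pool[i] == value)
			found = 1;
	pthread_rwlock_unlock(&pool_lock);
	return found;
}

static int pool_insert(int value)
{
	int ok = 0;

	pthread_rwlock_wrlock(&pool_lock);	/* writers are exclusive */
	if (pool_count < 16) {
		pool[pool_count++] = value;
		ok = 1;
	}
	pthread_rwlock_unlock(&pool_lock);
	return ok;
}

int main(void)
{
	pool_insert(7);
	printf("lookup(7) = %d, lookup(9) = %d\n",
	       pool_lookup(7), pool_lookup(9));
	return 0;
}
```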
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 47df28e43acf..aa4ba307097b 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -74,8 +74,8 @@ struct rxe_type_info {
74extern struct rxe_type_info rxe_type_info[]; 74extern struct rxe_type_info rxe_type_info[];
75 75
76enum rxe_pool_state { 76enum rxe_pool_state {
77 rxe_pool_invalid, 77 RXE_POOL_STATE_INVALID,
78 rxe_pool_valid, 78 RXE_POOL_STATE_VALID,
79}; 79};
80 80
81struct rxe_pool_entry { 81struct rxe_pool_entry {
@@ -90,7 +90,7 @@ struct rxe_pool_entry {
90 90
91struct rxe_pool { 91struct rxe_pool {
92 struct rxe_dev *rxe; 92 struct rxe_dev *rxe;
93 spinlock_t pool_lock; /* pool spinlock */ 93 rwlock_t pool_lock; /* protects pool add/del/search */
94 size_t elem_size; 94 size_t elem_size;
95 struct kref ref_cnt; 95 struct kref ref_cnt;
96 void (*cleanup)(struct rxe_pool_entry *obj); 96 void (*cleanup)(struct rxe_pool_entry *obj);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index c58452daffc7..b9710907dac2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -34,6 +34,7 @@
34#include <linux/skbuff.h> 34#include <linux/skbuff.h>
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/vmalloc.h>
37 38
38#include "rxe.h" 39#include "rxe.h"
39#include "rxe_loc.h" 40#include "rxe_loc.h"
@@ -227,6 +228,16 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
227 return err; 228 return err;
228 qp->sk->sk->sk_user_data = qp; 229 qp->sk->sk->sk_user_data = qp;
229 230
231 /* pick a source UDP port number for this QP based on
232 * the source QPN. this spreads traffic for different QPs
233 * across different NIC RX queues (while using a single
234 * flow for a given QP to maintain packet order).
235 * the port number must be in the Dynamic Ports range
236 * (0xc000 - 0xffff).
237 */
238 qp->src_port = RXE_ROCE_V2_SPORT +
239 (hash_32_generic(qp_num(qp), 14) & 0x3fff);
240
230 qp->sq.max_wr = init->cap.max_send_wr; 241 qp->sq.max_wr = init->cap.max_send_wr;
231 qp->sq.max_sge = init->cap.max_send_sge; 242 qp->sq.max_sge = init->cap.max_send_sge;
232 qp->sq.max_inline = init->cap.max_inline_data; 243 qp->sq.max_inline = init->cap.max_inline_data;
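The comment block added to rxe_qp_init_req() above explains the new per-QP UDP source port: hash the QPN down to 14 bits and add it to the base of the dynamic/private port range so different QPs land on different NIC RX queues. A standalone sketch of the arithmetic, with a trivial multiplicative hash standing in for the kernel's hash_32_generic() and 0xc000 assumed as the value of RXE_ROCE_V2_SPORT:

```c
/* Sketch only: derive a UDP source port in 0xc000..0xffff from a QPN.
 * hash32() is a placeholder, not the kernel's hash_32_generic().
 */
#include <stdio.h>
#include <stdint.h>

#define RXE_ROCE_V2_SPORT 0xc000u	/* assumed base of the dynamic range */

static uint32_t hash32(uint32_t val, unsigned int bits)
{
	/* multiplicative hash; keep the top 'bits' bits */
	return (val * 0x61C88647u) >> (32 - bits);
}

static uint16_t qp_src_port(uint32_t qpn)
{
	return RXE_ROCE_V2_SPORT + (hash32(qpn, 14) & 0x3fff);
}

int main(void)
{
	for (uint32_t qpn = 16; qpn < 20; qpn++)
		printf("qpn %u -> src port 0x%04x\n", qpn, qp_src_port(qpn));
	return 0;
}
```

Because the port is a pure function of the QPN, every packet of a given QP keeps the same 5-tuple (and therefore the same receive queue), preserving ordering, while different QPs spread across queues.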
@@ -247,7 +258,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
247 &qp->sq.queue->ip); 258 &qp->sq.queue->ip);
248 259
249 if (err) { 260 if (err) {
250 kvfree(qp->sq.queue->buf); 261 vfree(qp->sq.queue->buf);
251 kfree(qp->sq.queue); 262 kfree(qp->sq.queue);
252 return err; 263 return err;
253 } 264 }
@@ -300,7 +311,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
300 qp->rq.queue->buf, qp->rq.queue->buf_size, 311 qp->rq.queue->buf, qp->rq.queue->buf_size,
301 &qp->rq.queue->ip); 312 &qp->rq.queue->ip);
302 if (err) { 313 if (err) {
303 kvfree(qp->rq.queue->buf); 314 vfree(qp->rq.queue->buf);
304 kfree(qp->rq.queue); 315 kfree(qp->rq.queue);
305 return err; 316 return err;
306 } 317 }
@@ -408,8 +419,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
408 enum ib_qp_state new_state = (mask & IB_QP_STATE) ? 419 enum ib_qp_state new_state = (mask & IB_QP_STATE) ?
409 attr->qp_state : cur_state; 420 attr->qp_state : cur_state;
410 421
411 if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask, 422 if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
412 IB_LINK_LAYER_ETHERNET)) {
413 pr_warn("invalid mask or state for qp\n"); 423 pr_warn("invalid mask or state for qp\n");
414 goto err1; 424 goto err1;
415 } 425 }
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index d30dbac24583..5c29a1bb575a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -122,7 +122,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
122 set_bad_pkey_cntr(port); 122 set_bad_pkey_cntr(port);
123 goto err1; 123 goto err1;
124 } 124 }
125 } else if (qpn != 0) { 125 } else {
126 if (unlikely(!pkey_match(pkey, 126 if (unlikely(!pkey_match(pkey,
127 port->pkey_tbl[qp->attr.pkey_index] 127 port->pkey_tbl[qp->attr.pkey_index]
128 ))) { 128 ))) {
@@ -134,7 +134,7 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
134 } 134 }
135 135
136 if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) && 136 if ((qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) &&
137 qpn != 0 && pkt->mask) { 137 pkt->mask) {
138 u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey; 138 u32 qkey = (qpn == 1) ? GSI_QKEY : qp->attr.qkey;
139 139
140 if (unlikely(deth_qkey(pkt) != qkey)) { 140 if (unlikely(deth_qkey(pkt) != qkey)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 8be27238a86e..6c361d70d7cd 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp)
73 int npsn; 73 int npsn;
74 int first = 1; 74 int first = 1;
75 75
76 wqe = queue_head(qp->sq.queue);
77 npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK;
78
79 qp->req.wqe_index = consumer_index(qp->sq.queue); 76 qp->req.wqe_index = consumer_index(qp->sq.queue);
80 qp->req.psn = qp->comp.psn; 77 qp->req.psn = qp->comp.psn;
81 qp->req.opcode = -1; 78 qp->req.opcode = -1;
@@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp)
107 if (first) { 104 if (first) {
108 first = 0; 105 first = 0;
109 106
110 if (mask & WR_WRITE_OR_SEND_MASK) 107 if (mask & WR_WRITE_OR_SEND_MASK) {
108 npsn = (qp->comp.psn - wqe->first_psn) &
109 BTH_PSN_MASK;
111 retry_first_write_send(qp, wqe, mask, npsn); 110 retry_first_write_send(qp, wqe, mask, npsn);
111 }
112 112
113 if (mask & WR_READ_MASK) 113 if (mask & WR_READ_MASK) {
114 npsn = (wqe->dma.length - wqe->dma.resid) /
115 qp->mtu;
114 wqe->iova += npsn * qp->mtu; 116 wqe->iova += npsn * qp->mtu;
117 }
115 } 118 }
116 119
117 wqe->state = wqe_state_posted; 120 wqe->state = wqe_state_posted;
@@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
435 if (pkt->mask & RXE_RETH_MASK) { 438 if (pkt->mask & RXE_RETH_MASK) {
436 reth_set_rkey(pkt, ibwr->wr.rdma.rkey); 439 reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
437 reth_set_va(pkt, wqe->iova); 440 reth_set_va(pkt, wqe->iova);
438 reth_set_len(pkt, wqe->dma.length); 441 reth_set_len(pkt, wqe->dma.resid);
439 } 442 }
440 443
441 if (pkt->mask & RXE_IMMDT_MASK) 444 if (pkt->mask & RXE_IMMDT_MASK)
@@ -476,7 +479,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
476 u32 *p; 479 u32 *p;
477 int err; 480 int err;
478 481
479 err = rxe_prepare(rxe, pkt, skb, &crc); 482 err = rxe_prepare(pkt, skb, &crc);
480 if (err) 483 if (err)
481 return err; 484 return err;
482 485
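In req_retry() above, the per-WQE PSN offset is now computed inside the branches that need it rather than unconditionally at the top of the function, and for a retried RDMA read it is derived from how much of the DMA transfer has already completed (length minus resid, divided by the MTU) so the resumed read starts at the right offset; init_req_packet() likewise puts dma.resid, the remaining byte count, into the RETH length. A small sketch of that resume arithmetic, with the names as stand-ins for the wqe fields:

```c
/* Sketch: resuming a partially completed RDMA read after a retry.
 * 'length' and 'resid' mirror wqe->dma.length / wqe->dma.resid.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t length = 8192;		/* bytes requested originally */
	uint32_t resid  = 4096;		/* bytes still outstanding    */
	uint32_t mtu    = 1024;		/* qp->mtu in bytes           */
	uint64_t iova   = 0x100000;	/* original target address    */

	uint32_t done_pkts = (length - resid) / mtu;	/* npsn for the read */
	uint64_t new_iova  = iova + (uint64_t)done_pkts * mtu;

	printf("completed packets: %u\n", done_pkts);
	printf("retry reads %u bytes starting at iova 0x%llx\n",
	       resid, (unsigned long long)new_iova);
	return 0;
}
```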
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index aa5833318372..c962160292f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
637 if (ack->mask & RXE_ATMACK_MASK) 637 if (ack->mask & RXE_ATMACK_MASK)
638 atmack_set_orig(ack, qp->resp.atomic_orig); 638 atmack_set_orig(ack, qp->resp.atomic_orig);
639 639
640 err = rxe_prepare(rxe, ack, skb, &crc); 640 err = rxe_prepare(ack, skb, &crc);
641 if (err) { 641 if (err) {
642 kfree_skb(skb); 642 kfree_skb(skb);
643 return NULL; 643 return NULL;
@@ -682,6 +682,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
682 rxe_advance_resp_resource(qp); 682 rxe_advance_resp_resource(qp);
683 683
684 res->type = RXE_READ_MASK; 684 res->type = RXE_READ_MASK;
685 res->replay = 0;
685 686
686 res->read.va = qp->resp.va; 687 res->read.va = qp->resp.va;
687 res->read.va_org = qp->resp.va; 688 res->read.va_org = qp->resp.va;
@@ -752,7 +753,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
752 state = RESPST_DONE; 753 state = RESPST_DONE;
753 } else { 754 } else {
754 qp->resp.res = NULL; 755 qp->resp.res = NULL;
755 qp->resp.opcode = -1; 756 if (!res->replay)
757 qp->resp.opcode = -1;
756 if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) 758 if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
757 qp->resp.psn = res->cur_psn; 759 qp->resp.psn = res->cur_psn;
758 state = RESPST_CLEANUP; 760 state = RESPST_CLEANUP;
@@ -814,6 +816,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
814 816
815 /* next expected psn, read handles this separately */ 817 /* next expected psn, read handles this separately */
816 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; 818 qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
819 qp->resp.ack_psn = qp->resp.psn;
817 820
818 qp->resp.opcode = pkt->opcode; 821 qp->resp.opcode = pkt->opcode;
819 qp->resp.status = IB_WC_SUCCESS; 822 qp->resp.status = IB_WC_SUCCESS;
@@ -1065,7 +1068,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
1065 struct rxe_pkt_info *pkt) 1068 struct rxe_pkt_info *pkt)
1066{ 1069{
1067 enum resp_states rc; 1070 enum resp_states rc;
1068 u32 prev_psn = (qp->resp.psn - 1) & BTH_PSN_MASK; 1071 u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;
1069 1072
1070 if (pkt->mask & RXE_SEND_MASK || 1073 if (pkt->mask & RXE_SEND_MASK ||
1071 pkt->mask & RXE_WRITE_MASK) { 1074 pkt->mask & RXE_WRITE_MASK) {
@@ -1108,6 +1111,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
1108 res->state = (pkt->psn == res->first_psn) ? 1111 res->state = (pkt->psn == res->first_psn) ?
1109 rdatm_res_state_new : 1112 rdatm_res_state_new :
1110 rdatm_res_state_replay; 1113 rdatm_res_state_replay;
1114 res->replay = 1;
1111 1115
1112 /* Reset the resource, except length. */ 1116 /* Reset the resource, except length. */
1113 res->read.va_org = iova; 1117 res->read.va_org = iova;
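The rxe_resp.c changes above make the responder remember the PSN up to which it has actually acknowledged (ack_psn) separately from the next expected PSN, compare duplicate requests against that value, and flag read resources as replays so a replayed read does not reset qp->resp.opcode. PSNs are 24-bit sequence numbers that wrap, so ordering is decided on the wrapped difference; a standalone sketch of that comparison (modelled on, but not copied from, the driver's psn_compare()):

```c
/* Sketch: compare 24-bit wrapping PSNs by shifting the difference into
 * the top of a signed 32-bit value, so wraparound is handled by
 * two's-complement arithmetic.
 */
#include <stdio.h>
#include <stdint.h>

#define BTH_PSN_MASK 0x00ffffffu

static int32_t psn_compare(uint32_t a, uint32_t b)
{
	return (int32_t)((a - b) << 8);	/* >0: a newer, <0: a older, 0: equal */
}

int main(void)
{
	uint32_t prev = (0u - 1u) & BTH_PSN_MASK; /* PSN before 0 wraps to 0xffffff */

	printf("PSN preceding 0 is 0x%06x\n", prev);
	printf("psn_compare(0x000001, 0xfffffe) = %d (newer despite wrap)\n",
	       psn_compare(0x000001, 0xfffffe));
	printf("psn_compare(0xfffffe, 0x000001) = %d (older)\n",
	       psn_compare(0xfffffe, 0x000001));
	return 0;
}
```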
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 0d6c04ba7fc3..c41a5fee81f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -31,6 +31,7 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/vmalloc.h>
34#include "rxe.h" 35#include "rxe.h"
35#include "rxe_loc.h" 36#include "rxe_loc.h"
36#include "rxe_queue.h" 37#include "rxe_queue.h"
@@ -129,13 +130,18 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
129 130
130 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf, 131 err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, context, q->buf,
131 q->buf_size, &q->ip); 132 q->buf_size, &q->ip);
132 if (err) 133 if (err) {
134 vfree(q->buf);
135 kfree(q);
133 return err; 136 return err;
137 }
134 138
135 if (uresp) { 139 if (uresp) {
136 if (copy_to_user(&uresp->srq_num, &srq->srq_num, 140 if (copy_to_user(&uresp->srq_num, &srq->srq_num,
137 sizeof(uresp->srq_num))) 141 sizeof(uresp->srq_num))) {
142 rxe_queue_cleanup(q);
138 return -EFAULT; 143 return -EFAULT;
144 }
139 } 145 }
140 146
141 return 0; 147 return 0;
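The rxe_srq.c hunk above fixes two leak paths in rxe_srq_from_init(): if do_mmap_info() fails, the freshly allocated queue buffer and queue structure are now released, and if copying the SRQ number back to user space fails, the queue is cleaned up via rxe_queue_cleanup() before returning -EFAULT. The general shape is "undo in reverse order of setup"; a minimal userspace sketch of that pattern with placeholder allocations:

```c
/* Sketch of reverse-order error unwinding; the two allocations and the
 * final step stand in for the SRQ queue buffer, its mmap info, and the
 * copy back to user space.
 */
#include <stdio.h>
#include <stdlib.h>

static void *try_alloc(size_t n, int fail)
{
	return fail ? NULL : malloc(n);
}

static int setup(int fail_info, int fail_copy)
{
	char *buf, *info;

	buf = try_alloc(64, 0);			/* queue buffer            */
	if (!buf)
		return -1;

	info = try_alloc(32, fail_info);	/* mmap info               */
	if (!info)
		goto free_buf;

	if (fail_copy)				/* copy_to_user equivalent */
		goto free_info;

	free(info);	/* freed on success too, only so the sketch is leak-free */
	free(buf);
	return 0;

free_info:
	free(info);
free_buf:
	free(buf);
	return -1;
}

int main(void)
{
	printf("mmap info fails -> %d\n", setup(1, 0));
	printf("copy fails      -> %d\n", setup(0, 1));
	printf("success         -> %d\n", setup(0, 0));
	return 0;
}
```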
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index d5ed7571128f..73a19f808e1b 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -105,7 +105,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
105 } 105 }
106 106
107 rxe_set_port_state(ndev); 107 rxe_set_port_state(ndev);
108 pr_info("added %s to %s\n", rxe->ib_dev.name, intf); 108 dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
109err: 109err:
110 if (ndev) 110 if (ndev)
111 dev_put(ndev); 111 dev_put(ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f5b1e0ad6142..9c19f2027511 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1148,18 +1148,21 @@ static ssize_t parent_show(struct device *device,
1148 1148
1149static DEVICE_ATTR_RO(parent); 1149static DEVICE_ATTR_RO(parent);
1150 1150
1151static struct device_attribute *rxe_dev_attributes[] = { 1151static struct attribute *rxe_dev_attributes[] = {
1152 &dev_attr_parent, 1152 &dev_attr_parent.attr,
1153 NULL
1154};
1155
1156static const struct attribute_group rxe_attr_group = {
1157 .attrs = rxe_dev_attributes,
1153}; 1158};
1154 1159
1155int rxe_register_device(struct rxe_dev *rxe) 1160int rxe_register_device(struct rxe_dev *rxe)
1156{ 1161{
1157 int err; 1162 int err;
1158 int i;
1159 struct ib_device *dev = &rxe->ib_dev; 1163 struct ib_device *dev = &rxe->ib_dev;
1160 struct crypto_shash *tfm; 1164 struct crypto_shash *tfm;
1161 1165
1162 strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
1163 strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); 1166 strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
1164 1167
1165 dev->owner = THIS_MODULE; 1168 dev->owner = THIS_MODULE;
@@ -1260,26 +1263,16 @@ int rxe_register_device(struct rxe_dev *rxe)
1260 } 1263 }
1261 rxe->tfm = tfm; 1264 rxe->tfm = tfm;
1262 1265
1266 rdma_set_device_sysfs_group(dev, &rxe_attr_group);
1263 dev->driver_id = RDMA_DRIVER_RXE; 1267 dev->driver_id = RDMA_DRIVER_RXE;
1264 err = ib_register_device(dev, NULL); 1268 err = ib_register_device(dev, "rxe%d", NULL);
1265 if (err) { 1269 if (err) {
1266 pr_warn("%s failed with error %d\n", __func__, err); 1270 pr_warn("%s failed with error %d\n", __func__, err);
1267 goto err1; 1271 goto err1;
1268 } 1272 }
1269 1273
1270 for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
1271 err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
1272 if (err) {
1273 pr_warn("%s failed with error %d for attr number %d\n",
1274 __func__, err, i);
1275 goto err2;
1276 }
1277 }
1278
1279 return 0; 1274 return 0;
1280 1275
1281err2:
1282 ib_unregister_device(dev);
1283err1: 1276err1:
1284 crypto_free_shash(rxe->tfm); 1277 crypto_free_shash(rxe->tfm);
1285 1278
@@ -1288,12 +1281,8 @@ err1:
1288 1281
1289int rxe_unregister_device(struct rxe_dev *rxe) 1282int rxe_unregister_device(struct rxe_dev *rxe)
1290{ 1283{
1291 int i;
1292 struct ib_device *dev = &rxe->ib_dev; 1284 struct ib_device *dev = &rxe->ib_dev;
1293 1285
1294 for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
1295 device_remove_file(&dev->dev, rxe_dev_attributes[i]);
1296
1297 ib_unregister_device(dev); 1286 ib_unregister_device(dev);
1298 1287
1299 return 0; 1288 return 0;
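rxe_verbs.c above drops the manual device_create_file() loop (and its partial-failure unwinding through err2) in favour of a static attribute array wrapped in an attribute_group and handed to the core with rdma_set_device_sysfs_group() before ib_register_device(); the core then creates and removes the whole group together with the device, so rxe_unregister_device() loses its matching device_remove_file() loop. The device name is likewise no longer preset with strlcpy() but passed to ib_register_device() as the "rxe%d" format. The win is mostly in error handling; a tiny illustrative sketch of why per-item registration is the more fragile shape (register_one/unregister_one are placeholders, not kernel calls):

```c
/* Sketch: per-item registration must unwind everything created so far
 * on failure; a grouped registration moves that loop into shared code.
 */
#include <stdio.h>

static const char *attrs[] = { "parent", "example_a", "example_b" };
#define NUM_ATTRS 3

static int register_one(const char *name, int i, int fail_index)
{
	if (i == fail_index)
		return -1;
	printf("  created %s\n", name);
	return 0;
}

static void unregister_one(const char *name)
{
	printf("  removed %s\n", name);
}

static int register_each(int fail_index)
{
	int i;

	for (i = 0; i < NUM_ATTRS; i++) {
		if (register_one(attrs[i], i, fail_index)) {
			while (--i >= 0)	/* unwind the partial work */
				unregister_one(attrs[i]);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	printf("per-file registration, failure at index 2:\n");
	printf("  -> %d\n", register_each(2));
	printf("with a group, the core owns both the loop and the unwinding\n");
	return 0;
}
```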
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index af1470d29391..82e670d6eeea 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -158,6 +158,7 @@ struct rxe_comp_info {
158 int opcode; 158 int opcode;
159 int timeout; 159 int timeout;
160 int timeout_retry; 160 int timeout_retry;
161 int started_retry;
161 u32 retry_cnt; 162 u32 retry_cnt;
162 u32 rnr_retry; 163 u32 rnr_retry;
163 struct rxe_task task; 164 struct rxe_task task;
@@ -171,6 +172,7 @@ enum rdatm_res_state {
171 172
172struct resp_res { 173struct resp_res {
173 int type; 174 int type;
175 int replay;
174 u32 first_psn; 176 u32 first_psn;
175 u32 last_psn; 177 u32 last_psn;
176 u32 cur_psn; 178 u32 cur_psn;
@@ -195,6 +197,7 @@ struct rxe_resp_info {
195 enum rxe_qp_state state; 197 enum rxe_qp_state state;
196 u32 msn; 198 u32 msn;
197 u32 psn; 199 u32 psn;
200 u32 ack_psn;
198 int opcode; 201 int opcode;
199 int drop_msg; 202 int drop_msg;
200 int goto_error; 203 int goto_error;
@@ -248,6 +251,7 @@ struct rxe_qp {
248 251
249 struct socket *sk; 252 struct socket *sk;
250 u32 dst_cookie; 253 u32 dst_cookie;
254 u16 src_port;
251 255
252 struct rxe_av pri_av; 256 struct rxe_av pri_av;
253 struct rxe_av alt_av; 257 struct rxe_av alt_av;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3d5424f335cb..0428e01e8f69 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1438,11 +1438,15 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1438 spin_unlock_irqrestore(&priv->lock, flags); 1438 spin_unlock_irqrestore(&priv->lock, flags);
1439 netif_tx_unlock_bh(dev); 1439 netif_tx_unlock_bh(dev);
1440 1440
1441 if (skb->protocol == htons(ETH_P_IP)) 1441 if (skb->protocol == htons(ETH_P_IP)) {
1442 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
1442 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1443 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1444 }
1443#if IS_ENABLED(CONFIG_IPV6) 1445#if IS_ENABLED(CONFIG_IPV6)
1444 else if (skb->protocol == htons(ETH_P_IPV6)) 1446 else if (skb->protocol == htons(ETH_P_IPV6)) {
1447 memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
1445 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1448 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1449 }
1446#endif 1450#endif
1447 dev_kfree_skb_any(skb); 1451 dev_kfree_skb_any(skb);
1448 1452
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8baa75a705c5..8710214594d8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -243,7 +243,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
243 return 0; 243 return 0;
244 } 244 }
245 245
246 if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) 246 if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) ||
247 new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
247 return -EINVAL; 248 return -EINVAL;
248 249
249 priv->admin_mtu = new_mtu; 250 priv->admin_mtu = new_mtu;
@@ -1880,6 +1881,8 @@ static int ipoib_parent_init(struct net_device *ndev)
1880 sizeof(union ib_gid)); 1881 sizeof(union ib_gid));
1881 1882
1882 SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); 1883 SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
1884 priv->dev->dev_port = priv->port - 1;
1885 /* Let's set this one too for backwards compatibility. */
1883 priv->dev->dev_id = priv->port - 1; 1886 priv->dev->dev_id = priv->port - 1;
1884 1887
1885 return 0; 1888 return 0;
@@ -2385,6 +2388,35 @@ int ipoib_add_pkey_attr(struct net_device *dev)
2385 return device_create_file(&dev->dev, &dev_attr_pkey); 2388 return device_create_file(&dev->dev, &dev_attr_pkey);
2386} 2389}
2387 2390
2391/*
2392 * We erroneously exposed the iface's port number in the dev_id
2393 * sysfs field long after dev_port was introduced for that purpose[1],
2394 * and we need to stop everyone from relying on that.
2395 * Let's overload the shower routine for the dev_id file here
2396 * to gently bring the issue up.
2397 *
2398 * [1] https://www.spinics.net/lists/netdev/msg272123.html
2399 */
2400static ssize_t dev_id_show(struct device *dev,
2401 struct device_attribute *attr, char *buf)
2402{
2403 struct net_device *ndev = to_net_dev(dev);
2404
2405 if (ndev->dev_id == ndev->dev_port)
2406 netdev_info_once(ndev,
2407 "\"%s\" wants to know my dev_id. Should it look at dev_port instead? See Documentation/ABI/testing/sysfs-class-net for more info.\n",
2408 current->comm);
2409
2410 return sprintf(buf, "%#x\n", ndev->dev_id);
2411}
2412static DEVICE_ATTR_RO(dev_id);
2413
2414int ipoib_intercept_dev_id_attr(struct net_device *dev)
2415{
2416 device_remove_file(&dev->dev, &dev_attr_dev_id);
2417 return device_create_file(&dev->dev, &dev_attr_dev_id);
2418}
2419
2388static struct net_device *ipoib_add_port(const char *format, 2420static struct net_device *ipoib_add_port(const char *format,
2389 struct ib_device *hca, u8 port) 2421 struct ib_device *hca, u8 port)
2390{ 2422{
@@ -2437,6 +2469,8 @@ static struct net_device *ipoib_add_port(const char *format,
2437 */ 2469 */
2438 ndev->priv_destructor = ipoib_intf_free; 2470 ndev->priv_destructor = ipoib_intf_free;
2439 2471
2472 if (ipoib_intercept_dev_id_attr(ndev))
2473 goto sysfs_failed;
2440 if (ipoib_cm_add_mode_attr(ndev)) 2474 if (ipoib_cm_add_mode_attr(ndev))
2441 goto sysfs_failed; 2475 goto sysfs_failed;
2442 if (ipoib_add_pkey_attr(ndev)) 2476 if (ipoib_add_pkey_attr(ndev))
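The ipoib changes above set dev_port (the field intended for the port number) in addition to dev_id, override the dev_id show routine so a process still reading dev_id gets a one-time hint in the kernel log to switch to dev_port, and reject MTUs below the Ethernet minimum plus the IPoIB encapsulation header. From user space both attributes are plain sysfs files; a small reader that checks the two, with the interface name taken as an example argument:

```c
/* Sketch: read dev_port and dev_id for a netdev, e.g. ./a.out ib0.
 * The /sys/class/net paths are standard; "ib0" is only an example name.
 */
#include <stdio.h>

static void show(const char *ifname, const char *attr)
{
	char path[256], buf[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/class/net/%s/%s", ifname, attr);
	f = fopen(path, "r");
	if (!f) {
		printf("%s: not available\n", path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%-8s = %s", attr, buf);
	fclose(f);
}

int main(int argc, char **argv)
{
	const char *ifname = argc > 1 ? argv[1] : "ib0";

	show(ifname, "dev_port");	/* preferred: 0-based port number     */
	show(ifname, "dev_id");		/* legacy; reading it may log a hint  */
	return 0;
}
```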
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 9f36ca786df8..1e88213459f2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -277,7 +277,7 @@ void ipoib_event(struct ib_event_handler *handler,
277 return; 277 return;
278 278
279 ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event, 279 ipoib_dbg(priv, "Event %d on device %s port %d\n", record->event,
280 record->device->name, record->element.port_num); 280 dev_name(&record->device->dev), record->element.port_num);
281 281
282 if (record->event == IB_EVENT_SM_CHANGE || 282 if (record->event == IB_EVENT_SM_CHANGE ||
283 record->event == IB_EVENT_CLIENT_REREGISTER) { 283 record->event == IB_EVENT_CLIENT_REREGISTER) {
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 2f6388596f88..96af06cfe0af 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -589,13 +589,19 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
589 ib_conn->post_recv_buf_count--; 589 ib_conn->post_recv_buf_count--;
590} 590}
591 591
592static inline void 592static inline int
593iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) 593iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
594{ 594{
595 if (likely(rkey == desc->rsc.mr->rkey)) 595 if (likely(rkey == desc->rsc.mr->rkey)) {
596 desc->rsc.mr_valid = 0; 596 desc->rsc.mr_valid = 0;
597 else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) 597 } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) {
598 desc->pi_ctx->sig_mr_valid = 0; 598 desc->pi_ctx->sig_mr_valid = 0;
599 } else {
600 iser_err("Bogus remote invalidation for rkey %#x\n", rkey);
601 return -EINVAL;
602 }
603
604 return 0;
599} 605}
600 606
601static int 607static int
@@ -623,12 +629,14 @@ iser_check_remote_inv(struct iser_conn *iser_conn,
623 629
624 if (iser_task->dir[ISER_DIR_IN]) { 630 if (iser_task->dir[ISER_DIR_IN]) {
625 desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; 631 desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
626 iser_inv_desc(desc, rkey); 632 if (unlikely(iser_inv_desc(desc, rkey)))
633 return -EINVAL;
627 } 634 }
628 635
629 if (iser_task->dir[ISER_DIR_OUT]) { 636 if (iser_task->dir[ISER_DIR_OUT]) {
630 desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; 637 desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
631 iser_inv_desc(desc, rkey); 638 if (unlikely(iser_inv_desc(desc, rkey)))
639 return -EINVAL;
632 } 640 }
633 } else { 641 } else {
634 iser_err("failed to get task for itt=%d\n", hdr->itt); 642 iser_err("failed to get task for itt=%d\n", hdr->itt);
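iser_inv_desc() above stops assuming that a remote-invalidate rkey must match either the data MR or the protection-information MR: it checks that pi_ctx exists before dereferencing it and returns -EINVAL for an rkey that matches neither, and iser_check_remote_inv() now propagates that error instead of silently accepting a bogus invalidation. The shape is "validate against the keys this descriptor actually owns"; a standalone sketch with invented fields:

```c
/* Sketch only: reject a remote invalidation whose rkey matches none of
 * the registration keys owned by this descriptor.  Fields are invented.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <errno.h>

struct fake_desc {
	uint32_t mr_rkey;
	bool	 mr_valid;
	bool	 has_pi;	/* stands in for desc->pi_ctx != NULL */
	uint32_t sig_rkey;
	bool	 sig_valid;
};

static int inv_desc(struct fake_desc *d, uint32_t rkey)
{
	if (rkey == d->mr_rkey)
		d->mr_valid = false;
	else if (d->has_pi && rkey == d->sig_rkey)
		d->sig_valid = false;
	else
		return -EINVAL;	/* bogus remote invalidation */
	return 0;
}

int main(void)
{
	struct fake_desc d = { .mr_rkey = 0x10, .mr_valid = true,
			       .has_pi = false, .sig_rkey = 0x20,
			       .sig_valid = true };

	printf("invalidate 0x10            -> %d\n", inv_desc(&d, 0x10));
	printf("invalidate 0x20, no pi_ctx -> %d\n", inv_desc(&d, 0x20));
	return 0;
}
```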
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b686a4aaffe8..946b623ba5eb 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -55,7 +55,7 @@ static void iser_event_handler(struct ib_event_handler *handler,
55{ 55{
56 iser_err("async event %s (%d) on device %s port %d\n", 56 iser_err("async event %s (%d) on device %s port %d\n",
57 ib_event_msg(event->event), event->event, 57 ib_event_msg(event->event), event->event,
58 event->device->name, event->element.port_num); 58 dev_name(&event->device->dev), event->element.port_num);
59} 59}
60 60
61/** 61/**
@@ -85,7 +85,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
85 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); 85 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
86 86
87 iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n", 87 iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
88 device->comps_used, ib_dev->name, 88 device->comps_used, dev_name(&ib_dev->dev),
89 ib_dev->num_comp_vectors, max_cqe); 89 ib_dev->num_comp_vectors, max_cqe);
90 90
91 device->pd = ib_alloc_pd(ib_dev, 91 device->pd = ib_alloc_pd(ib_dev,
@@ -468,7 +468,8 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
468 iser_conn->max_cmds = 468 iser_conn->max_cmds =
469 ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr); 469 ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
470 iser_dbg("device %s supports max_send_wr %d\n", 470 iser_dbg("device %s supports max_send_wr %d\n",
471 device->ib_device->name, ib_dev->attrs.max_qp_wr); 471 dev_name(&device->ib_device->dev),
472 ib_dev->attrs.max_qp_wr);
472 } 473 }
473 } 474 }
474 475
@@ -764,7 +765,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
764 IB_DEVICE_SIGNATURE_HANDOVER)) { 765 IB_DEVICE_SIGNATURE_HANDOVER)) {
765 iser_warn("T10-PI requested but not supported on %s, " 766 iser_warn("T10-PI requested but not supported on %s, "
766 "continue without T10-PI\n", 767 "continue without T10-PI\n",
767 ib_conn->device->ib_device->name); 768 dev_name(&ib_conn->device->ib_device->dev));
768 ib_conn->pi_support = false; 769 ib_conn->pi_support = false;
769 } else { 770 } else {
770 ib_conn->pi_support = true; 771 ib_conn->pi_support = true;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index f39670c5c25c..e3dd13798d79 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -262,7 +262,7 @@ isert_alloc_comps(struct isert_device *device)
262 262
263 isert_info("Using %d CQs, %s supports %d vectors support " 263 isert_info("Using %d CQs, %s supports %d vectors support "
264 "pi_capable %d\n", 264 "pi_capable %d\n",
265 device->comps_used, device->ib_device->name, 265 device->comps_used, dev_name(&device->ib_device->dev),
266 device->ib_device->num_comp_vectors, 266 device->ib_device->num_comp_vectors,
267 device->pi_capable); 267 device->pi_capable);
268 268
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
index 267da8215e08..31cd361416ac 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c
@@ -351,7 +351,8 @@ static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter,
351 if (unlikely(!dlid)) 351 if (unlikely(!dlid))
352 v_warn("Null dlid in MAC address\n"); 352 v_warn("Null dlid in MAC address\n");
353 } else if (def_port != OPA_VNIC_INVALID_PORT) { 353 } else if (def_port != OPA_VNIC_INVALID_PORT) {
354 dlid = info->vesw.u_ucast_dlid[def_port]; 354 if (def_port < OPA_VESW_MAX_NUM_DEF_PORT)
355 dlid = info->vesw.u_ucast_dlid[def_port];
355 } 356 }
356 } 357 }
357 358
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 15711dcc6f58..d119d9afa845 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -888,7 +888,8 @@ static void opa_vnic_event(struct ib_event_handler *handler,
888 return; 888 return;
889 889
890 c_dbg("OPA_VNIC received event %d on device %s port %d\n", 890 c_dbg("OPA_VNIC received event %d on device %s port %d\n",
891 record->event, record->device->name, record->element.port_num); 891 record->event, dev_name(&record->device->dev),
892 record->element.port_num);
892 893
893 if (record->event == IB_EVENT_PORT_ERR) 894 if (record->event == IB_EVENT_PORT_ERR)
894 idr_for_each(&port->vport_idr, vema_disable_vport, NULL); 895 idr_for_each(&port->vport_idr, vema_disable_vport, NULL);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0b34e909505f..eed0eb3bb04c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1330,17 +1330,8 @@ static void srp_terminate_io(struct srp_rport *rport)
1330{ 1330{
1331 struct srp_target_port *target = rport->lld_data; 1331 struct srp_target_port *target = rport->lld_data;
1332 struct srp_rdma_ch *ch; 1332 struct srp_rdma_ch *ch;
1333 struct Scsi_Host *shost = target->scsi_host;
1334 struct scsi_device *sdev;
1335 int i, j; 1333 int i, j;
1336 1334
1337 /*
1338 * Invoking srp_terminate_io() while srp_queuecommand() is running
1339 * is not safe. Hence the warning statement below.
1340 */
1341 shost_for_each_device(sdev, shost)
1342 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1343
1344 for (i = 0; i < target->ch_count; i++) { 1335 for (i = 0; i < target->ch_count; i++) {
1345 ch = &target->ch[i]; 1336 ch = &target->ch[i];
1346 1337
@@ -3124,7 +3115,8 @@ static ssize_t show_local_ib_device(struct device *dev,
3124{ 3115{
3125 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3116 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3126 3117
3127 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 3118 return sprintf(buf, "%s\n",
3119 dev_name(&target->srp_host->srp_dev->dev->dev));
3128} 3120}
3129 3121
3130static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 3122static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
@@ -3987,7 +3979,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3987{ 3979{
3988 struct srp_host *host = container_of(dev, struct srp_host, dev); 3980 struct srp_host *host = container_of(dev, struct srp_host, dev);
3989 3981
3990 return sprintf(buf, "%s\n", host->srp_dev->dev->name); 3982 return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
3991} 3983}
3992 3984
3993static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 3985static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -4019,7 +4011,8 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4019 4011
4020 host->dev.class = &srp_class; 4012 host->dev.class = &srp_class;
4021 host->dev.parent = device->dev->dev.parent; 4013 host->dev.parent = device->dev->dev.parent;
4022 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); 4014 dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
4015 port);
4023 4016
4024 if (device_register(&host->dev)) 4017 if (device_register(&host->dev))
4025 goto free_host; 4018 goto free_host;
@@ -4095,7 +4088,7 @@ static void srp_add_one(struct ib_device *device)
4095 srp_dev->mr_max_size = srp_dev->mr_page_size * 4088 srp_dev->mr_max_size = srp_dev->mr_page_size *
4096 srp_dev->max_pages_per_mr; 4089 srp_dev->max_pages_per_mr;
4097 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 4090 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4098 device->name, mr_page_shift, attr->max_mr_size, 4091 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
4099 attr->max_fast_reg_page_list_len, 4092 attr->max_fast_reg_page_list_len,
4100 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 4093 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4101 4094
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f37cbad022a2..2357aa727dcf 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -148,7 +148,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
148 return; 148 return;
149 149
150 pr_debug("ASYNC event= %d on device= %s\n", event->event, 150 pr_debug("ASYNC event= %d on device= %s\n", event->event,
151 sdev->device->name); 151 dev_name(&sdev->device->dev));
152 152
153 switch (event->event) { 153 switch (event->event) {
154 case IB_EVENT_PORT_ERR: 154 case IB_EVENT_PORT_ERR:
@@ -1941,7 +1941,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
1941 if (srpt_disconnect_ch(ch) >= 0) 1941 if (srpt_disconnect_ch(ch) >= 0)
1942 pr_info("Closing channel %s because target %s_%d has been disabled\n", 1942 pr_info("Closing channel %s because target %s_%d has been disabled\n",
1943 ch->sess_name, 1943 ch->sess_name,
1944 sport->sdev->device->name, sport->port); 1944 dev_name(&sport->sdev->device->dev),
1945 sport->port);
1945 srpt_close_ch(ch); 1946 srpt_close_ch(ch);
1946 } 1947 }
1947 } 1948 }
@@ -2127,7 +2128,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
2127 if (!sport->enabled) { 2128 if (!sport->enabled) {
2128 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2129 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2129 pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n", 2130 pr_info("rejected SRP_LOGIN_REQ because target port %s_%d has not yet been enabled\n",
2130 sport->sdev->device->name, port_num); 2131 dev_name(&sport->sdev->device->dev), port_num);
2131 goto reject; 2132 goto reject;
2132 } 2133 }
2133 2134
@@ -2267,7 +2268,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
2267 rej->reason = cpu_to_be32( 2268 rej->reason = cpu_to_be32(
2268 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); 2269 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2269 pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n", 2270 pr_info("rejected SRP_LOGIN_REQ because target %s_%d is not enabled\n",
2270 sdev->device->name, port_num); 2271 dev_name(&sdev->device->dev), port_num);
2271 mutex_unlock(&sport->mutex); 2272 mutex_unlock(&sport->mutex);
2272 goto reject; 2273 goto reject;
2273 } 2274 }
@@ -2708,7 +2709,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
2708 break; 2709 break;
2709 } 2710 }
2710 2711
2711 if (unlikely(WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) 2712 if (WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))
2712 return; 2713 return;
2713 2714
2714 /* For read commands, transfer the data to the initiator. */ 2715 /* For read commands, transfer the data to the initiator. */
@@ -2842,7 +2843,7 @@ static int srpt_release_sport(struct srpt_port *sport)
2842 while (wait_event_timeout(sport->ch_releaseQ, 2843 while (wait_event_timeout(sport->ch_releaseQ,
2843 srpt_ch_list_empty(sport), 5 * HZ) <= 0) { 2844 srpt_ch_list_empty(sport), 5 * HZ) <= 0) {
2844 pr_info("%s_%d: waiting for session unregistration ...\n", 2845 pr_info("%s_%d: waiting for session unregistration ...\n",
2845 sport->sdev->device->name, sport->port); 2846 dev_name(&sport->sdev->device->dev), sport->port);
2846 rcu_read_lock(); 2847 rcu_read_lock();
2847 list_for_each_entry(nexus, &sport->nexus_list, entry) { 2848 list_for_each_entry(nexus, &sport->nexus_list, entry) {
2848 list_for_each_entry(ch, &nexus->ch_list, list) { 2849 list_for_each_entry(ch, &nexus->ch_list, list) {
@@ -2932,7 +2933,7 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
2932 } 2933 }
2933 2934
2934 pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size, 2935 pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size,
2935 sdev->device->attrs.max_srq_wr, device->name); 2936 sdev->device->attrs.max_srq_wr, dev_name(&device->dev));
2936 2937
2937 sdev->ioctx_ring = (struct srpt_recv_ioctx **) 2938 sdev->ioctx_ring = (struct srpt_recv_ioctx **)
2938 srpt_alloc_ioctx_ring(sdev, sdev->srq_size, 2939 srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
@@ -2965,8 +2966,8 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
2965 } else if (use_srq && !sdev->srq) { 2966 } else if (use_srq && !sdev->srq) {
2966 ret = srpt_alloc_srq(sdev); 2967 ret = srpt_alloc_srq(sdev);
2967 } 2968 }
2968 pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__, device->name, 2969 pr_debug("%s(%s): use_srq = %d; ret = %d\n", __func__,
2969 sdev->use_srq, ret); 2970 dev_name(&device->dev), sdev->use_srq, ret);
2970 return ret; 2971 return ret;
2971} 2972}
2972 2973
@@ -3052,7 +3053,7 @@ static void srpt_add_one(struct ib_device *device)
3052 3053
3053 if (srpt_refresh_port(sport)) { 3054 if (srpt_refresh_port(sport)) {
3054 pr_err("MAD registration failed for %s-%d.\n", 3055 pr_err("MAD registration failed for %s-%d.\n",
3055 sdev->device->name, i); 3056 dev_name(&sdev->device->dev), i);
3056 goto err_event; 3057 goto err_event;
3057 } 3058 }
3058 } 3059 }
@@ -3063,7 +3064,7 @@ static void srpt_add_one(struct ib_device *device)
3063 3064
3064out: 3065out:
3065 ib_set_client_data(device, &srpt_client, sdev); 3066 ib_set_client_data(device, &srpt_client, sdev);
3066 pr_debug("added %s.\n", device->name); 3067 pr_debug("added %s.\n", dev_name(&device->dev));
3067 return; 3068 return;
3068 3069
3069err_event: 3070err_event:
@@ -3078,7 +3079,7 @@ free_dev:
3078 kfree(sdev); 3079 kfree(sdev);
3079err: 3080err:
3080 sdev = NULL; 3081 sdev = NULL;
3081 pr_info("%s(%s) failed.\n", __func__, device->name); 3082 pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev));
3082 goto out; 3083 goto out;
3083} 3084}
3084 3085
@@ -3093,7 +3094,8 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
3093 int i; 3094 int i;
3094 3095
3095 if (!sdev) { 3096 if (!sdev) {
3096 pr_info("%s(%s): nothing to do.\n", __func__, device->name); 3097 pr_info("%s(%s): nothing to do.\n", __func__,
3098 dev_name(&device->dev));
3097 return; 3099 return;
3098 } 3100 }
3099 3101
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 31460eeb6fe0..aa5963b5d38e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -97,14 +97,15 @@ enum {
97}; 97};
98 98
99enum { 99enum {
100 MLX5_ATOMIC_MODE_IB_COMP = 1 << 16, 100 MLX5_ATOMIC_MODE_OFFSET = 16,
101 MLX5_ATOMIC_MODE_CX = 2 << 16, 101 MLX5_ATOMIC_MODE_IB_COMP = 1,
102 MLX5_ATOMIC_MODE_8B = 3 << 16, 102 MLX5_ATOMIC_MODE_CX = 2,
103 MLX5_ATOMIC_MODE_16B = 4 << 16, 103 MLX5_ATOMIC_MODE_8B = 3,
104 MLX5_ATOMIC_MODE_32B = 5 << 16, 104 MLX5_ATOMIC_MODE_16B = 4,
105 MLX5_ATOMIC_MODE_64B = 6 << 16, 105 MLX5_ATOMIC_MODE_32B = 5,
106 MLX5_ATOMIC_MODE_128B = 7 << 16, 106 MLX5_ATOMIC_MODE_64B = 6,
107 MLX5_ATOMIC_MODE_256B = 8 << 16, 107 MLX5_ATOMIC_MODE_128B = 7,
108 MLX5_ATOMIC_MODE_256B = 8,
108}; 109};
109 110
110enum { 111enum {
@@ -163,13 +164,11 @@ enum mlx5_dcbx_oper_mode {
163 MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, 164 MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3,
164}; 165};
165 166
166enum mlx5_dct_atomic_mode {
167 MLX5_ATOMIC_MODE_DCT_CX = 2,
168};
169
170enum { 167enum {
171 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, 168 MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
172 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, 169 MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
170 MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP = 1 << 2,
171 MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD = 1 << 3,
173}; 172};
174 173
175enum mlx5_page_fault_resume_flags { 174enum mlx5_page_fault_resume_flags {
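The mlx5 header change above stops baking the field position into the atomic-mode constants: the modes become plain values and MLX5_ATOMIC_MODE_OFFSET (16) records where they sit, so callers shift explicitly, and the DCT-specific duplicate enum disappears; extended compare-swap and fetch-add capability bits are added alongside. A quick check that the new encoding reproduces the old pre-shifted values:

```c
/* Sketch: the old constants were pre-shifted (mode << 16); the new ones
 * are raw mode numbers plus an explicit offset.
 */
#include <stdio.h>

enum {
	MLX5_ATOMIC_MODE_OFFSET  = 16,
	MLX5_ATOMIC_MODE_IB_COMP = 1,
	MLX5_ATOMIC_MODE_CX      = 2,
	MLX5_ATOMIC_MODE_8B      = 3,
	MLX5_ATOMIC_MODE_64B     = 6,
};

int main(void)
{
	/* the old MLX5_ATOMIC_MODE_64B was literally 6 << 16 */
	unsigned int old_64b = 6 << 16;
	unsigned int new_64b = MLX5_ATOMIC_MODE_64B << MLX5_ATOMIC_MODE_OFFSET;

	printf("old 0x%x, new 0x%x, equal: %d\n",
	       old_64b, new_64b, old_64b == new_64b);
	return 0;
}
```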
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index df4d13f7e191..d15f8e4815e3 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -39,15 +39,6 @@
39#include <linux/qed/qed_ll2_if.h> 39#include <linux/qed/qed_ll2_if.h>
40#include <linux/qed/rdma_common.h> 40#include <linux/qed/rdma_common.h>
41 41
42enum qed_roce_ll2_tx_dest {
43 /* Light L2 TX Destination to the Network */
44 QED_ROCE_LL2_TX_DEST_NW,
45
46 /* Light L2 TX Destination to the Loopback */
47 QED_ROCE_LL2_TX_DEST_LB,
48 QED_ROCE_LL2_TX_DEST_MAX
49};
50
51#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) 42#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF)
52 43
53/* rdma interface */ 44/* rdma interface */
@@ -581,7 +572,7 @@ struct qed_roce_ll2_packet {
581 int n_seg; 572 int n_seg;
582 struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; 573 struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
583 int roce_mode; 574 int roce_mode;
584 enum qed_roce_ll2_tx_dest tx_dest; 575 enum qed_ll2_tx_dest tx_dest;
585}; 576};
586 577
587enum qed_rdma_type { 578enum qed_rdma_type {
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 77c7908b7d73..2734c895c1bf 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -46,7 +46,6 @@
46#include <net/ip.h> 46#include <net/ip.h>
47#include <rdma/ib_verbs.h> 47#include <rdma/ib_verbs.h>
48#include <rdma/ib_pack.h> 48#include <rdma/ib_pack.h>
49#include <net/ipv6.h>
50#include <net/net_namespace.h> 49#include <net/net_namespace.h>
51 50
52/** 51/**
@@ -95,20 +94,18 @@ int rdma_translate_ip(const struct sockaddr *addr,
95 * @timeout_ms: Amount of time to wait for the address resolution to complete. 94 * @timeout_ms: Amount of time to wait for the address resolution to complete.
96 * @callback: Call invoked once address resolution has completed, timed out, 95 * @callback: Call invoked once address resolution has completed, timed out,
97 * or been canceled. A status of 0 indicates success. 96 * or been canceled. A status of 0 indicates success.
97 * @resolve_by_gid_attr: Resolve the ip based on the GID attribute from
98 * rdma_dev_addr.
98 * @context: User-specified context associated with the call. 99 * @context: User-specified context associated with the call.
99 */ 100 */
100int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, 101int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
101 struct rdma_dev_addr *addr, int timeout_ms, 102 struct rdma_dev_addr *addr, unsigned long timeout_ms,
102 void (*callback)(int status, struct sockaddr *src_addr, 103 void (*callback)(int status, struct sockaddr *src_addr,
103 struct rdma_dev_addr *addr, void *context), 104 struct rdma_dev_addr *addr, void *context),
104 void *context); 105 bool resolve_by_gid_attr, void *context);
105 106
106void rdma_addr_cancel(struct rdma_dev_addr *addr); 107void rdma_addr_cancel(struct rdma_dev_addr *addr);
107 108
108void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
109 const struct net_device *dev,
110 const unsigned char *dst_dev_addr);
111
112int rdma_addr_size(const struct sockaddr *addr); 109int rdma_addr_size(const struct sockaddr *addr);
113int rdma_addr_size_in6(struct sockaddr_in6 *addr); 110int rdma_addr_size_in6(struct sockaddr_in6 *addr);
114int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr); 111int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c10f4b5ea8ab..49f4f75499b3 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -583,7 +583,7 @@ struct ib_cm_sidr_req_param {
583 struct sa_path_rec *path; 583 struct sa_path_rec *path;
584 const struct ib_gid_attr *sgid_attr; 584 const struct ib_gid_attr *sgid_attr;
585 __be64 service_id; 585 __be64 service_id;
586 int timeout_ms; 586 unsigned long timeout_ms;
587 const void *private_data; 587 const void *private_data;
588 u8 private_data_len; 588 u8 private_data_len;
589 u8 max_cm_retries; 589 u8 max_cm_retries;
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index b6ddf2a1b9d8..19520979b84c 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -449,28 +449,23 @@ struct ib_sa_query;
449 449
450void ib_sa_cancel_query(int id, struct ib_sa_query *query); 450void ib_sa_cancel_query(int id, struct ib_sa_query *query);
451 451
452int ib_sa_path_rec_get(struct ib_sa_client *client, 452int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
453 struct ib_device *device, u8 port_num, 453 u8 port_num, struct sa_path_rec *rec,
454 struct sa_path_rec *rec, 454 ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
455 ib_sa_comp_mask comp_mask, 455 gfp_t gfp_mask,
456 int timeout_ms, gfp_t gfp_mask, 456 void (*callback)(int status, struct sa_path_rec *resp,
457 void (*callback)(int status,
458 struct sa_path_rec *resp,
459 void *context), 457 void *context),
460 void *context, 458 void *context, struct ib_sa_query **query);
461 struct ib_sa_query **query);
462 459
463int ib_sa_service_rec_query(struct ib_sa_client *client, 460int ib_sa_service_rec_query(struct ib_sa_client *client,
464 struct ib_device *device, u8 port_num, 461 struct ib_device *device, u8 port_num, u8 method,
465 u8 method, 462 struct ib_sa_service_rec *rec,
466 struct ib_sa_service_rec *rec, 463 ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
467 ib_sa_comp_mask comp_mask, 464 gfp_t gfp_mask,
468 int timeout_ms, gfp_t gfp_mask, 465 void (*callback)(int status,
469 void (*callback)(int status, 466 struct ib_sa_service_rec *resp,
470 struct ib_sa_service_rec *resp, 467 void *context),
471 void *context), 468 void *context, struct ib_sa_query **sa_query);
472 void *context,
473 struct ib_sa_query **sa_query);
474 469
475struct ib_sa_multicast { 470struct ib_sa_multicast {
476 struct ib_sa_mcmember_rec rec; 471 struct ib_sa_mcmember_rec rec;
@@ -573,12 +568,11 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
573 struct ib_device *device, u8 port_num, 568 struct ib_device *device, u8 port_num,
574 struct ib_sa_guidinfo_rec *rec, 569 struct ib_sa_guidinfo_rec *rec,
575 ib_sa_comp_mask comp_mask, u8 method, 570 ib_sa_comp_mask comp_mask, u8 method,
576 int timeout_ms, gfp_t gfp_mask, 571 unsigned long timeout_ms, gfp_t gfp_mask,
577 void (*callback)(int status, 572 void (*callback)(int status,
578 struct ib_sa_guidinfo_rec *resp, 573 struct ib_sa_guidinfo_rec *resp,
579 void *context), 574 void *context),
580 void *context, 575 void *context, struct ib_sa_query **sa_query);
581 struct ib_sa_query **sa_query);
582 576
583bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client, 577bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
584 struct ib_device *device, 578 struct ib_device *device,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a1fd63871d17..5d3755ec5afa 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,15 +42,14 @@ struct ib_umem_odp;
42 42
43struct ib_umem { 43struct ib_umem {
44 struct ib_ucontext *context; 44 struct ib_ucontext *context;
45 struct mm_struct *owning_mm;
45 size_t length; 46 size_t length;
46 unsigned long address; 47 unsigned long address;
47 int page_shift; 48 int page_shift;
48 int writable; 49 u32 writable : 1;
49 int hugetlb; 50 u32 hugetlb : 1;
51 u32 is_odp : 1;
50 struct work_struct work; 52 struct work_struct work;
51 struct mm_struct *mm;
52 unsigned long diff;
53 struct ib_umem_odp *odp_data;
54 struct sg_table sg_head; 53 struct sg_table sg_head;
55 int nmap; 54 int nmap;
56 int npages; 55 int npages;
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 381cdf5a9bd1..0b1446fe2fab 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -43,6 +43,9 @@ struct umem_odp_node {
43}; 43};
44 44
45struct ib_umem_odp { 45struct ib_umem_odp {
46 struct ib_umem umem;
47 struct ib_ucontext_per_mm *per_mm;
48
46 /* 49 /*
47 * An array of the pages included in the on-demand paging umem. 50 * An array of the pages included in the on-demand paging umem.
48 * Indices of pages that are currently not mapped into the device will 51 * Indices of pages that are currently not mapped into the device will
@@ -64,16 +67,9 @@ struct ib_umem_odp {
64 struct mutex umem_mutex; 67 struct mutex umem_mutex;
65 void *private; /* for the HW driver to use. */ 68 void *private; /* for the HW driver to use. */
66 69
67 /* When false, use the notifier counter in the ucontext struct. */
68 bool mn_counters_active;
69 int notifiers_seq; 70 int notifiers_seq;
70 int notifiers_count; 71 int notifiers_count;
71 72
72 /* A linked list of umems that don't have private mmu notifier
73 * counters yet. */
74 struct list_head no_private_counters;
75 struct ib_umem *umem;
76
77 /* Tree tracking */ 73 /* Tree tracking */
78 struct umem_odp_node interval_tree; 74 struct umem_odp_node interval_tree;
79 75
@@ -82,15 +78,34 @@ struct ib_umem_odp {
82 struct work_struct work; 78 struct work_struct work;
83}; 79};
84 80
81static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
82{
83 return container_of(umem, struct ib_umem_odp, umem);
84}
85
85#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 86#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
86 87
87int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, 88struct ib_ucontext_per_mm {
88 int access); 89 struct ib_ucontext *context;
89struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 90 struct mm_struct *mm;
90 unsigned long addr, 91 struct pid *tgid;
91 size_t size); 92 bool active;
93
94 struct rb_root_cached umem_tree;
95 /* Protects umem_tree */
96 struct rw_semaphore umem_rwsem;
92 97
93void ib_umem_odp_release(struct ib_umem *umem); 98 struct mmu_notifier mn;
99 unsigned int odp_mrs_count;
100
101 struct list_head ucontext_list;
102 struct rcu_head rcu;
103};
104
105int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
106struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
107 unsigned long addr, size_t size);
108void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
94 109
95/* 110/*
96 * The lower 2 bits of the DMA address signal the R/W permissions for 111 * The lower 2 bits of the DMA address signal the R/W permissions for
@@ -105,13 +120,14 @@ void ib_umem_odp_release(struct ib_umem *umem);
105 120
106#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) 121#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
107 122
108int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, 123int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
109 u64 access_mask, unsigned long current_seq); 124 u64 bcnt, u64 access_mask,
125 unsigned long current_seq);
110 126
111void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, 127void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
112 u64 bound); 128 u64 bound);
113 129
114typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, 130typedef int (*umem_call_back)(struct ib_umem_odp *item, u64 start, u64 end,
115 void *cookie); 131 void *cookie);
116/* 132/*
117 * Call the callback on each ib_umem in the range. Returns the logical or of 133 * Call the callback on each ib_umem in the range. Returns the logical or of
@@ -129,46 +145,37 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
129struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, 145struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
130 u64 addr, u64 length); 146 u64 addr, u64 length);
131 147
132static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, 148static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
133 unsigned long mmu_seq) 149 unsigned long mmu_seq)
134{ 150{
135 /* 151 /*
136 * This code is strongly based on the KVM code from 152 * This code is strongly based on the KVM code from
137 * mmu_notifier_retry. Should be called with 153 * mmu_notifier_retry. Should be called with
138 * the relevant locks taken (item->odp_data->umem_mutex 154 * the relevant locks taken (umem_odp->umem_mutex
139 * and the ucontext umem_mutex semaphore locked for read). 155 * and the ucontext umem_mutex semaphore locked for read).
140 */ 156 */
141 157
142 /* Do not allow page faults while the new ib_umem hasn't seen a state 158 if (unlikely(umem_odp->notifiers_count))
143 * with zero notifiers yet, and doesn't have its own valid set of
144 * private counters. */
145 if (!item->odp_data->mn_counters_active)
146 return 1;
147
148 if (unlikely(item->odp_data->notifiers_count))
149 return 1; 159 return 1;
150 if (item->odp_data->notifiers_seq != mmu_seq) 160 if (umem_odp->notifiers_seq != mmu_seq)
151 return 1; 161 return 1;
152 return 0; 162 return 0;
153} 163}
154 164
155#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 165#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
156 166
157static inline int ib_umem_odp_get(struct ib_ucontext *context, 167static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
158 struct ib_umem *umem,
159 int access)
160{ 168{
161 return -EINVAL; 169 return -EINVAL;
162} 170}
163 171
164static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, 172static inline struct ib_umem_odp *
165 unsigned long addr, 173ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
166 size_t size)
167{ 174{
168 return ERR_PTR(-EINVAL); 175 return ERR_PTR(-EINVAL);
169} 176}
170 177
171static inline void ib_umem_odp_release(struct ib_umem *umem) {} 178static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
172 179
173#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ 180#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
174 181
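
A minimal sketch tying the ib_umem and ib_umem_odp changes together: the ODP umem now embeds struct ib_umem, so a driver that holds an ib_umem checks the new is_odp bit and converts with to_ib_umem_odp() instead of dereferencing the removed odp_data pointer. The wrapper function is illustrative; the helpers and the ib_umem_odp_map_dma_pages() signature are as declared above.

    #include <rdma/ib_umem.h>
    #include <rdma/ib_umem_odp.h>

    static int my_map_odp_range(struct ib_umem *umem, u64 offset, u64 bcnt,
                                u64 access_mask, unsigned long current_seq)
    {
            struct ib_umem_odp *umem_odp;

            if (!umem->is_odp)
                    return -EINVAL;

            umem_odp = to_ib_umem_odp(umem);
            return ib_umem_odp_map_dma_pages(umem_odp, offset, bcnt,
                                             access_mask, current_seq);
    }
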
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0ed5d913a492..9c0c2132a2d6 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -69,8 +69,11 @@
69 69
70#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN 70#define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN
71 71
72struct ib_umem_odp;
73
72extern struct workqueue_struct *ib_wq; 74extern struct workqueue_struct *ib_wq;
73extern struct workqueue_struct *ib_comp_wq; 75extern struct workqueue_struct *ib_comp_wq;
76extern struct workqueue_struct *ib_comp_unbound_wq;
74 77
75union ib_gid { 78union ib_gid {
76 u8 raw[16]; 79 u8 raw[16];
@@ -1137,7 +1140,9 @@ enum ib_qp_create_flags {
1137 */ 1140 */
1138 1141
1139struct ib_qp_init_attr { 1142struct ib_qp_init_attr {
1143 /* Consumer's event_handler callback must not block */
1140 void (*event_handler)(struct ib_event *, void *); 1144 void (*event_handler)(struct ib_event *, void *);
1145
1141 void *qp_context; 1146 void *qp_context;
1142 struct ib_cq *send_cq; 1147 struct ib_cq *send_cq;
1143 struct ib_cq *recv_cq; 1148 struct ib_cq *recv_cq;
@@ -1146,7 +1151,7 @@ struct ib_qp_init_attr {
1146 struct ib_qp_cap cap; 1151 struct ib_qp_cap cap;
1147 enum ib_sig_type sq_sig_type; 1152 enum ib_sig_type sq_sig_type;
1148 enum ib_qp_type qp_type; 1153 enum ib_qp_type qp_type;
1149 enum ib_qp_create_flags create_flags; 1154 u32 create_flags;
1150 1155
1151 /* 1156 /*
1152 * Only needed for special QP types, or when using the RW API. 1157 * Only needed for special QP types, or when using the RW API.
@@ -1278,21 +1283,27 @@ struct ib_qp_attr {
1278}; 1283};
1279 1284
1280enum ib_wr_opcode { 1285enum ib_wr_opcode {
1281 IB_WR_RDMA_WRITE, 1286 /* These are shared with userspace */
1282 IB_WR_RDMA_WRITE_WITH_IMM, 1287 IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
1283 IB_WR_SEND, 1288 IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
1284 IB_WR_SEND_WITH_IMM, 1289 IB_WR_SEND = IB_UVERBS_WR_SEND,
1285 IB_WR_RDMA_READ, 1290 IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
1286 IB_WR_ATOMIC_CMP_AND_SWP, 1291 IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
1287 IB_WR_ATOMIC_FETCH_AND_ADD, 1292 IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
1288 IB_WR_LSO, 1293 IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
1289 IB_WR_SEND_WITH_INV, 1294 IB_WR_LSO = IB_UVERBS_WR_TSO,
1290 IB_WR_RDMA_READ_WITH_INV, 1295 IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
1291 IB_WR_LOCAL_INV, 1296 IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
1292 IB_WR_REG_MR, 1297 IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
1293 IB_WR_MASKED_ATOMIC_CMP_AND_SWP, 1298 IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
1294 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, 1299 IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
1300 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
1301 IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
1302
1303 /* These are kernel only and can not be issued by userspace */
1304 IB_WR_REG_MR = 0x20,
1295 IB_WR_REG_SIG_MR, 1305 IB_WR_REG_SIG_MR,
1306
1296 /* reserve values for low level drivers' internal use. 1307 /* reserve values for low level drivers' internal use.
1297 * These values will not be used at all in the ib core layer. 1308 * These values will not be used at all in the ib core layer.
1298 */ 1309 */
@@ -1485,26 +1496,15 @@ struct ib_ucontext {
1485 * it is set when we are closing the file descriptor and indicates 1496 * it is set when we are closing the file descriptor and indicates
1486 * that mm_sem may be locked. 1497 * that mm_sem may be locked.
1487 */ 1498 */
1488 int closing; 1499 bool closing;
1489 1500
1490 bool cleanup_retryable; 1501 bool cleanup_retryable;
1491 1502
1492 struct pid *tgid;
1493#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1503#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1494 struct rb_root_cached umem_tree; 1504 void (*invalidate_range)(struct ib_umem_odp *umem_odp,
1495 /*
1496 * Protects .umem_rbroot and tree, as well as odp_mrs_count and
1497 * mmu notifiers registration.
1498 */
1499 struct rw_semaphore umem_rwsem;
1500 void (*invalidate_range)(struct ib_umem *umem,
1501 unsigned long start, unsigned long end); 1505 unsigned long start, unsigned long end);
1502 1506 struct mutex per_mm_list_lock;
1503 struct mmu_notifier mn; 1507 struct list_head per_mm_list;
1504 atomic_t notifier_count;
1505 /* A list of umems that don't have private mmu notifier counters yet. */
1506 struct list_head no_private_counters;
1507 int odp_mrs_count;
1508#endif 1508#endif
1509 1509
1510 struct ib_rdmacg_object cg_obj; 1510 struct ib_rdmacg_object cg_obj;
@@ -1570,9 +1570,10 @@ struct ib_ah {
1570typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); 1570typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
1571 1571
1572enum ib_poll_context { 1572enum ib_poll_context {
1573 IB_POLL_DIRECT, /* caller context, no hw completions */ 1573 IB_POLL_DIRECT, /* caller context, no hw completions */
1574 IB_POLL_SOFTIRQ, /* poll from softirq context */ 1574 IB_POLL_SOFTIRQ, /* poll from softirq context */
1575 IB_POLL_WORKQUEUE, /* poll from workqueue */ 1575 IB_POLL_WORKQUEUE, /* poll from workqueue */
1576 IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
1576}; 1577};
1577 1578
1578struct ib_cq { 1579struct ib_cq {
@@ -1589,6 +1590,7 @@ struct ib_cq {
1589 struct irq_poll iop; 1590 struct irq_poll iop;
1590 struct work_struct work; 1591 struct work_struct work;
1591 }; 1592 };
1593 struct workqueue_struct *comp_wq;
1592 /* 1594 /*
1593 * Implementation details of the RDMA core, don't use in drivers: 1595 * Implementation details of the RDMA core, don't use in drivers:
1594 */ 1596 */
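
A minimal sketch of a ULP opting into the new unbound completion workqueue; apart from the IB_POLL_UNBOUND_WORKQUEUE value added above, the CQ size, the completion vector and the assumption that ib_alloc_cq() is used unchanged are illustrative.

    #include <rdma/ib_verbs.h>

    static struct ib_cq *my_alloc_unbound_cq(struct ib_device *ib_dev)
    {
            /* 128 CQEs on completion vector 0, polled from the unbound workqueue. */
            return ib_alloc_cq(ib_dev, NULL, 128, 0, IB_POLL_UNBOUND_WORKQUEUE);
    }
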
@@ -2263,10 +2265,11 @@ struct ib_device {
2263 struct list_head event_handler_list; 2265 struct list_head event_handler_list;
2264 spinlock_t event_handler_lock; 2266 spinlock_t event_handler_lock;
2265 2267
2266 spinlock_t client_data_lock; 2268 rwlock_t client_data_lock;
2267 struct list_head core_list; 2269 struct list_head core_list;
2268 /* Access to the client_data_list is protected by the client_data_lock 2270 /* Access to the client_data_list is protected by the client_data_lock
2269 * spinlock and the lists_rwsem read-write semaphore */ 2271 * rwlock and the lists_rwsem read-write semaphore
2272 */
2270 struct list_head client_data_list; 2273 struct list_head client_data_list;
2271 2274
2272 struct ib_cache cache; 2275 struct ib_cache cache;
@@ -2550,7 +2553,13 @@ struct ib_device {
2550 2553
2551 struct module *owner; 2554 struct module *owner;
2552 struct device dev; 2555 struct device dev;
2553 struct kobject *ports_parent; 2556 /* First group for device attributes,
2557 * Second group for driver provided attributes (optional).
 2558 * It is a NULL-terminated array.
2559 */
2560 const struct attribute_group *groups[3];
2561
2562 struct kobject *ports_kobj;
2554 struct list_head port_list; 2563 struct list_head port_list;
2555 2564
2556 enum { 2565 enum {
@@ -2633,9 +2642,9 @@ void ib_dealloc_device(struct ib_device *device);
2633 2642
2634void ib_get_device_fw_str(struct ib_device *device, char *str); 2643void ib_get_device_fw_str(struct ib_device *device, char *str);
2635 2644
2636int ib_register_device(struct ib_device *device, 2645int ib_register_device(struct ib_device *device, const char *name,
2637 int (*port_callback)(struct ib_device *, 2646 int (*port_callback)(struct ib_device *, u8,
2638 u8, struct kobject *)); 2647 struct kobject *));
2639void ib_unregister_device(struct ib_device *device); 2648void ib_unregister_device(struct ib_device *device);
2640 2649
2641int ib_register_client (struct ib_client *client); 2650int ib_register_client (struct ib_client *client);
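
A minimal sketch of the reworked registration call: the device name (optionally containing a %d unit pattern) is now passed to ib_register_device() rather than being written into ib_device->name by the driver beforehand. The driver name and the NULL port_callback are illustrative.

    static int my_driver_register(struct ib_device *ibdev)
    {
            return ib_register_device(ibdev, "mydrv%d", NULL);
    }
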
@@ -2645,6 +2654,28 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
2645void ib_set_client_data(struct ib_device *device, struct ib_client *client, 2654void ib_set_client_data(struct ib_device *device, struct ib_client *client,
2646 void *data); 2655 void *data);
2647 2656
2657#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
2658int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
2659 unsigned long pfn, unsigned long size, pgprot_t prot);
2660int rdma_user_mmap_page(struct ib_ucontext *ucontext,
2661 struct vm_area_struct *vma, struct page *page,
2662 unsigned long size);
2663#else
2664static inline int rdma_user_mmap_io(struct ib_ucontext *ucontext,
2665 struct vm_area_struct *vma,
2666 unsigned long pfn, unsigned long size,
2667 pgprot_t prot)
2668{
2669 return -EINVAL;
2670}
2671static inline int rdma_user_mmap_page(struct ib_ucontext *ucontext,
2672 struct vm_area_struct *vma, struct page *page,
2673 unsigned long size)
2674{
2675 return -EINVAL;
2676}
2677#endif
2678
2648static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) 2679static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
2649{ 2680{
2650 return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; 2681 return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
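
A minimal sketch of a driver mmap handler built on the new core helper above; the doorbell physical address is assumed to be handed in by a hypothetical caller, and offset decoding and error paths are elided.

    static int my_mmap_doorbell(struct ib_ucontext *ucontext,
                                struct vm_area_struct *vma, phys_addr_t db_addr)
    {
            /* Map one uncached page of BAR space into the caller's VMA. */
            return rdma_user_mmap_io(ucontext, vma, db_addr >> PAGE_SHIFT,
                                     PAGE_SIZE,
                                     pgprot_noncached(vma->vm_page_prot));
    }
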
@@ -2728,7 +2759,6 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
2728 * @next_state: Next QP state 2759 * @next_state: Next QP state
2729 * @type: QP type 2760 * @type: QP type
2730 * @mask: Mask of supplied QP attributes 2761 * @mask: Mask of supplied QP attributes
2731 * @ll : link layer of port
2732 * 2762 *
2733 * This function is a helper function that a low-level driver's 2763 * This function is a helper function that a low-level driver's
2734 * modify_qp method can use to validate the consumer's input. It 2764 * modify_qp method can use to validate the consumer's input. It
@@ -2737,8 +2767,7 @@ static inline int ib_destroy_usecnt(atomic_t *usecnt,
2737 * and that the attribute mask supplied is allowed for the transition. 2767 * and that the attribute mask supplied is allowed for the transition.
2738 */ 2768 */
2739bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 2769bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
2740 enum ib_qp_type type, enum ib_qp_attr_mask mask, 2770 enum ib_qp_type type, enum ib_qp_attr_mask mask);
2741 enum rdma_link_layer ll);
2742 2771
2743void ib_register_event_handler(struct ib_event_handler *event_handler); 2772void ib_register_event_handler(struct ib_event_handler *event_handler);
2744void ib_unregister_event_handler(struct ib_event_handler *event_handler); 2773void ib_unregister_event_handler(struct ib_event_handler *event_handler);
@@ -4167,20 +4196,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
4167 4196
4168} 4197}
4169 4198
4170static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
4171 struct ib_qp *qp, struct ib_device *device)
4172{
4173 uobj->object = ibflow;
4174 ibflow->uobject = uobj;
4175
4176 if (qp) {
4177 atomic_inc(&qp->usecnt);
4178 ibflow->qp = qp;
4179 }
4180
4181 ibflow->device = device;
4182}
4183
4184/** 4199/**
4185 * rdma_roce_rescan_device - Rescan all of the network devices in the system 4200 * rdma_roce_rescan_device - Rescan all of the network devices in the system
4186 * and add their gids, as needed, to the relevant RoCE devices. 4201 * and add their gids, as needed, to the relevant RoCE devices.
@@ -4205,4 +4220,26 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
4205 void (*setup)(struct net_device *), 4220 void (*setup)(struct net_device *),
4206 struct net_device *netdev); 4221 struct net_device *netdev);
4207 4222
4223/**
 4224 * rdma_set_device_sysfs_group - Set device attributes group to have
 4225 * driver specific sysfs entries for
 4226 * the infiniband class.
4227 *
4228 * @device: device pointer for which attributes to be created
4229 * @group: Pointer to group which should be added when device
4230 * is registered with sysfs.
4231 * rdma_set_device_sysfs_group() allows existing drivers to expose one
4232 * group per device to have sysfs attributes.
4233 *
 4234 * NOTE: New drivers should not make use of this API; instead, new device
 4235 * parameters should be exposed via the netlink command interface. This API
 4236 * and mechanism exist only for existing drivers.
4237 */
4238static inline void
4239rdma_set_device_sysfs_group(struct ib_device *dev,
4240 const struct attribute_group *group)
4241{
4242 dev->groups[1] = group;
4243}
4244
4208#endif /* IB_VERBS_H */ 4245#endif /* IB_VERBS_H */
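
A minimal sketch of how an existing driver would use rdma_set_device_sysfs_group() to keep its per-device sysfs attributes; the attribute name, its show routine and the setup helper are illustrative only.

    static ssize_t hca_type_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "%s\n", "MYHCA");
    }
    static DEVICE_ATTR_RO(hca_type);

    static struct attribute *my_class_attrs[] = {
            &dev_attr_hca_type.attr,
            NULL
    };

    static const struct attribute_group my_attr_group = {
            .attrs = my_class_attrs,
    };

    static void my_setup_sysfs(struct ib_device *ibdev)
    {
            /* Must run before ib_register_device() so the group is created with the device. */
            rdma_set_device_sysfs_group(ibdev, &my_attr_group);
    }
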
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 5d71a7f51a9f..60987a5903b7 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -152,7 +152,11 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
152 * @ps: RDMA port space. 152 * @ps: RDMA port space.
153 * @qp_type: type of queue pair associated with the id. 153 * @qp_type: type of queue pair associated with the id.
154 * 154 *
155 * The id holds a reference on the network namespace until it is destroyed. 155 * Returns a new rdma_cm_id. The id holds a reference on the network
156 * namespace until it is destroyed.
157 *
158 * The event handler callback serializes on the id's mutex and is
159 * allowed to sleep.
156 */ 160 */
157#define rdma_create_id(net, event_handler, context, ps, qp_type) \ 161#define rdma_create_id(net, event_handler, context, ps, qp_type) \
158 __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ 162 __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \
@@ -192,7 +196,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
192 * @timeout_ms: Time to wait for resolution to complete. 196 * @timeout_ms: Time to wait for resolution to complete.
193 */ 197 */
194int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 198int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
195 const struct sockaddr *dst_addr, int timeout_ms); 199 const struct sockaddr *dst_addr,
200 unsigned long timeout_ms);
196 201
197/** 202/**
198 * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier 203 * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
@@ -202,7 +207,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
202 * Users must have first called rdma_resolve_addr to resolve a dst_addr 207 * Users must have first called rdma_resolve_addr to resolve a dst_addr
203 * into an RDMA address before calling this routine. 208 * into an RDMA address before calling this routine.
204 */ 209 */
205int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); 210int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
206 211
207/** 212/**
208 * rdma_create_qp - Allocate a QP and associate it with the specified RDMA 213 * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
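
A minimal sketch of a ULP resolution sequence with the timeouts now typed as unsigned long; the 2000 ms values are arbitrary, and in practice rdma_resolve_route() is issued from the event handler once RDMA_CM_EVENT_ADDR_RESOLVED has been delivered.

    static int my_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src,
                               struct sockaddr *dst)
    {
            return rdma_resolve_addr(id, src, dst, 2000);
    }

    /* Later, from the RDMA_CM_EVENT_ADDR_RESOLVED handler: */
    static int my_resolve_route(struct rdma_cm_id *id)
    {
            return rdma_resolve_route(id, 2000);
    }
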
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index c369703fcd69..70218e6b5187 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -96,7 +96,7 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
96/** 96/**
97 * Check if there are any listeners to the netlink group 97 * Check if there are any listeners to the netlink group
98 * @group: the netlink group ID 98 * @group: the netlink group ID
99 * Returns 0 on success or a negative for no listeners. 99 * Returns true on success or false if no listeners.
100 */ 100 */
101int rdma_nl_chk_listeners(unsigned int group); 101bool rdma_nl_chk_listeners(unsigned int group);
102#endif /* _RDMA_NETLINK_H */ 102#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e79229a0cf01..3584d0816fcd 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -149,6 +149,10 @@ struct rvt_ibport {
149 149
150#define RVT_CQN_MAX 16 /* maximum length of cq name */ 150#define RVT_CQN_MAX 16 /* maximum length of cq name */
151 151
152#define RVT_SGE_COPY_MEMCPY 0
153#define RVT_SGE_COPY_CACHELESS 1
154#define RVT_SGE_COPY_ADAPTIVE 2
155
152/* 156/*
153 * Things that are driver specific, module parameters in hfi1 and qib 157 * Things that are driver specific, module parameters in hfi1 and qib
154 */ 158 */
@@ -161,6 +165,9 @@ struct rvt_driver_params {
161 */ 165 */
162 unsigned int lkey_table_size; 166 unsigned int lkey_table_size;
163 unsigned int qp_table_size; 167 unsigned int qp_table_size;
168 unsigned int sge_copy_mode;
169 unsigned int wss_threshold;
170 unsigned int wss_clean_period;
164 int qpn_start; 171 int qpn_start;
165 int qpn_inc; 172 int qpn_inc;
166 int qpn_res_start; 173 int qpn_res_start;
@@ -193,6 +200,19 @@ struct rvt_ah {
193 u8 log_pmtu; 200 u8 log_pmtu;
194}; 201};
195 202
203/* memory working set size */
204struct rvt_wss {
205 unsigned long *entries;
206 atomic_t total_count;
207 atomic_t clean_counter;
208 atomic_t clean_entry;
209
210 int threshold;
211 int num_entries;
212 long pages_mask;
213 unsigned int clean_period;
214};
215
196struct rvt_dev_info; 216struct rvt_dev_info;
197struct rvt_swqe; 217struct rvt_swqe;
198struct rvt_driver_provided { 218struct rvt_driver_provided {
@@ -211,11 +231,18 @@ struct rvt_driver_provided {
211 * version requires the s_lock not to be held. The other assumes the 231 * version requires the s_lock not to be held. The other assumes the
212 * s_lock is held. 232 * s_lock is held.
213 */ 233 */
214 void (*schedule_send)(struct rvt_qp *qp); 234 bool (*schedule_send)(struct rvt_qp *qp);
215 void (*schedule_send_no_lock)(struct rvt_qp *qp); 235 bool (*schedule_send_no_lock)(struct rvt_qp *qp);
216 236
217 /* Driver specific work request checking */ 237 /*
218 int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); 238 * Driver specific work request setup and checking.
239 * This function is allowed to perform any setup, checks, or
240 * adjustments required to the SWQE in order to be usable by
241 * underlying protocols. This includes private data structure
242 * allocations.
243 */
244 int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
245 bool *call_send);
219 246
220 /* 247 /*
221 * Sometimes rdmavt needs to kick the driver's send progress. That is 248 * Sometimes rdmavt needs to kick the driver's send progress. That is
@@ -371,6 +398,9 @@ struct rvt_dev_info {
371 /* post send table */ 398 /* post send table */
372 const struct rvt_operation_params *post_parms; 399 const struct rvt_operation_params *post_parms;
373 400
401 /* opcode translation table */
402 const enum ib_wc_opcode *wc_opcode;
403
374 /* Driver specific helper functions */ 404 /* Driver specific helper functions */
375 struct rvt_driver_provided driver_f; 405 struct rvt_driver_provided driver_f;
376 406
@@ -411,6 +441,8 @@ struct rvt_dev_info {
411 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ 441 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
412 spinlock_t n_mcast_grps_lock; 442 spinlock_t n_mcast_grps_lock;
413 443
444 /* Memory Working Set Size */
445 struct rvt_wss *wss;
414}; 446};
415 447
416/** 448/**
@@ -423,7 +455,14 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
423 const char *fmt, const char *name, 455 const char *fmt, const char *name,
424 const int unit) 456 const int unit)
425{ 457{
426 snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit); 458 /*
459 * FIXME: rvt and its users want to touch the ibdev before
460 * registration and have things like the name work. We don't have the
461 * infrastructure in the core to support this directly today, hack it
462 * to work by setting the name manually here.
463 */
464 dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
465 strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
427} 466}
428 467
429/** 468/**
@@ -434,7 +473,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
434 */ 473 */
435static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) 474static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
436{ 475{
437 return rdi->ibdev.name; 476 return dev_name(&rdi->ibdev.dev);
438} 477}
439 478
440static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) 479static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
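
A minimal sketch of the naming direction this series takes: the canonical device name now comes from dev_name() on the embedded struct device, exactly as the updated rvt_get_ibdev_name() does, rather than from reading ib_device->name directly. The logging helper is illustrative.

    static void my_log_device(struct ib_device *ibdev)
    {
            pr_info("registered RDMA device %s\n", dev_name(&ibdev->dev));
    }
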
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 927f6d5b6d0f..cbafb1878669 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -678,6 +678,13 @@ void rvt_del_timers_sync(struct rvt_qp *qp);
678void rvt_stop_rc_timers(struct rvt_qp *qp); 678void rvt_stop_rc_timers(struct rvt_qp *qp);
679void rvt_add_retry_timer(struct rvt_qp *qp); 679void rvt_add_retry_timer(struct rvt_qp *qp);
680 680
681void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
682 void *data, u32 length,
683 bool release, bool copy_last);
684void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
685 enum ib_wc_status status);
686void rvt_ruc_loopback(struct rvt_qp *qp);
687
681/** 688/**
682 * struct rvt_qp_iter - the iterator for QPs 689 * struct rvt_qp_iter - the iterator for QPs
683 * @qp - the current QP 690 * @qp - the current QP
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 9654d33edd98..2638fa7cd702 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -173,16 +173,10 @@ int rdma_restrack_put(struct rdma_restrack_entry *res);
173/** 173/**
174 * rdma_restrack_set_task() - set the task for this resource 174 * rdma_restrack_set_task() - set the task for this resource
175 * @res: resource entry 175 * @res: resource entry
176 * @task: task struct 176 * @caller: kernel name, the current task will be used if the caller is NULL.
177 */ 177 */
178static inline void rdma_restrack_set_task(struct rdma_restrack_entry *res, 178void rdma_restrack_set_task(struct rdma_restrack_entry *res,
179 struct task_struct *task) 179 const char *caller);
180{
181 if (res->task)
182 put_task_struct(res->task);
183 get_task_struct(task);
184 res->task = task;
185}
186 180
187/* 181/*
188 * Helper functions for rdma drivers when filling out 182 * Helper functions for rdma drivers when filling out
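
A minimal sketch of the new out-of-line setter: per its kernel-doc above, kernel-owned resources pass a caller string, while NULL attributes the resource to the current task. The wrapper functions are illustrative.

    static void my_track_kernel_res(struct rdma_restrack_entry *res)
    {
            rdma_restrack_set_task(res, KBUILD_MODNAME);
    }

    static void my_track_user_res(struct rdma_restrack_entry *res)
    {
            /* NULL: account the resource to the current task. */
            rdma_restrack_set_task(res, NULL);
    }
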
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 9e997c3c2f04..84d3d15f1f38 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -52,6 +52,7 @@ enum uverbs_attr_type {
52 UVERBS_ATTR_TYPE_IDR, 52 UVERBS_ATTR_TYPE_IDR,
53 UVERBS_ATTR_TYPE_FD, 53 UVERBS_ATTR_TYPE_FD,
54 UVERBS_ATTR_TYPE_ENUM_IN, 54 UVERBS_ATTR_TYPE_ENUM_IN,
55 UVERBS_ATTR_TYPE_IDRS_ARRAY,
55}; 56};
56 57
57enum uverbs_obj_access { 58enum uverbs_obj_access {
@@ -101,7 +102,7 @@ struct uverbs_attr_spec {
101 } enum_def; 102 } enum_def;
102 } u; 103 } u;
103 104
104 /* This weird split of the enum lets us remove some padding */ 105 /* This weird split lets us remove some padding */
105 union { 106 union {
106 struct { 107 struct {
107 /* 108 /*
@@ -111,6 +112,17 @@ struct uverbs_attr_spec {
111 */ 112 */
112 const struct uverbs_attr_spec *ids; 113 const struct uverbs_attr_spec *ids;
113 } enum_def; 114 } enum_def;
115
116 struct {
117 /*
118 * higher bits mean the namespace and lower bits mean
119 * the type id within the namespace.
120 */
121 u16 obj_type;
122 u16 min_len;
123 u16 max_len;
124 u8 access;
125 } objs_arr;
114 } u2; 126 } u2;
115}; 127};
116 128
@@ -251,6 +263,11 @@ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
251 return attr_key - 1; 263 return attr_key - 1;
252} 264}
253 265
266static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
267{
268 return attr_bkey + 1;
269}
270
254/* 271/*
255 * ======================================= 272 * =======================================
256 * Verbs definitions 273 * Verbs definitions
@@ -323,6 +340,27 @@ struct uverbs_object_tree_def {
323#define UA_MANDATORY .mandatory = 1 340#define UA_MANDATORY .mandatory = 1
324#define UA_OPTIONAL .mandatory = 0 341#define UA_OPTIONAL .mandatory = 0
325 342
343/*
 344 * _min_len must be bigger than 0 and _max_len must be smaller than 4095. Only
 345 * READ/WRITE accesses are supported.
346 */
347#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
348 ...) \
349 (&(const struct uverbs_attr_def){ \
350 .id = (_attr_id) + \
351 BUILD_BUG_ON_ZERO((_min_len) == 0 || \
352 (_max_len) > \
353 PAGE_SIZE / sizeof(void *) || \
354 (_min_len) > (_max_len) || \
355 (_access) == UVERBS_ACCESS_NEW || \
356 (_access) == UVERBS_ACCESS_DESTROY), \
357 .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \
358 .u2.objs_arr.obj_type = _idr_type, \
359 .u2.objs_arr.access = _access, \
360 .u2.objs_arr.min_len = _min_len, \
361 .u2.objs_arr.max_len = _max_len, \
362 __VA_ARGS__ } })
363
326#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ 364#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
327 (&(const struct uverbs_attr_def){ \ 365 (&(const struct uverbs_attr_def){ \
328 .id = _attr_id, \ 366 .id = _attr_id, \
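
A minimal sketch of declaring a method attribute with the new UVERBS_ATTR_IDRS_ARR() macro added above; the method and attribute IDs and the bound of 8 handles are hypothetical, and DECLARE_UVERBS_NAMED_METHOD() from uverbs_named_ioctl.h is assumed as the surrounding declaration.

    DECLARE_UVERBS_NAMED_METHOD(
            MY_METHOD_CREATE_FLOW,
            UVERBS_ATTR_IDRS_ARR(MY_ATTR_FLOW_ACTIONS_ARR,
                                 UVERBS_OBJECT_FLOW_ACTION,
                                 UVERBS_ACCESS_READ,
                                 1, 8,
                                 UA_OPTIONAL));
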
@@ -365,6 +403,15 @@ struct uverbs_object_tree_def {
365 __VA_ARGS__ }, \ 403 __VA_ARGS__ }, \
366 }) 404 })
367 405
406/* An input value that is a member in the enum _enum_type. */
407#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \
408 UVERBS_ATTR_PTR_IN( \
409 _attr_id, \
410 UVERBS_ATTR_SIZE( \
411 sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \
412 sizeof(u64)), \
413 __VA_ARGS__)
414
368/* 415/*
369 * An input value that is a bitwise combination of values of _enum_type. 416 * An input value that is a bitwise combination of values of _enum_type.
370 * This permits the flag value to be passed as either a u32 or u64, it must 417 * This permits the flag value to be passed as either a u32 or u64, it must
@@ -431,10 +478,16 @@ struct uverbs_obj_attr {
431 const struct uverbs_api_attr *attr_elm; 478 const struct uverbs_api_attr *attr_elm;
432}; 479};
433 480
481struct uverbs_objs_arr_attr {
482 struct ib_uobject **uobjects;
483 u16 len;
484};
485
434struct uverbs_attr { 486struct uverbs_attr {
435 union { 487 union {
436 struct uverbs_ptr_attr ptr_attr; 488 struct uverbs_ptr_attr ptr_attr;
437 struct uverbs_obj_attr obj_attr; 489 struct uverbs_obj_attr obj_attr;
490 struct uverbs_objs_arr_attr objs_arr_attr;
438 }; 491 };
439}; 492};
440 493
@@ -507,6 +560,31 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
507 return attr->ptr_attr.len; 560 return attr->ptr_attr.len;
508} 561}
509 562
563/**
564 * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
565 * UVERBS_ATTR_TYPE_IDRS_ARRAY.
566 * @arr: Returned pointer to array of pointers for uobjects or NULL if
567 * the attribute isn't provided.
568 *
569 * Return: The array length or 0 if no attribute was provided.
570 */
571static inline int uverbs_attr_get_uobjs_arr(
572 const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
573 struct ib_uobject ***arr)
574{
575 const struct uverbs_attr *attr =
576 uverbs_attr_get(attrs_bundle, attr_idx);
577
578 if (IS_ERR(attr)) {
579 *arr = NULL;
580 return 0;
581 }
582
583 *arr = attr->objs_arr_attr.uobjects;
584
585 return attr->objs_arr_attr.len;
586}
587
510static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) 588static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
511{ 589{
512 return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); 590 return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
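
A minimal sketch of the handler side consuming an IDR-array attribute with uverbs_attr_get_uobjs_arr(); the attribute ID matches the hypothetical declaration sketched earlier, and what is done with each uobject is left to the driver.

    static int my_handle_flow_actions(const struct uverbs_attr_bundle *attrs)
    {
            struct ib_uobject **arr;
            int num, i;

            num = uverbs_attr_get_uobjs_arr(attrs, MY_ATTR_FLOW_ACTIONS_ARR, &arr);
            for (i = 0; i < num; i++) {
                    /* arr[i]->object is the driver object behind each user handle. */
            }
            return 0;
    }
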
@@ -603,6 +681,9 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
603{ 681{
604 return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); 682 return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
605} 683}
684int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
685 size_t idx, s64 lower_bound, u64 upper_bound,
686 s64 *def_val);
606#else 687#else
607static inline int 688static inline int
608uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, 689uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -631,6 +712,34 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
631{ 712{
632 return ERR_PTR(-EINVAL); 713 return ERR_PTR(-EINVAL);
633} 714}
715static inline int
716_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
717 size_t idx, s64 lower_bound, u64 upper_bound,
718 s64 *def_val)
719{
720 return -EINVAL;
721}
634#endif 722#endif
635 723
724#define uverbs_get_const(_to, _attrs_bundle, _idx) \
725 ({ \
726 s64 _val; \
727 int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx, \
728 type_min(typeof(*_to)), \
729 type_max(typeof(*_to)), NULL); \
730 (*_to) = _val; \
731 _ret; \
732 })
733
734#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \
735 ({ \
736 s64 _val; \
737 s64 _def_val = _default; \
738 int _ret = \
739 _uverbs_get_const(&_val, _attrs_bundle, _idx, \
740 type_min(typeof(*_to)), \
741 type_max(typeof(*_to)), &_def_val); \
742 (*_to) = _val; \
743 _ret; \
744 })
636#endif 745#endif
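
A minimal sketch of pairing the new uverbs_get_const() helper with a constant-typed input attribute; the enum and attribute ID are illustrative, not taken from a real driver.

    enum my_dest_type {
            MY_DEST_TYPE_QP,
            MY_DEST_TYPE_WQ,
    };

    static int my_get_dest_type(const struct uverbs_attr_bundle *attrs,
                                enum my_dest_type *type)
    {
            /* Value is range-checked against the destination's underlying type. */
            return uverbs_get_const(type, attrs, MY_ATTR_DEST_TYPE);
    }
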
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3b00231cc084..3db2802fbc68 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -140,5 +140,56 @@ __uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
140#define uobj_alloc(_type, _ufile, _ib_dev) \ 140#define uobj_alloc(_type, _ufile, _ib_dev) \
141 __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev) 141 __uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
142 142
143static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
144 struct ib_uobject *uobj,
145 struct ib_device *ib_dev,
146 enum ib_flow_action_type type)
147{
148 atomic_set(&action->usecnt, 0);
149 action->device = ib_dev;
150 action->type = type;
151 action->uobject = uobj;
152 uobj->object = action;
153}
154
155struct ib_uflow_resources {
156 size_t max;
157 size_t num;
158 size_t collection_num;
159 size_t counters_num;
160 struct ib_counters **counters;
161 struct ib_flow_action **collection;
162};
163
164struct ib_uflow_object {
165 struct ib_uobject uobject;
166 struct ib_uflow_resources *resources;
167};
168
169struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
170void flow_resources_add(struct ib_uflow_resources *uflow_res,
171 enum ib_flow_spec_type type,
172 void *ibobj);
173void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
174
175static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
176 struct ib_qp *qp, struct ib_device *device,
177 struct ib_uflow_resources *uflow_res)
178{
179 struct ib_uflow_object *uflow;
180
181 uobj->object = ibflow;
182 ibflow->uobject = uobj;
183
184 if (qp) {
185 atomic_inc(&qp->usecnt);
186 ibflow->qp = qp;
187 }
188
189 ibflow->device = device;
190 uflow = container_of(uobj, typeof(*uflow), uobject);
191 uflow->resources = uflow_res;
192}
193
143#endif 194#endif
144 195
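
A minimal sketch of the flow-creation tail with ib_set_flow() in its new home: the resources referenced by the flow specs travel in an ib_uflow_resources allocation so they can be released together with the flow. The wrapper and its error handling are illustrative.

    static int my_finish_create_flow(struct ib_uobject *uobj, struct ib_flow *flow,
                                     struct ib_qp *qp, struct ib_device *dev,
                                     size_t num_specs)
    {
            struct ib_uflow_resources *uflow_res = flow_resources_alloc(num_specs);

            if (!uflow_res)
                    return -ENOMEM;

            ib_set_flow(uobj, flow, qp, dev, uflow_res);
            return 0;
    }
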
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25a16760de2a..1254b51a551a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -763,10 +763,28 @@ struct ib_uverbs_sge {
763 __u32 lkey; 763 __u32 lkey;
764}; 764};
765 765
766enum ib_uverbs_wr_opcode {
767 IB_UVERBS_WR_RDMA_WRITE = 0,
768 IB_UVERBS_WR_RDMA_WRITE_WITH_IMM = 1,
769 IB_UVERBS_WR_SEND = 2,
770 IB_UVERBS_WR_SEND_WITH_IMM = 3,
771 IB_UVERBS_WR_RDMA_READ = 4,
772 IB_UVERBS_WR_ATOMIC_CMP_AND_SWP = 5,
773 IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD = 6,
774 IB_UVERBS_WR_LOCAL_INV = 7,
775 IB_UVERBS_WR_BIND_MW = 8,
776 IB_UVERBS_WR_SEND_WITH_INV = 9,
777 IB_UVERBS_WR_TSO = 10,
778 IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
779 IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
780 IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
781 /* Review enum ib_wr_opcode before modifying this */
782};
783
766struct ib_uverbs_send_wr { 784struct ib_uverbs_send_wr {
767 __aligned_u64 wr_id; 785 __aligned_u64 wr_id;
768 __u32 num_sge; 786 __u32 num_sge;
769 __u32 opcode; 787 __u32 opcode; /* see enum ib_uverbs_wr_opcode */
770 __u32 send_flags; 788 __u32 send_flags;
771 union { 789 union {
772 __be32 imm_data; 790 __be32 imm_data;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index addbb9c4529e..8fa9f90e2bb1 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -45,6 +45,9 @@ enum {
45 MLX5_QP_FLAG_BFREG_INDEX = 1 << 3, 45 MLX5_QP_FLAG_BFREG_INDEX = 1 << 3,
46 MLX5_QP_FLAG_TYPE_DCT = 1 << 4, 46 MLX5_QP_FLAG_TYPE_DCT = 1 << 4,
47 MLX5_QP_FLAG_TYPE_DCI = 1 << 5, 47 MLX5_QP_FLAG_TYPE_DCI = 1 << 5,
48 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC = 1 << 6,
49 MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC = 1 << 7,
50 MLX5_QP_FLAG_ALLOW_SCATTER_CQE = 1 << 8,
48}; 51};
49 52
50enum { 53enum {
@@ -349,9 +352,22 @@ struct mlx5_ib_create_qp_rss {
349 __u32 flags; 352 __u32 flags;
350}; 353};
351 354
355enum mlx5_ib_create_qp_resp_mask {
356 MLX5_IB_CREATE_QP_RESP_MASK_TIRN = 1UL << 0,
357 MLX5_IB_CREATE_QP_RESP_MASK_TISN = 1UL << 1,
358 MLX5_IB_CREATE_QP_RESP_MASK_RQN = 1UL << 2,
359 MLX5_IB_CREATE_QP_RESP_MASK_SQN = 1UL << 3,
360};
361
352struct mlx5_ib_create_qp_resp { 362struct mlx5_ib_create_qp_resp {
353 __u32 bfreg_index; 363 __u32 bfreg_index;
354 __u32 reserved; 364 __u32 reserved;
365 __u32 comp_mask;
366 __u32 tirn;
367 __u32 tisn;
368 __u32 rqn;
369 __u32 sqn;
370 __u32 reserved1;
355}; 371};
356 372
357struct mlx5_ib_alloc_mw { 373struct mlx5_ib_alloc_mw {
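
A minimal sketch of how a user-space provider would consume the extended create-QP response: only fields whose bit is set in comp_mask are meaningful. The response is assumed to have been filled in by the create-QP command already.

    static void my_read_qp_resp(const struct mlx5_ib_create_qp_resp *resp,
                                __u32 *tirn)
    {
            if (resp->comp_mask & MLX5_IB_CREATE_QP_RESP_MASK_TIRN)
                    *tirn = resp->tirn;
    }
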
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 9c51801b9e64..408e220034de 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -125,6 +125,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
125 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK, 125 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
126 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE, 126 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
127 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA, 127 MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
128 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
128}; 129};
129 130
130enum mlx5_ib_flow_matcher_destroy_attrs { 131enum mlx5_ib_flow_matcher_destroy_attrs {
@@ -155,6 +156,8 @@ enum mlx5_ib_create_flow_attrs {
155 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP, 156 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
156 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX, 157 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
157 MLX5_IB_ATTR_CREATE_FLOW_MATCHER, 158 MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
159 MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
160 MLX5_IB_ATTR_CREATE_FLOW_TAG,
158}; 161};
159 162
160enum mlx5_ib_destoy_flow_attrs { 163enum mlx5_ib_destoy_flow_attrs {
@@ -166,4 +169,22 @@ enum mlx5_ib_flow_methods {
166 MLX5_IB_METHOD_DESTROY_FLOW, 169 MLX5_IB_METHOD_DESTROY_FLOW,
167}; 170};
168 171
172enum mlx5_ib_flow_action_methods {
173 MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER = (1U << UVERBS_ID_NS_SHIFT),
174 MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
175};
176
177enum mlx5_ib_create_flow_action_create_modify_header_attrs {
178 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
179 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
180 MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
181};
182
183enum mlx5_ib_create_flow_action_create_packet_reformat_attrs {
184 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
185 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
186 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
187 MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
188};
189
169#endif 190#endif
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 8a2fb33f3ed4..4ef62c0e8452 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -39,5 +39,17 @@ enum mlx5_ib_uapi_flow_action_flags {
39 MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0, 39 MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA = 1 << 0,
40}; 40};
41 41
42enum mlx5_ib_uapi_flow_table_type {
43 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX = 0x0,
44 MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX = 0x1,
45};
46
47enum mlx5_ib_uapi_flow_action_packet_reformat_type {
48 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2 = 0x0,
49 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x1,
50 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x2,
51 MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
52};
53
42#endif 54#endif
43 55
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index edba6351ac13..f9c41bf59efc 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -227,8 +227,9 @@ enum rdma_nldev_command {
227 RDMA_NLDEV_CMD_UNSPEC, 227 RDMA_NLDEV_CMD_UNSPEC,
228 228
229 RDMA_NLDEV_CMD_GET, /* can dump */ 229 RDMA_NLDEV_CMD_GET, /* can dump */
230 RDMA_NLDEV_CMD_SET,
230 231
231 /* 2 - 4 are free to use */ 232 /* 3 - 4 are free to use */
232 233
233 RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */ 234 RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
234 235
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 24800c6c1f32..06c34d99be85 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -53,7 +53,7 @@ enum {
53 53
54struct ib_uverbs_attr { 54struct ib_uverbs_attr {
55 __u16 attr_id; /* command specific type attribute */ 55 __u16 attr_id; /* command specific type attribute */
56 __u16 len; /* only for pointers */ 56 __u16 len; /* only for pointers and IDRs array */
57 __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */ 57 __u16 flags; /* combination of UVERBS_ATTR_F_XXXX */
58 union { 58 union {
59 struct { 59 struct {
@@ -63,7 +63,10 @@ struct ib_uverbs_attr {
63 __u16 reserved; 63 __u16 reserved;
64 } attr_data; 64 } attr_data;
65 union { 65 union {
66 /* Used by PTR_IN/OUT, ENUM_IN and IDR */ 66 /*
67 * ptr to command, inline data, idr/fd or
68 * ptr to __u32 array of IDRs
69 */
67 __aligned_u64 data; 70 __aligned_u64 data;
68 /* Used by FD_IN and FD_OUT */ 71 /* Used by FD_IN and FD_OUT */
69 __s64 data_s64; 72 __s64 data_s64;