author     Linus Torvalds <torvalds@linux-foundation.org>   2015-11-07 16:33:07 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-11-07 16:33:07 -0500
commit     ab9f2faf8f40604551336e5b0a18e0910a57b92c (patch)
tree       9068c73acf24452762d6e2b096df19e29436183e
parent     75021d28594d9b6fb4d05bbc41f77948a0db0e02 (diff)
parent     db7489e07669073970358b6cacf6a9dd8dc9275e (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
 "This is my initial round of 4.4 merge window patches. There are a few
  other things I wish to get in for 4.4 that aren't in this pull, as
  this represents what has gone through merge/build/run testing and not
  what is the last few items for which testing is not yet complete.

   - "Checksum offload support in user space" enablement
   - Misc cxgb4 fixes, add T6 support
   - Misc usnic fixes
   - 32 bit build warning fixes
   - Misc ocrdma fixes
   - Multicast loopback prevention extension
   - Extend the GID cache to store and return attributes of GIDs
   - Misc iSER updates
   - iSER clustering update
   - Network NameSpace support for rdma CM
   - Work Request cleanup series
   - New Memory Registration API"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (76 commits)
  IB/core, cma: Make __attribute_const__ declarations sparse-friendly
  IB/core: Remove old fast registration API
  IB/ipath: Remove fast registration from the code
  IB/hfi1: Remove fast registration from the code
  RDMA/nes: Remove old FRWR API
  IB/qib: Remove old FRWR API
  iw_cxgb4: Remove old FRWR API
  RDMA/cxgb3: Remove old FRWR API
  RDMA/ocrdma: Remove old FRWR API
  IB/mlx4: Remove old FRWR API support
  IB/mlx5: Remove old FRWR API support
  IB/srp: Dont allocate a page vector when using fast_reg
  IB/srp: Remove srp_finish_mapping
  IB/srp: Convert to new registration API
  IB/srp: Split srp_map_sg
  RDS/IW: Convert to new memory registration API
  svcrdma: Port to new memory registration API
  xprtrdma: Port to new memory registration API
  iser-target: Port to new memory registration API
  IB/iser: Port to new fast registration API
  ...
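The network-namespace work changes the rdma_create_id() entry point to take an explicit struct net * as its first argument (see the drivers/infiniband/core/cma.c hunks below). A minimal caller sketch against the new signature, with a made-up handler name and passing the initial namespace the way the in-kernel users converted in this series do:

    #include <rdma/rdma_cm.h>
    #include <net/net_namespace.h>

    static int my_cm_handler(struct rdma_cm_id *id,
                             struct rdma_cm_event *event)
    {
            /* hypothetical handler; real consumers dispatch on event->event */
            return 0;
    }

    static struct rdma_cm_id *my_create_listener(void)
    {
            /* after this series the namespace is explicit; existing
             * in-kernel callers simply pass &init_net
             */
            return rdma_create_id(&init_net, my_cm_handler, NULL,
                                  RDMA_PS_TCP, IB_QPT_RC);
    }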
-rw-r--r--  MAINTAINERS | 5
-rw-r--r--  drivers/infiniband/core/addr.c | 20
-rw-r--r--  drivers/infiniband/core/agent.c | 2
-rw-r--r--  drivers/infiniband/core/cache.c | 112
-rw-r--r--  drivers/infiniband/core/cm.c | 40
-rw-r--r--  drivers/infiniband/core/cma.c | 173
-rw-r--r--  drivers/infiniband/core/core_priv.h | 9
-rw-r--r--  drivers/infiniband/core/device.c | 19
-rw-r--r--  drivers/infiniband/core/mad.c | 42
-rw-r--r--  drivers/infiniband/core/mad_priv.h | 2
-rw-r--r--  drivers/infiniband/core/multicast.c | 3
-rw-r--r--  drivers/infiniband/core/sa_query.c | 19
-rw-r--r--  drivers/infiniband/core/sysfs.c | 2
-rw-r--r--  drivers/infiniband/core/ucma.c | 5
-rw-r--r--  drivers/infiniband/core/uverbs.h | 1
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 402
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 1
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 4
-rw-r--r--  drivers/infiniband/core/verbs.c | 295
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c | 39
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h | 2
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 43
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 331
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/cxgb4/device.c | 10
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 25
-rw-r--r--  drivers/infiniband/hw/cxgb4/mem.c | 63
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 5
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 73
-rw-r--r--  drivers/infiniband/hw/cxgb4/t4.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c | 17
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 91
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 75
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 37
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 169
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 330
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 54
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 187
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 227
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_av.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c | 84
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.h | 6
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c | 170
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.h | 4
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 22
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 60
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 19
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 2
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 184
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c | 41
-rw-r--r--  drivers/infiniband/hw/qib/qib_mr.c | 46
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_rc.c | 38
-rw-r--r--  drivers/infiniband/hw/qib/qib_ruc.c | 20
-rw-r--r--  drivers/infiniband/hw/qib/qib_uc.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_ud.c | 20
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 29
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 19
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 9
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c | 8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c | 22
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 6
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c | 20
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.h | 25
-rw-r--r--  drivers/infiniband/ulp/iser/iser_initiator.c | 51
-rw-r--r--  drivers/infiniband/ulp/iser/iser_memory.c | 370
-rw-r--r--  drivers/infiniband/ulp/iser/iser_verbs.c | 20
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.c | 269
-rw-r--r--  drivers/infiniband/ulp/isert/ib_isert.h | 8
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 262
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 11
-rw-r--r--  drivers/infiniband/ulp/srpt/ib_srpt.c | 35
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 41
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 22
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h | 85
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb4/t4_msg.h | 48
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_main.c | 22
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/en_resources.c | 25
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/fw.c | 6
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/qp.c | 19
-rw-r--r--  drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 30
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 6
-rw-r--r--  drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c | 35
-rw-r--r--  drivers/staging/rdma/amso1100/c2_qp.c | 8
-rw-r--r--  drivers/staging/rdma/ehca/ehca_reqs.c | 53
-rw-r--r--  drivers/staging/rdma/hfi1/keys.c | 55
-rw-r--r--  drivers/staging/rdma/hfi1/mr.c | 33
-rw-r--r--  drivers/staging/rdma/hfi1/qp.c | 2
-rw-r--r--  drivers/staging/rdma/hfi1/rc.c | 24
-rw-r--r--  drivers/staging/rdma/hfi1/ruc.c | 18
-rw-r--r--  drivers/staging/rdma/hfi1/uc.c | 4
-rw-r--r--  drivers/staging/rdma/hfi1/ud.c | 20
-rw-r--r--  drivers/staging/rdma/hfi1/verbs.c | 26
-rw-r--r--  drivers/staging/rdma/hfi1/verbs.h | 14
-rw-r--r--  drivers/staging/rdma/ipath/ipath_rc.c | 24
-rw-r--r--  drivers/staging/rdma/ipath/ipath_ruc.c | 16
-rw-r--r--  drivers/staging/rdma/ipath/ipath_uc.c | 4
-rw-r--r--  drivers/staging/rdma/ipath/ipath_ud.c | 26
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.c | 17
-rw-r--r--  drivers/staging/rdma/ipath/ipath_verbs.h | 8
-rw-r--r--  include/linux/mlx4/device.h | 2
-rw-r--r--  include/linux/mlx4/qp.h | 24
-rw-r--r--  include/linux/sunrpc/svc_rdma.h | 6
-rw-r--r--  include/rdma/ib_addr.h | 18
-rw-r--r--  include/rdma/ib_cache.h | 40
-rw-r--r--  include/rdma/ib_pack.h | 2
-rw-r--r--  include/rdma/ib_sa.h | 12
-rw-r--r--  include/rdma/ib_verbs.h | 222
-rw-r--r--  include/rdma/rdma_cm.h | 8
-rw-r--r--  include/uapi/rdma/ib_user_verbs.h | 26
-rw-r--r--  net/9p/trans_rdma.c | 4
-rw-r--r--  net/rds/ib.c | 2
-rw-r--r--  net/rds/ib.h | 6
-rw-r--r--  net/rds/ib_cm.c | 2
-rw-r--r--  net/rds/ib_send.c | 71
-rw-r--r--  net/rds/iw.c | 2
-rw-r--r--  net/rds/iw.h | 9
-rw-r--r--  net/rds/iw_cm.c | 2
-rw-r--r--  net/rds/iw_rdma.c | 129
-rw-r--r--  net/rds/iw_send.c | 154
-rw-r--r--  net/rds/rdma_transport.c | 4
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 119
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 123
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 38
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 3
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 3
140 files changed, 3600 insertions, 3015 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f56a10a3eabc..4c5446a6a4a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2757,9 +2757,10 @@ S: Supported
2757F: drivers/net/ethernet/cisco/enic/ 2757F: drivers/net/ethernet/cisco/enic/
2758 2758
2759CISCO VIC LOW LATENCY NIC DRIVER 2759CISCO VIC LOW LATENCY NIC DRIVER
2760M: Upinder Malhi <umalhi@cisco.com> 2760M: Christian Benvenuti <benve@cisco.com>
2761M: Dave Goodell <dgoodell@cisco.com>
2761S: Supported 2762S: Supported
2762F: drivers/infiniband/hw/usnic 2763F: drivers/infiniband/hw/usnic/
2763 2764
2764CIRRUS LOGIC EP93XX ETHERNET DRIVER 2765CIRRUS LOGIC EP93XX ETHERNET DRIVER
2765M: Hartley Sweeten <hsweeten@visionengravers.com> 2766M: Hartley Sweeten <hsweeten@visionengravers.com>
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 746cdf56bc76..34b1adad07aa 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
128 int ret = -EADDRNOTAVAIL; 128 int ret = -EADDRNOTAVAIL;
129 129
130 if (dev_addr->bound_dev_if) { 130 if (dev_addr->bound_dev_if) {
131 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); 131 dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
132 if (!dev) 132 if (!dev)
133 return -ENODEV; 133 return -ENODEV;
134 ret = rdma_copy_addr(dev_addr, dev, NULL); 134 ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
138 138
139 switch (addr->sa_family) { 139 switch (addr->sa_family) {
140 case AF_INET: 140 case AF_INET:
141 dev = ip_dev_find(&init_net, 141 dev = ip_dev_find(dev_addr->net,
142 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 142 ((struct sockaddr_in *) addr)->sin_addr.s_addr);
143 143
144 if (!dev) 144 if (!dev)
@@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
149 *vlan_id = rdma_vlan_dev_vlan_id(dev); 149 *vlan_id = rdma_vlan_dev_vlan_id(dev);
150 dev_put(dev); 150 dev_put(dev);
151 break; 151 break;
152
153#if IS_ENABLED(CONFIG_IPV6) 152#if IS_ENABLED(CONFIG_IPV6)
154 case AF_INET6: 153 case AF_INET6:
155 rcu_read_lock(); 154 rcu_read_lock();
156 for_each_netdev_rcu(&init_net, dev) { 155 for_each_netdev_rcu(dev_addr->net, dev) {
157 if (ipv6_chk_addr(&init_net, 156 if (ipv6_chk_addr(dev_addr->net,
158 &((struct sockaddr_in6 *) addr)->sin6_addr, 157 &((struct sockaddr_in6 *) addr)->sin6_addr,
159 dev, 1)) { 158 dev, 1)) {
160 ret = rdma_copy_addr(dev_addr, dev, NULL); 159 ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
236 fl4.daddr = dst_ip; 235 fl4.daddr = dst_ip;
237 fl4.saddr = src_ip; 236 fl4.saddr = src_ip;
238 fl4.flowi4_oif = addr->bound_dev_if; 237 fl4.flowi4_oif = addr->bound_dev_if;
239 rt = ip_route_output_key(&init_net, &fl4); 238 rt = ip_route_output_key(addr->net, &fl4);
240 if (IS_ERR(rt)) { 239 if (IS_ERR(rt)) {
241 ret = PTR_ERR(rt); 240 ret = PTR_ERR(rt);
242 goto out; 241 goto out;
@@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
278 fl6.saddr = src_in->sin6_addr; 277 fl6.saddr = src_in->sin6_addr;
279 fl6.flowi6_oif = addr->bound_dev_if; 278 fl6.flowi6_oif = addr->bound_dev_if;
280 279
281 dst = ip6_route_output(&init_net, NULL, &fl6); 280 dst = ip6_route_output(addr->net, NULL, &fl6);
282 if ((ret = dst->error)) 281 if ((ret = dst->error))
283 goto put; 282 goto put;
284 283
285 if (ipv6_addr_any(&fl6.saddr)) { 284 if (ipv6_addr_any(&fl6.saddr)) {
286 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, 285 ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
287 &fl6.daddr, 0, &fl6.saddr); 286 &fl6.daddr, 0, &fl6.saddr);
288 if (ret) 287 if (ret)
289 goto put; 288 goto put;
@@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
458} 457}
459 458
460int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, 459int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
461 u8 *dmac, u16 *vlan_id) 460 u8 *dmac, u16 *vlan_id, int if_index)
462{ 461{
463 int ret = 0; 462 int ret = 0;
464 struct rdma_dev_addr dev_addr; 463 struct rdma_dev_addr dev_addr;
@@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
476 rdma_gid2ip(&dgid_addr._sockaddr, dgid); 475 rdma_gid2ip(&dgid_addr._sockaddr, dgid);
477 476
478 memset(&dev_addr, 0, sizeof(dev_addr)); 477 memset(&dev_addr, 0, sizeof(dev_addr));
478 dev_addr.bound_dev_if = if_index;
479 dev_addr.net = &init_net;
479 480
480 ctx.addr = &dev_addr; 481 ctx.addr = &dev_addr;
481 init_completion(&ctx.comp); 482 init_completion(&ctx.comp);
@@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
510 rdma_gid2ip(&gid_addr._sockaddr, sgid); 511 rdma_gid2ip(&gid_addr._sockaddr, sgid);
511 512
512 memset(&dev_addr, 0, sizeof(dev_addr)); 513 memset(&dev_addr, 0, sizeof(dev_addr));
514 dev_addr.net = &init_net;
513 ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); 515 ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
514 if (ret) 516 if (ret)
515 return ret; 517 return ret;
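The addr.c hunks above drop the hard-coded &init_net in favour of dev_addr->net, and rdma_addr_find_dmac_by_grh() grows an if_index parameter so resolution can be bound to a specific netdevice. A rough caller sketch against the new prototype (function name, GIDs and ifindex are placeholders, error handling trimmed):

    #include <rdma/ib_addr.h>
    #include <linux/if_ether.h>

    static int resolve_peer_mac(const union ib_gid *sgid,
                                const union ib_gid *dgid,
                                int bound_ifindex)
    {
            u8 dmac[ETH_ALEN];
            u16 vlan_id;

            /* if_index is the new parameter added in this series; it binds
             * the lookup to the netdevice the GID was resolved on
             */
            return rdma_addr_find_dmac_by_grh(sgid, dgid, dmac,
                                              &vlan_id, bound_ifindex);
    }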
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 0429040304fd..4fa524dfb6cf 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
126 mad_send_wr = container_of(send_buf, 126 mad_send_wr = container_of(send_buf,
127 struct ib_mad_send_wr_private, 127 struct ib_mad_send_wr_private,
128 send_buf); 128 send_buf);
129 mad_send_wr->send_wr.wr.ud.port_num = port_num; 129 mad_send_wr->send_wr.port_num = port_num;
130 } 130 }
131 131
132 if (ib_post_send_mad(send_buf, NULL)) { 132 if (ib_post_send_mad(send_buf, NULL)) {
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 87471ef37198..89bebeada38b 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
409 mask, port, index); 409 mask, port, index);
410} 410}
411 411
412int ib_cache_gid_find_by_port(struct ib_device *ib_dev, 412int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
413 const union ib_gid *gid, 413 const union ib_gid *gid,
414 u8 port, struct net_device *ndev, 414 u8 port, struct net_device *ndev,
415 u16 *index) 415 u16 *index)
416{ 416{
417 int local_index; 417 int local_index;
418 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 418 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
@@ -438,6 +438,82 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
438 438
439 return -ENOENT; 439 return -ENOENT;
440} 440}
441EXPORT_SYMBOL(ib_find_cached_gid_by_port);
442
443/**
444 * ib_find_gid_by_filter - Returns the GID table index where a specified
445 * GID value occurs
446 * @device: The device to query.
447 * @gid: The GID value to search for.
448 * @port_num: The port number of the device where the GID value could be
449 * searched.
450 * @filter: The filter function is executed on any matching GID in the table.
451 * If the filter function returns true, the corresponding index is returned,
452 * otherwise, we continue searching the GID table. It's guaranteed that
453 * while filter is executed, ndev field is valid and the structure won't
454 * change. filter is executed in an atomic context. filter must not be NULL.
455 * @index: The index into the cached GID table where the GID was found. This
456 * parameter may be NULL.
457 *
458 * ib_cache_gid_find_by_filter() searches for the specified GID value
459 * of which the filter function returns true in the port's GID table.
460 * This function is only supported on RoCE ports.
461 *
462 */
463static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
464 const union ib_gid *gid,
465 u8 port,
466 bool (*filter)(const union ib_gid *,
467 const struct ib_gid_attr *,
468 void *),
469 void *context,
470 u16 *index)
471{
472 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
473 struct ib_gid_table *table;
474 unsigned int i;
475 bool found = false;
476
477 if (!ports_table)
478 return -EOPNOTSUPP;
479
480 if (port < rdma_start_port(ib_dev) ||
481 port > rdma_end_port(ib_dev) ||
482 !rdma_protocol_roce(ib_dev, port))
483 return -EPROTONOSUPPORT;
484
485 table = ports_table[port - rdma_start_port(ib_dev)];
486
487 for (i = 0; i < table->sz; i++) {
488 struct ib_gid_attr attr;
489 unsigned long flags;
490
491 read_lock_irqsave(&table->data_vec[i].lock, flags);
492 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
493 goto next;
494
495 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
496 goto next;
497
498 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
499
500 if (filter(gid, &attr, context))
501 found = true;
502
503next:
504 read_unlock_irqrestore(&table->data_vec[i].lock, flags);
505
506 if (found)
507 break;
508 }
509
510 if (!found)
511 return -ENOENT;
512
513 if (index)
514 *index = i;
515 return 0;
516}
441 517
442static struct ib_gid_table *alloc_gid_table(int sz) 518static struct ib_gid_table *alloc_gid_table(int sz)
443{ 519{
@@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
649int ib_get_cached_gid(struct ib_device *device, 725int ib_get_cached_gid(struct ib_device *device,
650 u8 port_num, 726 u8 port_num,
651 int index, 727 int index,
652 union ib_gid *gid) 728 union ib_gid *gid,
729 struct ib_gid_attr *gid_attr)
653{ 730{
654 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) 731 if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
655 return -EINVAL; 732 return -EINVAL;
656 733
657 return __ib_cache_gid_get(device, port_num, index, gid, NULL); 734 return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
658} 735}
659EXPORT_SYMBOL(ib_get_cached_gid); 736EXPORT_SYMBOL(ib_get_cached_gid);
660 737
661int ib_find_cached_gid(struct ib_device *device, 738int ib_find_cached_gid(struct ib_device *device,
662 const union ib_gid *gid, 739 const union ib_gid *gid,
740 struct net_device *ndev,
663 u8 *port_num, 741 u8 *port_num,
664 u16 *index) 742 u16 *index)
665{ 743{
666 return ib_cache_gid_find(device, gid, NULL, port_num, index); 744 return ib_cache_gid_find(device, gid, ndev, port_num, index);
667} 745}
668EXPORT_SYMBOL(ib_find_cached_gid); 746EXPORT_SYMBOL(ib_find_cached_gid);
669 747
748int ib_find_gid_by_filter(struct ib_device *device,
749 const union ib_gid *gid,
750 u8 port_num,
751 bool (*filter)(const union ib_gid *gid,
752 const struct ib_gid_attr *,
753 void *),
754 void *context, u16 *index)
755{
756 /* Only RoCE GID table supports filter function */
757 if (!rdma_cap_roce_gid_table(device, port_num) && filter)
758 return -EPROTONOSUPPORT;
759
760 return ib_cache_gid_find_by_filter(device, gid,
761 port_num, filter,
762 context, index);
763}
764EXPORT_SYMBOL(ib_find_gid_by_filter);
765
670int ib_get_cached_pkey(struct ib_device *device, 766int ib_get_cached_pkey(struct ib_device *device,
671 u8 port_num, 767 u8 port_num,
672 int index, 768 int index,
@@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device,
845 if (!use_roce_gid_table) { 941 if (!use_roce_gid_table) {
846 for (i = 0; i < gid_cache->table_len; ++i) { 942 for (i = 0; i < gid_cache->table_len; ++i) {
847 ret = ib_query_gid(device, port, i, 943 ret = ib_query_gid(device, port, i,
848 gid_cache->table + i); 944 gid_cache->table + i, NULL);
849 if (ret) { 945 if (ret) {
850 printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n", 946 printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
851 ret, device->name, i); 947 ret, device->name, i);
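The filter-based lookup exported above takes a caller-supplied predicate that runs in atomic context against each matching GID entry and its attributes; per the kernel-doc, the attribute's ndev field is valid while the filter runs. A hedged usage sketch, assuming a RoCE port and hypothetical wrapper names:

    #include <rdma/ib_cache.h>

    static bool gid_attr_matches_ndev(const union ib_gid *gid,
                                      const struct ib_gid_attr *attr,
                                      void *context)
    {
            /* context carries the net_device the caller cares about */
            return attr->ndev == context;
    }

    static int find_gid_index_for_ndev(struct ib_device *device, u8 port,
                                       const union ib_gid *gid,
                                       struct net_device *ndev, u16 *index)
    {
            /* only RoCE GID tables accept a non-NULL filter */
            return ib_find_gid_by_filter(device, gid, port,
                                         gid_attr_matches_ndev, ndev, index);
    }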
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4f918b929eca..0a26dd6d9b19 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -179,8 +179,6 @@ struct cm_av {
179 struct ib_ah_attr ah_attr; 179 struct ib_ah_attr ah_attr;
180 u16 pkey_index; 180 u16 pkey_index;
181 u8 timeout; 181 u8 timeout;
182 u8 valid;
183 u8 smac[ETH_ALEN];
184}; 182};
185 183
186struct cm_work { 184struct cm_work {
@@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
361 unsigned long flags; 359 unsigned long flags;
362 int ret; 360 int ret;
363 u8 p; 361 u8 p;
362 struct net_device *ndev = ib_get_ndev_from_path(path);
364 363
365 read_lock_irqsave(&cm.device_lock, flags); 364 read_lock_irqsave(&cm.device_lock, flags);
366 list_for_each_entry(cm_dev, &cm.device_list, list) { 365 list_for_each_entry(cm_dev, &cm.device_list, list) {
367 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, 366 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
368 &p, NULL)) { 367 ndev, &p, NULL)) {
369 port = cm_dev->port[p-1]; 368 port = cm_dev->port[p-1];
370 break; 369 break;
371 } 370 }
372 } 371 }
373 read_unlock_irqrestore(&cm.device_lock, flags); 372 read_unlock_irqrestore(&cm.device_lock, flags);
374 373
374 if (ndev)
375 dev_put(ndev);
376
375 if (!port) 377 if (!port)
376 return -EINVAL; 378 return -EINVAL;
377 379
@@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
384 ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, 386 ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
385 &av->ah_attr); 387 &av->ah_attr);
386 av->timeout = path->packet_life_time + 1; 388 av->timeout = path->packet_life_time + 1;
387 memcpy(av->smac, path->smac, sizeof(av->smac));
388 389
389 av->valid = 1;
390 return 0; 390 return 0;
391} 391}
392 392
@@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work)
1639 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); 1639 cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1640 1640
1641 memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); 1641 memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1642 work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
1643 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); 1642 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
1644 if (ret) { 1643 if (ret) {
1645 ib_get_cached_gid(work->port->cm_dev->ib_device, 1644 ib_get_cached_gid(work->port->cm_dev->ib_device,
1646 work->port->port_num, 0, &work->path[0].sgid); 1645 work->port->port_num, 0, &work->path[0].sgid,
1646 NULL);
1647 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, 1647 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1648 &work->path[0].sgid, sizeof work->path[0].sgid, 1648 &work->path[0].sgid, sizeof work->path[0].sgid,
1649 NULL, 0); 1649 NULL, 0);
@@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3618 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | 3618 *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
3619 IB_QP_DEST_QPN | IB_QP_RQ_PSN; 3619 IB_QP_DEST_QPN | IB_QP_RQ_PSN;
3620 qp_attr->ah_attr = cm_id_priv->av.ah_attr; 3620 qp_attr->ah_attr = cm_id_priv->av.ah_attr;
3621 if (!cm_id_priv->av.valid) {
3622 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
3623 return -EINVAL;
3624 }
3625 if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
3626 qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
3627 *qp_attr_mask |= IB_QP_VID;
3628 }
3629 if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
3630 memcpy(qp_attr->smac, cm_id_priv->av.smac,
3631 sizeof(qp_attr->smac));
3632 *qp_attr_mask |= IB_QP_SMAC;
3633 }
3634 if (cm_id_priv->alt_av.valid) {
3635 if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
3636 qp_attr->alt_vlan_id =
3637 cm_id_priv->alt_av.ah_attr.vlan_id;
3638 *qp_attr_mask |= IB_QP_ALT_VID;
3639 }
3640 if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
3641 memcpy(qp_attr->alt_smac,
3642 cm_id_priv->alt_av.smac,
3643 sizeof(qp_attr->alt_smac));
3644 *qp_attr_mask |= IB_QP_ALT_SMAC;
3645 }
3646 }
3647 qp_attr->path_mtu = cm_id_priv->path_mtu; 3621 qp_attr->path_mtu = cm_id_priv->path_mtu;
3648 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); 3622 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3649 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); 3623 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36b12d560e17..944cd90417bc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -44,6 +44,8 @@
44#include <linux/module.h> 44#include <linux/module.h>
45#include <net/route.h> 45#include <net/route.h>
46 46
47#include <net/net_namespace.h>
48#include <net/netns/generic.h>
47#include <net/tcp.h> 49#include <net/tcp.h>
48#include <net/ipv6.h> 50#include <net/ipv6.h>
49#include <net/ip_fib.h> 51#include <net/ip_fib.h>
@@ -86,7 +88,7 @@ static const char * const cma_events[] = {
86 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 88 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
87}; 89};
88 90
89const char *rdma_event_msg(enum rdma_cm_event_type event) 91const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
90{ 92{
91 size_t index = event; 93 size_t index = event;
92 94
@@ -110,22 +112,33 @@ static LIST_HEAD(dev_list);
110static LIST_HEAD(listen_any_list); 112static LIST_HEAD(listen_any_list);
111static DEFINE_MUTEX(lock); 113static DEFINE_MUTEX(lock);
112static struct workqueue_struct *cma_wq; 114static struct workqueue_struct *cma_wq;
113static DEFINE_IDR(tcp_ps); 115static int cma_pernet_id;
114static DEFINE_IDR(udp_ps);
115static DEFINE_IDR(ipoib_ps);
116static DEFINE_IDR(ib_ps);
117 116
118static struct idr *cma_idr(enum rdma_port_space ps) 117struct cma_pernet {
118 struct idr tcp_ps;
119 struct idr udp_ps;
120 struct idr ipoib_ps;
121 struct idr ib_ps;
122};
123
124static struct cma_pernet *cma_pernet(struct net *net)
125{
126 return net_generic(net, cma_pernet_id);
127}
128
129static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
119{ 130{
131 struct cma_pernet *pernet = cma_pernet(net);
132
120 switch (ps) { 133 switch (ps) {
121 case RDMA_PS_TCP: 134 case RDMA_PS_TCP:
122 return &tcp_ps; 135 return &pernet->tcp_ps;
123 case RDMA_PS_UDP: 136 case RDMA_PS_UDP:
124 return &udp_ps; 137 return &pernet->udp_ps;
125 case RDMA_PS_IPOIB: 138 case RDMA_PS_IPOIB:
126 return &ipoib_ps; 139 return &pernet->ipoib_ps;
127 case RDMA_PS_IB: 140 case RDMA_PS_IB:
128 return &ib_ps; 141 return &pernet->ib_ps;
129 default: 142 default:
130 return NULL; 143 return NULL;
131 } 144 }
@@ -145,24 +158,25 @@ struct rdma_bind_list {
145 unsigned short port; 158 unsigned short port;
146}; 159};
147 160
148static int cma_ps_alloc(enum rdma_port_space ps, 161static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
149 struct rdma_bind_list *bind_list, int snum) 162 struct rdma_bind_list *bind_list, int snum)
150{ 163{
151 struct idr *idr = cma_idr(ps); 164 struct idr *idr = cma_pernet_idr(net, ps);
152 165
153 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 166 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
154} 167}
155 168
156static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum) 169static struct rdma_bind_list *cma_ps_find(struct net *net,
170 enum rdma_port_space ps, int snum)
157{ 171{
158 struct idr *idr = cma_idr(ps); 172 struct idr *idr = cma_pernet_idr(net, ps);
159 173
160 return idr_find(idr, snum); 174 return idr_find(idr, snum);
161} 175}
162 176
163static void cma_ps_remove(enum rdma_port_space ps, int snum) 177static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
164{ 178{
165 struct idr *idr = cma_idr(ps); 179 struct idr *idr = cma_pernet_idr(net, ps);
166 180
167 idr_remove(idr, snum); 181 idr_remove(idr, snum);
168} 182}
@@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
427} 441}
428 442
429static inline int cma_validate_port(struct ib_device *device, u8 port, 443static inline int cma_validate_port(struct ib_device *device, u8 port,
430 union ib_gid *gid, int dev_type) 444 union ib_gid *gid, int dev_type,
445 int bound_if_index)
431{ 446{
432 u8 found_port;
433 int ret = -ENODEV; 447 int ret = -ENODEV;
448 struct net_device *ndev = NULL;
434 449
435 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 450 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
436 return ret; 451 return ret;
@@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
438 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 453 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
439 return ret; 454 return ret;
440 455
441 ret = ib_find_cached_gid(device, gid, &found_port, NULL); 456 if (dev_type == ARPHRD_ETHER)
442 if (port != found_port) 457 ndev = dev_get_by_index(&init_net, bound_if_index);
443 return -ENODEV; 458
459 ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
460
461 if (ndev)
462 dev_put(ndev);
444 463
445 return ret; 464 return ret;
446} 465}
@@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
472 &iboe_gid : &gid; 491 &iboe_gid : &gid;
473 492
474 ret = cma_validate_port(cma_dev->device, port, gidp, 493 ret = cma_validate_port(cma_dev->device, port, gidp,
475 dev_addr->dev_type); 494 dev_addr->dev_type,
495 dev_addr->bound_dev_if);
476 if (!ret) { 496 if (!ret) {
477 id_priv->id.port_num = port; 497 id_priv->id.port_num = port;
478 goto out; 498 goto out;
@@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
490 &iboe_gid : &gid; 510 &iboe_gid : &gid;
491 511
492 ret = cma_validate_port(cma_dev->device, port, gidp, 512 ret = cma_validate_port(cma_dev->device, port, gidp,
493 dev_addr->dev_type); 513 dev_addr->dev_type,
514 dev_addr->bound_dev_if);
494 if (!ret) { 515 if (!ret) {
495 id_priv->id.port_num = port; 516 id_priv->id.port_num = port;
496 goto out; 517 goto out;
@@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
531 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 552 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
532 continue; 553 continue;
533 554
534 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { 555 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
556 &gid, NULL);
557 i++) {
535 if (!memcmp(&gid, dgid, sizeof(gid))) { 558 if (!memcmp(&gid, dgid, sizeof(gid))) {
536 cma_dev = cur_dev; 559 cma_dev = cur_dev;
537 sgid = gid; 560 sgid = gid;
@@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
577 return 0; 600 return 0;
578} 601}
579 602
580struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, 603struct rdma_cm_id *rdma_create_id(struct net *net,
604 rdma_cm_event_handler event_handler,
581 void *context, enum rdma_port_space ps, 605 void *context, enum rdma_port_space ps,
582 enum ib_qp_type qp_type) 606 enum ib_qp_type qp_type)
583{ 607{
@@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
601 INIT_LIST_HEAD(&id_priv->listen_list); 625 INIT_LIST_HEAD(&id_priv->listen_list);
602 INIT_LIST_HEAD(&id_priv->mc_list); 626 INIT_LIST_HEAD(&id_priv->mc_list);
603 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 627 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
628 id_priv->id.route.addr.dev_addr.net = get_net(net);
604 629
605 return &id_priv->id; 630 return &id_priv->id;
606} 631}
@@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
718 goto out; 743 goto out;
719 744
720 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 745 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
721 qp_attr.ah_attr.grh.sgid_index, &sgid); 746 qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
722 if (ret) 747 if (ret)
723 goto out; 748 goto out;
724 749
725 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 750 BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
726 751
727 if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
728 ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
729
730 if (ret)
731 goto out;
732 }
733 if (conn_param) 752 if (conn_param)
734 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 753 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
735 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 754 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -1260,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
1260 cma_protocol_roce(&id_priv->id); 1279 cma_protocol_roce(&id_priv->id);
1261 1280
1262 return !addr->dev_addr.bound_dev_if || 1281 return !addr->dev_addr.bound_dev_if ||
1263 (net_eq(dev_net(net_dev), &init_net) && 1282 (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
1264 addr->dev_addr.bound_dev_if == net_dev->ifindex); 1283 addr->dev_addr.bound_dev_if == net_dev->ifindex);
1265} 1284}
1266 1285
@@ -1321,7 +1340,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
1321 } 1340 }
1322 } 1341 }
1323 1342
1324 bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id), 1343 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
1344 rdma_ps_from_service_id(req.service_id),
1325 cma_port_from_service_id(req.service_id)); 1345 cma_port_from_service_id(req.service_id));
1326 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1346 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
1327 if (IS_ERR(id_priv) && *net_dev) { 1347 if (IS_ERR(id_priv) && *net_dev) {
@@ -1392,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
1392static void cma_release_port(struct rdma_id_private *id_priv) 1412static void cma_release_port(struct rdma_id_private *id_priv)
1393{ 1413{
1394 struct rdma_bind_list *bind_list = id_priv->bind_list; 1414 struct rdma_bind_list *bind_list = id_priv->bind_list;
1415 struct net *net = id_priv->id.route.addr.dev_addr.net;
1395 1416
1396 if (!bind_list) 1417 if (!bind_list)
1397 return; 1418 return;
@@ -1399,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
1399 mutex_lock(&lock); 1420 mutex_lock(&lock);
1400 hlist_del(&id_priv->node); 1421 hlist_del(&id_priv->node);
1401 if (hlist_empty(&bind_list->owners)) { 1422 if (hlist_empty(&bind_list->owners)) {
1402 cma_ps_remove(bind_list->ps, bind_list->port); 1423 cma_ps_remove(net, bind_list->ps, bind_list->port);
1403 kfree(bind_list); 1424 kfree(bind_list);
1404 } 1425 }
1405 mutex_unlock(&lock); 1426 mutex_unlock(&lock);
@@ -1458,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
1458 cma_deref_id(id_priv->id.context); 1479 cma_deref_id(id_priv->id.context);
1459 1480
1460 kfree(id_priv->id.route.path_rec); 1481 kfree(id_priv->id.route.path_rec);
1482 put_net(id_priv->id.route.addr.dev_addr.net);
1461 kfree(id_priv); 1483 kfree(id_priv);
1462} 1484}
1463EXPORT_SYMBOL(rdma_destroy_id); 1485EXPORT_SYMBOL(rdma_destroy_id);
@@ -1588,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1588 ib_event->param.req_rcvd.primary_path->service_id; 1610 ib_event->param.req_rcvd.primary_path->service_id;
1589 int ret; 1611 int ret;
1590 1612
1591 id = rdma_create_id(listen_id->event_handler, listen_id->context, 1613 id = rdma_create_id(listen_id->route.addr.dev_addr.net,
1614 listen_id->event_handler, listen_id->context,
1592 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1615 listen_id->ps, ib_event->param.req_rcvd.qp_type);
1593 if (IS_ERR(id)) 1616 if (IS_ERR(id))
1594 return NULL; 1617 return NULL;
@@ -1643,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1643 struct rdma_id_private *id_priv; 1666 struct rdma_id_private *id_priv;
1644 struct rdma_cm_id *id; 1667 struct rdma_cm_id *id;
1645 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1668 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
1669 struct net *net = listen_id->route.addr.dev_addr.net;
1646 int ret; 1670 int ret;
1647 1671
1648 id = rdma_create_id(listen_id->event_handler, listen_id->context, 1672 id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
1649 listen_id->ps, IB_QPT_UD); 1673 listen_id->ps, IB_QPT_UD);
1650 if (IS_ERR(id)) 1674 if (IS_ERR(id))
1651 return NULL; 1675 return NULL;
@@ -1882,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1882 return -ECONNABORTED; 1906 return -ECONNABORTED;
1883 1907
1884 /* Create a new RDMA id for the new IW CM ID */ 1908 /* Create a new RDMA id for the new IW CM ID */
1885 new_cm_id = rdma_create_id(listen_id->id.event_handler, 1909 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
1910 listen_id->id.event_handler,
1886 listen_id->id.context, 1911 listen_id->id.context,
1887 RDMA_PS_TCP, IB_QPT_RC); 1912 RDMA_PS_TCP, IB_QPT_RC);
1888 if (IS_ERR(new_cm_id)) { 1913 if (IS_ERR(new_cm_id)) {
@@ -2010,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
2010{ 2035{
2011 struct rdma_id_private *dev_id_priv; 2036 struct rdma_id_private *dev_id_priv;
2012 struct rdma_cm_id *id; 2037 struct rdma_cm_id *id;
2038 struct net *net = id_priv->id.route.addr.dev_addr.net;
2013 int ret; 2039 int ret;
2014 2040
2015 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2041 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
2016 return; 2042 return;
2017 2043
2018 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, 2044 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
2019 id_priv->id.qp_type); 2045 id_priv->id.qp_type);
2020 if (IS_ERR(id)) 2046 if (IS_ERR(id))
2021 return; 2047 return;
@@ -2294,16 +2320,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
2294 2320
2295 route->num_paths = 1; 2321 route->num_paths = 1;
2296 2322
2297 if (addr->dev_addr.bound_dev_if) 2323 if (addr->dev_addr.bound_dev_if) {
2298 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); 2324 ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
2325 route->path_rec->net = &init_net;
2326 route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
2327 }
2299 if (!ndev) { 2328 if (!ndev) {
2300 ret = -ENODEV; 2329 ret = -ENODEV;
2301 goto err2; 2330 goto err2;
2302 } 2331 }
2303 2332
2304 route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
2305 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2333 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
2306 memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
2307 2334
2308 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2335 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
2309 &route->path_rec->sgid); 2336 &route->path_rec->sgid);
@@ -2426,7 +2453,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
2426 p = 1; 2453 p = 1;
2427 2454
2428port_found: 2455port_found:
2429 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); 2456 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
2430 if (ret) 2457 if (ret)
2431 goto out; 2458 goto out;
2432 2459
@@ -2688,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps,
2688 if (!bind_list) 2715 if (!bind_list)
2689 return -ENOMEM; 2716 return -ENOMEM;
2690 2717
2691 ret = cma_ps_alloc(ps, bind_list, snum); 2718 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
2719 snum);
2692 if (ret < 0) 2720 if (ret < 0)
2693 goto err; 2721 goto err;
2694 2722
@@ -2707,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
2707 static unsigned int last_used_port; 2735 static unsigned int last_used_port;
2708 int low, high, remaining; 2736 int low, high, remaining;
2709 unsigned int rover; 2737 unsigned int rover;
2738 struct net *net = id_priv->id.route.addr.dev_addr.net;
2710 2739
2711 inet_get_local_port_range(&init_net, &low, &high); 2740 inet_get_local_port_range(net, &low, &high);
2712 remaining = (high - low) + 1; 2741 remaining = (high - low) + 1;
2713 rover = prandom_u32() % remaining + low; 2742 rover = prandom_u32() % remaining + low;
2714retry: 2743retry:
2715 if (last_used_port != rover && 2744 if (last_used_port != rover &&
2716 !cma_ps_find(ps, (unsigned short)rover)) { 2745 !cma_ps_find(net, ps, (unsigned short)rover)) {
2717 int ret = cma_alloc_port(ps, id_priv, rover); 2746 int ret = cma_alloc_port(ps, id_priv, rover);
2718 /* 2747 /*
2719 * Remember previously used port number in order to avoid 2748 * Remember previously used port number in order to avoid
@@ -2779,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps,
2779 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 2808 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2780 return -EACCES; 2809 return -EACCES;
2781 2810
2782 bind_list = cma_ps_find(ps, snum); 2811 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
2783 if (!bind_list) { 2812 if (!bind_list) {
2784 ret = cma_alloc_port(ps, id_priv, snum); 2813 ret = cma_alloc_port(ps, id_priv, snum);
2785 } else { 2814 } else {
@@ -2971,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2971 if (addr->sa_family == AF_INET) 3000 if (addr->sa_family == AF_INET)
2972 id_priv->afonly = 1; 3001 id_priv->afonly = 1;
2973#if IS_ENABLED(CONFIG_IPV6) 3002#if IS_ENABLED(CONFIG_IPV6)
2974 else if (addr->sa_family == AF_INET6) 3003 else if (addr->sa_family == AF_INET6) {
2975 id_priv->afonly = init_net.ipv6.sysctl.bindv6only; 3004 struct net *net = id_priv->id.route.addr.dev_addr.net;
3005
3006 id_priv->afonly = net->ipv6.sysctl.bindv6only;
3007 }
2976#endif 3008#endif
2977 } 3009 }
2978 ret = cma_get_port(id_priv); 3010 ret = cma_get_port(id_priv);
@@ -3777,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
3777 dev_addr = &id_priv->id.route.addr.dev_addr; 3809 dev_addr = &id_priv->id.route.addr.dev_addr;
3778 3810
3779 if ((dev_addr->bound_dev_if == ndev->ifindex) && 3811 if ((dev_addr->bound_dev_if == ndev->ifindex) &&
3812 (net_eq(dev_net(ndev), dev_addr->net)) &&
3780 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { 3813 memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
3781 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", 3814 printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
3782 ndev->name, &id_priv->id); 3815 ndev->name, &id_priv->id);
@@ -3802,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
3802 struct rdma_id_private *id_priv; 3835 struct rdma_id_private *id_priv;
3803 int ret = NOTIFY_DONE; 3836 int ret = NOTIFY_DONE;
3804 3837
3805 if (dev_net(ndev) != &init_net)
3806 return NOTIFY_DONE;
3807
3808 if (event != NETDEV_BONDING_FAILOVER) 3838 if (event != NETDEV_BONDING_FAILOVER)
3809 return NOTIFY_DONE; 3839 return NOTIFY_DONE;
3810 3840
@@ -3999,6 +4029,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = {
3999 .module = THIS_MODULE }, 4029 .module = THIS_MODULE },
4000}; 4030};
4001 4031
4032static int cma_init_net(struct net *net)
4033{
4034 struct cma_pernet *pernet = cma_pernet(net);
4035
4036 idr_init(&pernet->tcp_ps);
4037 idr_init(&pernet->udp_ps);
4038 idr_init(&pernet->ipoib_ps);
4039 idr_init(&pernet->ib_ps);
4040
4041 return 0;
4042}
4043
4044static void cma_exit_net(struct net *net)
4045{
4046 struct cma_pernet *pernet = cma_pernet(net);
4047
4048 idr_destroy(&pernet->tcp_ps);
4049 idr_destroy(&pernet->udp_ps);
4050 idr_destroy(&pernet->ipoib_ps);
4051 idr_destroy(&pernet->ib_ps);
4052}
4053
4054static struct pernet_operations cma_pernet_operations = {
4055 .init = cma_init_net,
4056 .exit = cma_exit_net,
4057 .id = &cma_pernet_id,
4058 .size = sizeof(struct cma_pernet),
4059};
4060
4002static int __init cma_init(void) 4061static int __init cma_init(void)
4003{ 4062{
4004 int ret; 4063 int ret;
@@ -4007,6 +4066,10 @@ static int __init cma_init(void)
4007 if (!cma_wq) 4066 if (!cma_wq)
4008 return -ENOMEM; 4067 return -ENOMEM;
4009 4068
4069 ret = register_pernet_subsys(&cma_pernet_operations);
4070 if (ret)
4071 goto err_wq;
4072
4010 ib_sa_register_client(&sa_client); 4073 ib_sa_register_client(&sa_client);
4011 rdma_addr_register_client(&addr_client); 4074 rdma_addr_register_client(&addr_client);
4012 register_netdevice_notifier(&cma_nb); 4075 register_netdevice_notifier(&cma_nb);
@@ -4024,6 +4087,7 @@ err:
4024 unregister_netdevice_notifier(&cma_nb); 4087 unregister_netdevice_notifier(&cma_nb);
4025 rdma_addr_unregister_client(&addr_client); 4088 rdma_addr_unregister_client(&addr_client);
4026 ib_sa_unregister_client(&sa_client); 4089 ib_sa_unregister_client(&sa_client);
4090err_wq:
4027 destroy_workqueue(cma_wq); 4091 destroy_workqueue(cma_wq);
4028 return ret; 4092 return ret;
4029} 4093}
@@ -4035,11 +4099,8 @@ static void __exit cma_cleanup(void)
4035 unregister_netdevice_notifier(&cma_nb); 4099 unregister_netdevice_notifier(&cma_nb);
4036 rdma_addr_unregister_client(&addr_client); 4100 rdma_addr_unregister_client(&addr_client);
4037 ib_sa_unregister_client(&sa_client); 4101 ib_sa_unregister_client(&sa_client);
4102 unregister_pernet_subsys(&cma_pernet_operations);
4038 destroy_workqueue(cma_wq); 4103 destroy_workqueue(cma_wq);
4039 idr_destroy(&tcp_ps);
4040 idr_destroy(&udp_ps);
4041 idr_destroy(&ipoib_ps);
4042 idr_destroy(&ib_ps);
4043} 4104}
4044 4105
4045module_init(cma_init); 4106module_init(cma_init);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 70bb36ebb03b..5cf6eb716f00 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
46void ib_cache_setup(void); 46void ib_cache_setup(void);
47void ib_cache_cleanup(void); 47void ib_cache_cleanup(void);
48 48
49int ib_resolve_eth_l2_attrs(struct ib_qp *qp, 49int ib_resolve_eth_dmac(struct ib_qp *qp,
50 struct ib_qp_attr *qp_attr, int *qp_attr_mask); 50 struct ib_qp_attr *qp_attr, int *qp_attr_mask);
51 51
52typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, 52typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
53 struct net_device *idev, void *cookie); 53 struct net_device *idev, void *cookie);
@@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
65 roce_netdev_callback cb, 65 roce_netdev_callback cb,
66 void *cookie); 66 void *cookie);
67 67
68int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
69 const union ib_gid *gid,
70 u8 port, struct net_device *ndev,
71 u16 *index);
72
73enum ib_cache_gid_default_mode { 68enum ib_cache_gid_default_mode {
74 IB_CACHE_GID_DEFAULT_MODE_SET, 69 IB_CACHE_GID_DEFAULT_MODE_SET,
75 IB_CACHE_GID_DEFAULT_MODE_DELETE 70 IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 17639117afc6..179e8134d57f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port);
672 * @port_num:Port number to query 672 * @port_num:Port number to query
673 * @index:GID table index to query 673 * @index:GID table index to query
674 * @gid:Returned GID 674 * @gid:Returned GID
675 * @attr: Returned GID attributes related to this GID index (only in RoCE).
676 * NULL means ignore.
675 * 677 *
676 * ib_query_gid() fetches the specified GID table entry. 678 * ib_query_gid() fetches the specified GID table entry.
677 */ 679 */
678int ib_query_gid(struct ib_device *device, 680int ib_query_gid(struct ib_device *device,
679 u8 port_num, int index, union ib_gid *gid) 681 u8 port_num, int index, union ib_gid *gid,
682 struct ib_gid_attr *attr)
680{ 683{
681 if (rdma_cap_roce_gid_table(device, port_num)) 684 if (rdma_cap_roce_gid_table(device, port_num))
682 return ib_get_cached_gid(device, port_num, index, gid); 685 return ib_get_cached_gid(device, port_num, index, gid, attr);
686
687 if (attr)
688 return -EINVAL;
683 689
684 return device->query_gid(device, port_num, index, gid); 690 return device->query_gid(device, port_num, index, gid);
685} 691}
@@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port);
819 * a specified GID value occurs. 825 * a specified GID value occurs.
820 * @device: The device to query. 826 * @device: The device to query.
821 * @gid: The GID value to search for. 827 * @gid: The GID value to search for.
828 * @ndev: The ndev related to the GID to search for.
822 * @port_num: The port number of the device where the GID value was found. 829 * @port_num: The port number of the device where the GID value was found.
823 * @index: The index into the GID table where the GID was found. This 830 * @index: The index into the GID table where the GID was found. This
824 * parameter may be NULL. 831 * parameter may be NULL.
825 */ 832 */
826int ib_find_gid(struct ib_device *device, union ib_gid *gid, 833int ib_find_gid(struct ib_device *device, union ib_gid *gid,
827 u8 *port_num, u16 *index) 834 struct net_device *ndev, u8 *port_num, u16 *index)
828{ 835{
829 union ib_gid tmp_gid; 836 union ib_gid tmp_gid;
830 int ret, port, i; 837 int ret, port, i;
831 838
832 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { 839 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
833 if (rdma_cap_roce_gid_table(device, port)) { 840 if (rdma_cap_roce_gid_table(device, port)) {
834 if (!ib_cache_gid_find_by_port(device, gid, port, 841 if (!ib_find_cached_gid_by_port(device, gid, port,
835 NULL, index)) { 842 ndev, index)) {
836 *port_num = port; 843 *port_num = port;
837 return 0; 844 return 0;
838 } 845 }
839 } 846 }
840 847
841 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { 848 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
842 ret = ib_query_gid(device, port, i, &tmp_gid); 849 ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
843 if (ret) 850 if (ret)
844 return ret; 851 return ret;
845 if (!memcmp(&tmp_gid, gid, sizeof *gid)) { 852 if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
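With the device.c change above, ib_query_gid() can optionally return the GID's attributes: passing NULL keeps the old behaviour, and asking for attributes on a port without a RoCE GID table fails with -EINVAL. A minimal sketch of the two call styles (device, port and index are placeholders):

    #include <rdma/ib_verbs.h>

    static int query_gid_examples(struct ib_device *device, u8 port)
    {
            union ib_gid gid;
            struct ib_gid_attr attr;
            int ret;

            /* old-style call: attributes ignored */
            ret = ib_query_gid(device, port, 0, &gid, NULL);
            if (ret)
                    return ret;

            /* attribute-returning call: only valid for RoCE GID tables */
            return ib_query_gid(device, port, 0, &gid, &attr);
    }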
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4b5c72311deb..8d8af7a41a30 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
752 struct ib_device *device = mad_agent_priv->agent.device; 752 struct ib_device *device = mad_agent_priv->agent.device;
753 u8 port_num; 753 u8 port_num;
754 struct ib_wc mad_wc; 754 struct ib_wc mad_wc;
755 struct ib_send_wr *send_wr = &mad_send_wr->send_wr; 755 struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
756 size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); 756 size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
757 u16 out_mad_pkey_index = 0; 757 u16 out_mad_pkey_index = 0;
758 u16 drslid; 758 u16 drslid;
@@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
761 761
762 if (rdma_cap_ib_switch(device) && 762 if (rdma_cap_ib_switch(device) &&
763 smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) 763 smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
764 port_num = send_wr->wr.ud.port_num; 764 port_num = send_wr->port_num;
765 else 765 else
766 port_num = mad_agent_priv->agent.port_num; 766 port_num = mad_agent_priv->agent.port_num;
767 767
@@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
832 } 832 }
833 833
834 build_smp_wc(mad_agent_priv->agent.qp, 834 build_smp_wc(mad_agent_priv->agent.qp,
835 send_wr->wr_id, drslid, 835 send_wr->wr.wr_id, drslid,
836 send_wr->wr.ud.pkey_index, 836 send_wr->pkey_index,
837 send_wr->wr.ud.port_num, &mad_wc); 837 send_wr->port_num, &mad_wc);
838 838
839 if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { 839 if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
840 mad_wc.byte_len = mad_send_wr->send_buf.hdr_len 840 mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
@@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
894 894
895 local->mad_send_wr = mad_send_wr; 895 local->mad_send_wr = mad_send_wr;
896 if (opa) { 896 if (opa) {
897 local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index; 897 local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
898 local->return_wc_byte_len = mad_size; 898 local->return_wc_byte_len = mad_size;
899 } 899 }
900 /* Reference MAD agent until send side of local completion handled */ 900 /* Reference MAD agent until send side of local completion handled */
@@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
1039 1039
1040 mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; 1040 mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
1041 1041
1042 mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr; 1042 mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
1043 mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list; 1043 mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
1044 mad_send_wr->send_wr.num_sge = 2; 1044 mad_send_wr->send_wr.wr.num_sge = 2;
1045 mad_send_wr->send_wr.opcode = IB_WR_SEND; 1045 mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
1046 mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED; 1046 mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
1047 mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn; 1047 mad_send_wr->send_wr.remote_qpn = remote_qpn;
1048 mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; 1048 mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
1049 mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index; 1049 mad_send_wr->send_wr.pkey_index = pkey_index;
1050 1050
1051 if (rmpp_active) { 1051 if (rmpp_active) {
1052 ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); 1052 ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
@@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1151 1151
1152 /* Set WR ID to find mad_send_wr upon completion */ 1152 /* Set WR ID to find mad_send_wr upon completion */
1153 qp_info = mad_send_wr->mad_agent_priv->qp_info; 1153 qp_info = mad_send_wr->mad_agent_priv->qp_info;
1154 mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list; 1154 mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
1155 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; 1155 mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1156 1156
1157 mad_agent = mad_send_wr->send_buf.mad_agent; 1157 mad_agent = mad_send_wr->send_buf.mad_agent;
@@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1179 1179
1180 spin_lock_irqsave(&qp_info->send_queue.lock, flags); 1180 spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1181 if (qp_info->send_queue.count < qp_info->send_queue.max_active) { 1181 if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1182 ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr, 1182 ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1183 &bad_send_wr); 1183 &bad_send_wr);
1184 list = &qp_info->send_queue.list; 1184 list = &qp_info->send_queue.list;
1185 } else { 1185 } else {
@@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1244 * request associated with the completion 1244 * request associated with the completion
1245 */ 1245 */
1246 next_send_buf = send_buf->next; 1246 next_send_buf = send_buf->next;
1247 mad_send_wr->send_wr.wr.ud.ah = send_buf->ah; 1247 mad_send_wr->send_wr.ah = send_buf->ah;
1248 1248
1249 if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == 1249 if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1250 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { 1250 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
@@ -1877,7 +1877,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
1877 ((1 << lmc) - 1))); 1877 ((1 << lmc) - 1)));
1878 } else { 1878 } else {
1879 if (ib_get_cached_gid(device, port_num, 1879 if (ib_get_cached_gid(device, port_num,
1880 attr.grh.sgid_index, &sgid)) 1880 attr.grh.sgid_index, &sgid, NULL))
1881 return 0; 1881 return 0;
1882 return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 1882 return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1883 16); 1883 16);
@@ -2457,7 +2457,7 @@ retry:
2457 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); 2457 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2458 2458
2459 if (queued_send_wr) { 2459 if (queued_send_wr) {
2460 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, 2460 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2461 &bad_send_wr); 2461 &bad_send_wr);
2462 if (ret) { 2462 if (ret) {
2463 dev_err(&port_priv->device->dev, 2463 dev_err(&port_priv->device->dev,
@@ -2515,7 +2515,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
2515 struct ib_send_wr *bad_send_wr; 2515 struct ib_send_wr *bad_send_wr;
2516 2516
2517 mad_send_wr->retry = 0; 2517 mad_send_wr->retry = 0;
2518 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr, 2518 ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2519 &bad_send_wr); 2519 &bad_send_wr);
2520 if (ret) 2520 if (ret)
2521 ib_mad_send_done_handler(port_priv, wc); 2521 ib_mad_send_done_handler(port_priv, wc);
@@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work)
2713 build_smp_wc(recv_mad_agent->agent.qp, 2713 build_smp_wc(recv_mad_agent->agent.qp,
2714 (unsigned long) local->mad_send_wr, 2714 (unsigned long) local->mad_send_wr,
2715 be16_to_cpu(IB_LID_PERMISSIVE), 2715 be16_to_cpu(IB_LID_PERMISSIVE),
2716 local->mad_send_wr->send_wr.wr.ud.pkey_index, 2716 local->mad_send_wr->send_wr.pkey_index,
2717 recv_mad_agent->agent.port_num, &wc); 2717 recv_mad_agent->agent.port_num, &wc);
2718 2718
2719 local->mad_priv->header.recv_wc.wc = &wc; 2719 local->mad_priv->header.recv_wc.wc = &wc;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 4a4f7aad0978..990698a6ab4b 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -123,7 +123,7 @@ struct ib_mad_send_wr_private {
123 struct ib_mad_send_buf send_buf; 123 struct ib_mad_send_buf send_buf;
124 u64 header_mapping; 124 u64 header_mapping;
125 u64 payload_mapping; 125 u64 payload_mapping;
126 struct ib_send_wr send_wr; 126 struct ib_ud_wr send_wr;
127 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; 127 struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
128 __be64 tid; 128 __be64 tid;
129 unsigned long timeout; 129 unsigned long timeout;
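With send_wr now an ib_ud_wr, the generic fields touched in the mad.c hunks above are reached through the embedded .wr member, while the UD-specific fields sit directly on the wrapper. A minimal sketch of the new layout, assuming placeholder names (my_context, sge, ah, remote_qpn, pkey_index, qp, bad_wr) that are not part of the patch:

	struct ib_ud_wr ud_wr;

	memset(&ud_wr, 0, sizeof(ud_wr));
	ud_wr.wr.wr_id      = (unsigned long)my_context;	/* generic fields live in .wr */
	ud_wr.wr.sg_list    = &sge;
	ud_wr.wr.num_sge    = 1;
	ud_wr.wr.opcode     = IB_WR_SEND;
	ud_wr.wr.send_flags = IB_SEND_SIGNALED;
	ud_wr.ah            = ah;				/* UD-only fields on the wrapper */
	ud_wr.remote_qpn    = remote_qpn;
	ud_wr.remote_qkey   = IB_QP_SET_QKEY;
	ud_wr.pkey_index    = pkey_index;

	ret = ib_post_send(qp, &ud_wr.wr, &bad_wr);		/* post the embedded base WR */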
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d38d8b2b2979..bb6685fb08c6 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
729 u16 gid_index; 729 u16 gid_index;
730 u8 p; 730 u8 p;
731 731
732 ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index); 732 ret = ib_find_cached_gid(device, &rec->port_gid,
733 NULL, &p, &gid_index);
733 if (ret) 734 if (ret)
734 return ret; 735 return ret;
735 736
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0..dcdaa79e3f0f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1007,26 +1007,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
1007 force_grh = rdma_cap_eth_ah(device, port_num); 1007 force_grh = rdma_cap_eth_ah(device, port_num);
1008 1008
1009 if (rec->hop_limit > 1 || force_grh) { 1009 if (rec->hop_limit > 1 || force_grh) {
1010 struct net_device *ndev = ib_get_ndev_from_path(rec);
1011
1010 ah_attr->ah_flags = IB_AH_GRH; 1012 ah_attr->ah_flags = IB_AH_GRH;
1011 ah_attr->grh.dgid = rec->dgid; 1013 ah_attr->grh.dgid = rec->dgid;
1012 1014
1013 ret = ib_find_cached_gid(device, &rec->sgid, &port_num, 1015 ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
1014 &gid_index); 1016 &gid_index);
1015 if (ret) 1017 if (ret) {
1018 if (ndev)
1019 dev_put(ndev);
1016 return ret; 1020 return ret;
1021 }
1017 1022
1018 ah_attr->grh.sgid_index = gid_index; 1023 ah_attr->grh.sgid_index = gid_index;
1019 ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); 1024 ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
1020 ah_attr->grh.hop_limit = rec->hop_limit; 1025 ah_attr->grh.hop_limit = rec->hop_limit;
1021 ah_attr->grh.traffic_class = rec->traffic_class; 1026 ah_attr->grh.traffic_class = rec->traffic_class;
1027 if (ndev)
1028 dev_put(ndev);
1022 } 1029 }
1023 if (force_grh) { 1030 if (force_grh) {
1024 memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); 1031 memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
1025 ah_attr->vlan_id = rec->vlan_id;
1026 } else {
1027 ah_attr->vlan_id = 0xffff;
1028 } 1032 }
1029
1030 return 0; 1033 return 0;
1031} 1034}
1032EXPORT_SYMBOL(ib_init_ah_from_path); 1035EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1150,9 +1153,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1150 1153
1151 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), 1154 ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
1152 mad->data, &rec); 1155 mad->data, &rec);
1153 rec.vlan_id = 0xffff; 1156 rec.net = NULL;
1157 rec.ifindex = 0;
1154 memset(rec.dmac, 0, ETH_ALEN); 1158 memset(rec.dmac, 0, ETH_ALEN);
1155 memset(rec.smac, 0, ETH_ALEN);
1156 query->callback(status, &rec, query->context); 1159 query->callback(status, &rec, query->context);
1157 } else 1160 } else
1158 query->callback(status, NULL, query->context); 1161 query->callback(status, NULL, query->context);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 34cdd74b0a17..b1f37d4095fa 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
289 union ib_gid gid; 289 union ib_gid gid;
290 ssize_t ret; 290 ssize_t ret;
291 291
292 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid); 292 ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
293 if (ret) 293 if (ret)
294 return ret; 294 return ret;
295 295
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 30467d10df91..8b5a934e1133 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -42,6 +42,7 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sysctl.h> 43#include <linux/sysctl.h>
44#include <linux/module.h> 44#include <linux/module.h>
45#include <linux/nsproxy.h>
45 46
46#include <rdma/rdma_user_cm.h> 47#include <rdma/rdma_user_cm.h>
47#include <rdma/ib_marshall.h> 48#include <rdma/ib_marshall.h>
@@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
472 return -ENOMEM; 473 return -ENOMEM;
473 474
474 ctx->uid = cmd.uid; 475 ctx->uid = cmd.uid;
475 ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); 476 ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
477 ucma_event_handler, ctx, cmd.ps, qp_type);
476 if (IS_ERR(ctx->cm_id)) { 478 if (IS_ERR(ctx->cm_id)) {
477 ret = PTR_ERR(ctx->cm_id); 479 ret = PTR_ERR(ctx->cm_id);
478 goto err1; 480 goto err1;
@@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
1211 return -EINVAL; 1213 return -EINVAL;
1212 1214
1213 memset(&sa_path, 0, sizeof(sa_path)); 1215 memset(&sa_path, 0, sizeof(sa_path));
1214 sa_path.vlan_id = 0xffff;
1215 1216
1216 ib_sa_unpack_path(path_data->path_rec, &sa_path); 1217 ib_sa_unpack_path(path_data->path_rec, &sa_path);
1217 ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); 1218 ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
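rdma_create_id() now takes the owning network namespace as its first argument; ucma passes the calling process's namespace, and an in-kernel user that is not namespace-aware would pass &init_net. A hedged sketch of the updated call (my_cm_handler and my_ctx are placeholders, not part of the patch):

	static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
	{
		/* placeholder event handler */
		return 0;
	}

	struct rdma_cm_id *id;

	id = rdma_create_id(&init_net, my_cm_handler, my_ctx, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);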
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33c243d..94bbd8c155fc 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
272IB_UVERBS_DECLARE_EX_CMD(destroy_flow); 272IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
273IB_UVERBS_DECLARE_EX_CMD(query_device); 273IB_UVERBS_DECLARE_EX_CMD(query_device);
274IB_UVERBS_DECLARE_EX_CMD(create_cq); 274IB_UVERBS_DECLARE_EX_CMD(create_cq);
275IB_UVERBS_DECLARE_EX_CMD(create_qp);
275 276
276#endif /* UVERBS_H */ 277#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f04be3..94816aeb95a0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1478 if (copy_from_user(&cmd, buf, sizeof(cmd))) 1478 if (copy_from_user(&cmd, buf, sizeof(cmd)))
1479 return -EFAULT; 1479 return -EFAULT;
1480 1480
1481 INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); 1481 INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1482 1482
1483 INIT_UDATA(&uhw, buf + sizeof(cmd), 1483 INIT_UDATA(&uhw, buf + sizeof(cmd),
1484 (unsigned long)cmd.response + sizeof(resp), 1484 (unsigned long)cmd.response + sizeof(resp),
@@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1741 return in_len; 1741 return in_len;
1742} 1742}
1743 1743
1744ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, 1744static int create_qp(struct ib_uverbs_file *file,
1745 struct ib_device *ib_dev, 1745 struct ib_udata *ucore,
1746 const char __user *buf, int in_len, 1746 struct ib_udata *uhw,
1747 int out_len) 1747 struct ib_uverbs_ex_create_qp *cmd,
1748{ 1748 size_t cmd_sz,
1749 struct ib_uverbs_create_qp cmd; 1749 int (*cb)(struct ib_uverbs_file *file,
1750 struct ib_uverbs_create_qp_resp resp; 1750 struct ib_uverbs_ex_create_qp_resp *resp,
1751 struct ib_udata udata; 1751 struct ib_udata *udata),
1752 struct ib_uqp_object *obj; 1752 void *context)
1753 struct ib_device *device; 1753{
1754 struct ib_pd *pd = NULL; 1754 struct ib_uqp_object *obj;
1755 struct ib_xrcd *xrcd = NULL; 1755 struct ib_device *device;
1756 struct ib_uobject *uninitialized_var(xrcd_uobj); 1756 struct ib_pd *pd = NULL;
1757 struct ib_cq *scq = NULL, *rcq = NULL; 1757 struct ib_xrcd *xrcd = NULL;
1758 struct ib_srq *srq = NULL; 1758 struct ib_uobject *uninitialized_var(xrcd_uobj);
1759 struct ib_qp *qp; 1759 struct ib_cq *scq = NULL, *rcq = NULL;
1760 struct ib_qp_init_attr attr; 1760 struct ib_srq *srq = NULL;
1761 int ret; 1761 struct ib_qp *qp;
1762 1762 char *buf;
1763 if (out_len < sizeof resp) 1763 struct ib_qp_init_attr attr;
1764 return -ENOSPC; 1764 struct ib_uverbs_ex_create_qp_resp resp;
1765 1765 int ret;
1766 if (copy_from_user(&cmd, buf, sizeof cmd))
1767 return -EFAULT;
1768 1766
1769 if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 1767 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1770 return -EPERM; 1768 return -EPERM;
1771 1769
1772 INIT_UDATA(&udata, buf + sizeof cmd,
1773 (unsigned long) cmd.response + sizeof resp,
1774 in_len - sizeof cmd, out_len - sizeof resp);
1775
1776 obj = kzalloc(sizeof *obj, GFP_KERNEL); 1770 obj = kzalloc(sizeof *obj, GFP_KERNEL);
1777 if (!obj) 1771 if (!obj)
1778 return -ENOMEM; 1772 return -ENOMEM;
1779 1773
1780 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); 1774 init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1775 &qp_lock_class);
1781 down_write(&obj->uevent.uobject.mutex); 1776 down_write(&obj->uevent.uobject.mutex);
1782 1777
1783 if (cmd.qp_type == IB_QPT_XRC_TGT) { 1778 if (cmd->qp_type == IB_QPT_XRC_TGT) {
1784 xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); 1779 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1780 &xrcd_uobj);
1785 if (!xrcd) { 1781 if (!xrcd) {
1786 ret = -EINVAL; 1782 ret = -EINVAL;
1787 goto err_put; 1783 goto err_put;
1788 } 1784 }
1789 device = xrcd->device; 1785 device = xrcd->device;
1790 } else { 1786 } else {
1791 if (cmd.qp_type == IB_QPT_XRC_INI) { 1787 if (cmd->qp_type == IB_QPT_XRC_INI) {
1792 cmd.max_recv_wr = cmd.max_recv_sge = 0; 1788 cmd->max_recv_wr = 0;
1789 cmd->max_recv_sge = 0;
1793 } else { 1790 } else {
1794 if (cmd.is_srq) { 1791 if (cmd->is_srq) {
1795 srq = idr_read_srq(cmd.srq_handle, file->ucontext); 1792 srq = idr_read_srq(cmd->srq_handle,
1793 file->ucontext);
1796 if (!srq || srq->srq_type != IB_SRQT_BASIC) { 1794 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1797 ret = -EINVAL; 1795 ret = -EINVAL;
1798 goto err_put; 1796 goto err_put;
1799 } 1797 }
1800 } 1798 }
1801 1799
1802 if (cmd.recv_cq_handle != cmd.send_cq_handle) { 1800 if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1803 rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); 1801 rcq = idr_read_cq(cmd->recv_cq_handle,
1802 file->ucontext, 0);
1804 if (!rcq) { 1803 if (!rcq) {
1805 ret = -EINVAL; 1804 ret = -EINVAL;
1806 goto err_put; 1805 goto err_put;
@@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1808 } 1807 }
1809 } 1808 }
1810 1809
1811 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); 1810 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1812 rcq = rcq ?: scq; 1811 rcq = rcq ?: scq;
1813 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 1812 pd = idr_read_pd(cmd->pd_handle, file->ucontext);
1814 if (!pd || !scq) { 1813 if (!pd || !scq) {
1815 ret = -EINVAL; 1814 ret = -EINVAL;
1816 goto err_put; 1815 goto err_put;
@@ -1825,31 +1824,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1825 attr.recv_cq = rcq; 1824 attr.recv_cq = rcq;
1826 attr.srq = srq; 1825 attr.srq = srq;
1827 attr.xrcd = xrcd; 1826 attr.xrcd = xrcd;
1828 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 1827 attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1829 attr.qp_type = cmd.qp_type; 1828 IB_SIGNAL_REQ_WR;
1829 attr.qp_type = cmd->qp_type;
1830 attr.create_flags = 0; 1830 attr.create_flags = 0;
1831 1831
1832 attr.cap.max_send_wr = cmd.max_send_wr; 1832 attr.cap.max_send_wr = cmd->max_send_wr;
1833 attr.cap.max_recv_wr = cmd.max_recv_wr; 1833 attr.cap.max_recv_wr = cmd->max_recv_wr;
1834 attr.cap.max_send_sge = cmd.max_send_sge; 1834 attr.cap.max_send_sge = cmd->max_send_sge;
1835 attr.cap.max_recv_sge = cmd.max_recv_sge; 1835 attr.cap.max_recv_sge = cmd->max_recv_sge;
1836 attr.cap.max_inline_data = cmd.max_inline_data; 1836 attr.cap.max_inline_data = cmd->max_inline_data;
1837 1837
1838 obj->uevent.events_reported = 0; 1838 obj->uevent.events_reported = 0;
1839 INIT_LIST_HEAD(&obj->uevent.event_list); 1839 INIT_LIST_HEAD(&obj->uevent.event_list);
1840 INIT_LIST_HEAD(&obj->mcast_list); 1840 INIT_LIST_HEAD(&obj->mcast_list);
1841 1841
1842 if (cmd.qp_type == IB_QPT_XRC_TGT) 1842 if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1843 sizeof(cmd->create_flags))
1844 attr.create_flags = cmd->create_flags;
1845
1846 if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1847 ret = -EINVAL;
1848 goto err_put;
1849 }
1850
1851 buf = (void *)cmd + sizeof(*cmd);
1852 if (cmd_sz > sizeof(*cmd))
1853 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1854 cmd_sz - sizeof(*cmd) - 1))) {
1855 ret = -EINVAL;
1856 goto err_put;
1857 }
1858
1859 if (cmd->qp_type == IB_QPT_XRC_TGT)
1843 qp = ib_create_qp(pd, &attr); 1860 qp = ib_create_qp(pd, &attr);
1844 else 1861 else
1845 qp = device->create_qp(pd, &attr, &udata); 1862 qp = device->create_qp(pd, &attr, uhw);
1846 1863
1847 if (IS_ERR(qp)) { 1864 if (IS_ERR(qp)) {
1848 ret = PTR_ERR(qp); 1865 ret = PTR_ERR(qp);
1849 goto err_put; 1866 goto err_put;
1850 } 1867 }
1851 1868
1852 if (cmd.qp_type != IB_QPT_XRC_TGT) { 1869 if (cmd->qp_type != IB_QPT_XRC_TGT) {
1853 qp->real_qp = qp; 1870 qp->real_qp = qp;
1854 qp->device = device; 1871 qp->device = device;
1855 qp->pd = pd; 1872 qp->pd = pd;
@@ -1875,19 +1892,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1875 goto err_destroy; 1892 goto err_destroy;
1876 1893
1877 memset(&resp, 0, sizeof resp); 1894 memset(&resp, 0, sizeof resp);
1878 resp.qpn = qp->qp_num; 1895 resp.base.qpn = qp->qp_num;
1879 resp.qp_handle = obj->uevent.uobject.id; 1896 resp.base.qp_handle = obj->uevent.uobject.id;
1880 resp.max_recv_sge = attr.cap.max_recv_sge; 1897 resp.base.max_recv_sge = attr.cap.max_recv_sge;
1881 resp.max_send_sge = attr.cap.max_send_sge; 1898 resp.base.max_send_sge = attr.cap.max_send_sge;
1882 resp.max_recv_wr = attr.cap.max_recv_wr; 1899 resp.base.max_recv_wr = attr.cap.max_recv_wr;
1883 resp.max_send_wr = attr.cap.max_send_wr; 1900 resp.base.max_send_wr = attr.cap.max_send_wr;
1884 resp.max_inline_data = attr.cap.max_inline_data; 1901 resp.base.max_inline_data = attr.cap.max_inline_data;
1885 1902
1886 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1903 resp.response_length = offsetof(typeof(resp), response_length) +
1887 &resp, sizeof resp)) { 1904 sizeof(resp.response_length);
1888 ret = -EFAULT; 1905
1889 goto err_copy; 1906 ret = cb(file, &resp, ucore);
1890 } 1907 if (ret)
1908 goto err_cb;
1891 1909
1892 if (xrcd) { 1910 if (xrcd) {
1893 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, 1911 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1931,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1913 1931
1914 up_write(&obj->uevent.uobject.mutex); 1932 up_write(&obj->uevent.uobject.mutex);
1915 1933
1916 return in_len; 1934 return 0;
1917 1935err_cb:
1918err_copy:
1919 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); 1936 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1920 1937
1921err_destroy: 1938err_destroy:
@@ -1937,6 +1954,113 @@ err_put:
1937 return ret; 1954 return ret;
1938} 1955}
1939 1956
1957static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1958 struct ib_uverbs_ex_create_qp_resp *resp,
1959 struct ib_udata *ucore)
1960{
1961 if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1962 return -EFAULT;
1963
1964 return 0;
1965}
1966
1967ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1968 struct ib_device *ib_dev,
1969 const char __user *buf, int in_len,
1970 int out_len)
1971{
1972 struct ib_uverbs_create_qp cmd;
1973 struct ib_uverbs_ex_create_qp cmd_ex;
1974 struct ib_udata ucore;
1975 struct ib_udata uhw;
1976 ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
1977 int err;
1978
1979 if (out_len < resp_size)
1980 return -ENOSPC;
1981
1982 if (copy_from_user(&cmd, buf, sizeof(cmd)))
1983 return -EFAULT;
1984
1985 INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
1986 resp_size);
1987 INIT_UDATA(&uhw, buf + sizeof(cmd),
1988 (unsigned long)cmd.response + resp_size,
1989 in_len - sizeof(cmd), out_len - resp_size);
1990
1991 memset(&cmd_ex, 0, sizeof(cmd_ex));
1992 cmd_ex.user_handle = cmd.user_handle;
1993 cmd_ex.pd_handle = cmd.pd_handle;
1994 cmd_ex.send_cq_handle = cmd.send_cq_handle;
1995 cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
1996 cmd_ex.srq_handle = cmd.srq_handle;
1997 cmd_ex.max_send_wr = cmd.max_send_wr;
1998 cmd_ex.max_recv_wr = cmd.max_recv_wr;
1999 cmd_ex.max_send_sge = cmd.max_send_sge;
2000 cmd_ex.max_recv_sge = cmd.max_recv_sge;
2001 cmd_ex.max_inline_data = cmd.max_inline_data;
2002 cmd_ex.sq_sig_all = cmd.sq_sig_all;
2003 cmd_ex.qp_type = cmd.qp_type;
2004 cmd_ex.is_srq = cmd.is_srq;
2005
2006 err = create_qp(file, &ucore, &uhw, &cmd_ex,
2007 offsetof(typeof(cmd_ex), is_srq) +
2008 sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2009 NULL);
2010
2011 if (err)
2012 return err;
2013
2014 return in_len;
2015}
2016
2017static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2018 struct ib_uverbs_ex_create_qp_resp *resp,
2019 struct ib_udata *ucore)
2020{
2021 if (ib_copy_to_udata(ucore, resp, resp->response_length))
2022 return -EFAULT;
2023
2024 return 0;
2025}
2026
2027int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2028 struct ib_device *ib_dev,
2029 struct ib_udata *ucore,
2030 struct ib_udata *uhw)
2031{
2032 struct ib_uverbs_ex_create_qp_resp resp;
2033 struct ib_uverbs_ex_create_qp cmd = {0};
2034 int err;
2035
2036 if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2037 sizeof(cmd.comp_mask)))
2038 return -EINVAL;
2039
2040 err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2041 if (err)
2042 return err;
2043
2044 if (cmd.comp_mask)
2045 return -EINVAL;
2046
2047 if (cmd.reserved)
2048 return -EINVAL;
2049
2050 if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2051 sizeof(resp.response_length)))
2052 return -ENOSPC;
2053
2054 err = create_qp(file, ucore, uhw, &cmd,
2055 min(ucore->inlen, sizeof(cmd)),
2056 ib_uverbs_ex_create_qp_cb, NULL);
2057
2058 if (err)
2059 return err;
2060
2061 return 0;
2062}
2063
1940ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, 2064ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1941 struct ib_device *ib_dev, 2065 struct ib_device *ib_dev,
1942 const char __user *buf, int in_len, int out_len) 2066 const char __user *buf, int in_len, int out_len)
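The extended create_qp path accepts a command longer than the structure it understands, but only when every trailing byte is zero; the buf[0] == 0 && !memcmp(buf, buf + 1, ...) test above is the usual idiom for that. Restated as a hypothetical helper (not part of the patch):

	/* True when all len bytes of buf are zero: byte 0 is zero and every
	 * byte equals its successor, so no temporary zero buffer is needed. */
	static bool tail_is_zero(const char *buf, size_t len)
	{
		return len == 0 || (buf[0] == 0 && !memcmp(buf, buf + 1, len - 1));
	}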
@@ -2221,7 +2345,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2221 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 2345 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
2222 2346
2223 if (qp->real_qp == qp) { 2347 if (qp->real_qp == qp) {
2224 ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); 2348 ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
2225 if (ret) 2349 if (ret)
2226 goto release_qp; 2350 goto release_qp;
2227 ret = qp->device->modify_qp(qp, attr, 2351 ret = qp->device->modify_qp(qp, attr,
@@ -2303,6 +2427,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2303 return in_len; 2427 return in_len;
2304} 2428}
2305 2429
2430static void *alloc_wr(size_t wr_size, __u32 num_sge)
2431{
2432 return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2433 num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2434};
2435
2306ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, 2436ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2307 struct ib_device *ib_dev, 2437 struct ib_device *ib_dev,
2308 const char __user *buf, int in_len, 2438 const char __user *buf, int in_len,
@@ -2351,14 +2481,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2351 goto out_put; 2481 goto out_put;
2352 } 2482 }
2353 2483
2354 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + 2484 if (is_ud) {
2355 user_wr->num_sge * sizeof (struct ib_sge), 2485 struct ib_ud_wr *ud;
2356 GFP_KERNEL); 2486
2357 if (!next) { 2487 if (user_wr->opcode != IB_WR_SEND &&
2358 ret = -ENOMEM; 2488 user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2489 ret = -EINVAL;
2490 goto out_put;
2491 }
2492
2493 ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
2494 if (!ud) {
2495 ret = -ENOMEM;
2496 goto out_put;
2497 }
2498
2499 ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2500 if (!ud->ah) {
2501 kfree(ud);
2502 ret = -EINVAL;
2503 goto out_put;
2504 }
2505 ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2506 ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2507
2508 next = &ud->wr;
2509 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2510 user_wr->opcode == IB_WR_RDMA_WRITE ||
2511 user_wr->opcode == IB_WR_RDMA_READ) {
2512 struct ib_rdma_wr *rdma;
2513
2514 rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
2515 if (!rdma) {
2516 ret = -ENOMEM;
2517 goto out_put;
2518 }
2519
2520 rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2521 rdma->rkey = user_wr->wr.rdma.rkey;
2522
2523 next = &rdma->wr;
2524 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2525 user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2526 struct ib_atomic_wr *atomic;
2527
2528 atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
2529 if (!atomic) {
2530 ret = -ENOMEM;
2531 goto out_put;
2532 }
2533
2534 atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2535 atomic->compare_add = user_wr->wr.atomic.compare_add;
2536 atomic->swap = user_wr->wr.atomic.swap;
2537 atomic->rkey = user_wr->wr.atomic.rkey;
2538
2539 next = &atomic->wr;
2540 } else if (user_wr->opcode == IB_WR_SEND ||
2541 user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2542 user_wr->opcode == IB_WR_SEND_WITH_INV) {
2543 next = alloc_wr(sizeof(*next), user_wr->num_sge);
2544 if (!next) {
2545 ret = -ENOMEM;
2546 goto out_put;
2547 }
2548 } else {
2549 ret = -EINVAL;
2359 goto out_put; 2550 goto out_put;
2360 } 2551 }
2361 2552
2553 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2554 user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2555 next->ex.imm_data =
2556 (__be32 __force) user_wr->ex.imm_data;
2557 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2558 next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2559 }
2560
2362 if (!last) 2561 if (!last)
2363 wr = next; 2562 wr = next;
2364 else 2563 else
@@ -2371,60 +2570,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2371 next->opcode = user_wr->opcode; 2570 next->opcode = user_wr->opcode;
2372 next->send_flags = user_wr->send_flags; 2571 next->send_flags = user_wr->send_flags;
2373 2572
2374 if (is_ud) {
2375 if (next->opcode != IB_WR_SEND &&
2376 next->opcode != IB_WR_SEND_WITH_IMM) {
2377 ret = -EINVAL;
2378 goto out_put;
2379 }
2380
2381 next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
2382 file->ucontext);
2383 if (!next->wr.ud.ah) {
2384 ret = -EINVAL;
2385 goto out_put;
2386 }
2387 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
2388 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
2389 if (next->opcode == IB_WR_SEND_WITH_IMM)
2390 next->ex.imm_data =
2391 (__be32 __force) user_wr->ex.imm_data;
2392 } else {
2393 switch (next->opcode) {
2394 case IB_WR_RDMA_WRITE_WITH_IMM:
2395 next->ex.imm_data =
2396 (__be32 __force) user_wr->ex.imm_data;
2397 case IB_WR_RDMA_WRITE:
2398 case IB_WR_RDMA_READ:
2399 next->wr.rdma.remote_addr =
2400 user_wr->wr.rdma.remote_addr;
2401 next->wr.rdma.rkey =
2402 user_wr->wr.rdma.rkey;
2403 break;
2404 case IB_WR_SEND_WITH_IMM:
2405 next->ex.imm_data =
2406 (__be32 __force) user_wr->ex.imm_data;
2407 break;
2408 case IB_WR_SEND_WITH_INV:
2409 next->ex.invalidate_rkey =
2410 user_wr->ex.invalidate_rkey;
2411 break;
2412 case IB_WR_ATOMIC_CMP_AND_SWP:
2413 case IB_WR_ATOMIC_FETCH_AND_ADD:
2414 next->wr.atomic.remote_addr =
2415 user_wr->wr.atomic.remote_addr;
2416 next->wr.atomic.compare_add =
2417 user_wr->wr.atomic.compare_add;
2418 next->wr.atomic.swap = user_wr->wr.atomic.swap;
2419 next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
2420 case IB_WR_SEND:
2421 break;
2422 default:
2423 ret = -EINVAL;
2424 goto out_put;
2425 }
2426 }
2427
2428 if (next->num_sge) { 2573 if (next->num_sge) {
2429 next->sg_list = (void *) next + 2574 next->sg_list = (void *) next +
2430 ALIGN(sizeof *next, sizeof (struct ib_sge)); 2575 ALIGN(sizeof *next, sizeof (struct ib_sge));
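After this cleanup a consumer no longer fills the removed wr.rdma/wr.atomic/wr.ud union; it builds the specific request type and posts the embedded base WR. A minimal sketch of an RDMA WRITE under the new layout (sge, remote_addr, rkey, qp and bad_wr are placeholders):

	struct ib_rdma_wr rdma_wr;

	memset(&rdma_wr, 0, sizeof(rdma_wr));
	rdma_wr.wr.opcode     = IB_WR_RDMA_WRITE;
	rdma_wr.wr.sg_list    = &sge;
	rdma_wr.wr.num_sge    = 1;
	rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
	rdma_wr.remote_addr   = remote_addr;	/* formerly wr.rdma.remote_addr */
	rdma_wr.rkey          = rkey;		/* formerly wr.rdma.rkey */

	ret = ib_post_send(qp, &rdma_wr.wr, &bad_wr);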
@@ -2458,8 +2603,8 @@ out_put:
2458 put_qp_read(qp); 2603 put_qp_read(qp);
2459 2604
2460 while (wr) { 2605 while (wr) {
2461 if (is_ud && wr->wr.ud.ah) 2606 if (is_ud && ud_wr(wr)->ah)
2462 put_ah_read(wr->wr.ud.ah); 2607 put_ah_read(ud_wr(wr)->ah);
2463 next = wr->next; 2608 next = wr->next;
2464 kfree(wr); 2609 kfree(wr);
2465 wr = next; 2610 wr = next;
@@ -2698,7 +2843,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2698 attr.grh.sgid_index = cmd.attr.grh.sgid_index; 2843 attr.grh.sgid_index = cmd.attr.grh.sgid_index;
2699 attr.grh.hop_limit = cmd.attr.grh.hop_limit; 2844 attr.grh.hop_limit = cmd.attr.grh.hop_limit;
2700 attr.grh.traffic_class = cmd.attr.grh.traffic_class; 2845 attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2701 attr.vlan_id = 0;
2702 memset(&attr.dmac, 0, sizeof(attr.dmac)); 2846 memset(&attr.dmac, 0, sizeof(attr.dmac));
2703 memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); 2847 memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2704 2848
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c29a660c72fe..e3ef28861be6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
127 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, 127 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
128 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, 128 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
129 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, 129 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
130 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
130}; 131};
131 132
132static void ib_uverbs_add_one(struct ib_device *device); 133static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index abd97247443e..7d2f14c9bbef 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
141 dst->preference = src->preference; 141 dst->preference = src->preference;
142 dst->packet_life_time_selector = src->packet_life_time_selector; 142 dst->packet_life_time_selector = src->packet_life_time_selector;
143 143
144 memset(dst->smac, 0, sizeof(dst->smac));
145 memset(dst->dmac, 0, sizeof(dst->dmac)); 144 memset(dst->dmac, 0, sizeof(dst->dmac));
146 dst->vlan_id = 0xffff; 145 dst->net = NULL;
146 dst->ifindex = 0;
147} 147}
148EXPORT_SYMBOL(ib_copy_path_rec_from_user); 148EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e1f2c9887f3f..043a60ee6836 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -41,6 +41,9 @@
41#include <linux/export.h> 41#include <linux/export.h>
42#include <linux/string.h> 42#include <linux/string.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/in.h>
45#include <linux/in6.h>
46#include <net/addrconf.h>
44 47
45#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
46#include <rdma/ib_cache.h> 49#include <rdma/ib_cache.h>
@@ -70,7 +73,7 @@ static const char * const ib_events[] = {
70 [IB_EVENT_GID_CHANGE] = "GID changed", 73 [IB_EVENT_GID_CHANGE] = "GID changed",
71}; 74};
72 75
73const char *ib_event_msg(enum ib_event_type event) 76const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
74{ 77{
75 size_t index = event; 78 size_t index = event;
76 79
@@ -104,7 +107,7 @@ static const char * const wc_statuses[] = {
104 [IB_WC_GENERAL_ERR] = "general error", 107 [IB_WC_GENERAL_ERR] = "general error",
105}; 108};
106 109
107const char *ib_wc_status_msg(enum ib_wc_status status) 110const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
108{ 111{
109 size_t index = status; 112 size_t index = status;
110 113
@@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
308} 311}
309EXPORT_SYMBOL(ib_create_ah); 312EXPORT_SYMBOL(ib_create_ah);
310 313
314struct find_gid_index_context {
315 u16 vlan_id;
316};
317
318static bool find_gid_index(const union ib_gid *gid,
319 const struct ib_gid_attr *gid_attr,
320 void *context)
321{
322 struct find_gid_index_context *ctx =
323 (struct find_gid_index_context *)context;
324
325 if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
326 (is_vlan_dev(gid_attr->ndev) &&
327 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
328 return false;
329
330 return true;
331}
332
333static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
334 u16 vlan_id, const union ib_gid *sgid,
335 u16 *gid_index)
336{
337 struct find_gid_index_context context = {.vlan_id = vlan_id};
338
339 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
340 &context, gid_index);
341}
342
311int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 343int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
312 const struct ib_wc *wc, const struct ib_grh *grh, 344 const struct ib_wc *wc, const struct ib_grh *grh,
313 struct ib_ah_attr *ah_attr) 345 struct ib_ah_attr *ah_attr)
@@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
318 350
319 memset(ah_attr, 0, sizeof *ah_attr); 351 memset(ah_attr, 0, sizeof *ah_attr);
320 if (rdma_cap_eth_ah(device, port_num)) { 352 if (rdma_cap_eth_ah(device, port_num)) {
353 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
354 wc->vlan_id : 0xffff;
355
321 if (!(wc->wc_flags & IB_WC_GRH)) 356 if (!(wc->wc_flags & IB_WC_GRH))
322 return -EPROTOTYPE; 357 return -EPROTOTYPE;
323 358
324 if (wc->wc_flags & IB_WC_WITH_SMAC && 359 if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
325 wc->wc_flags & IB_WC_WITH_VLAN) { 360 !(wc->wc_flags & IB_WC_WITH_VLAN)) {
326 memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
327 ah_attr->vlan_id = wc->vlan_id;
328 } else {
329 ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, 361 ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
330 ah_attr->dmac, &ah_attr->vlan_id); 362 ah_attr->dmac,
363 wc->wc_flags & IB_WC_WITH_VLAN ?
364 NULL : &vlan_id,
365 0);
331 if (ret) 366 if (ret)
332 return ret; 367 return ret;
333 } 368 }
334 } else { 369
335 ah_attr->vlan_id = 0xffff; 370 ret = get_sgid_index_from_eth(device, port_num, vlan_id,
371 &grh->dgid, &gid_index);
372 if (ret)
373 return ret;
374
375 if (wc->wc_flags & IB_WC_WITH_SMAC)
376 memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
336 } 377 }
337 378
338 ah_attr->dlid = wc->slid; 379 ah_attr->dlid = wc->slid;
@@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
344 ah_attr->ah_flags = IB_AH_GRH; 385 ah_attr->ah_flags = IB_AH_GRH;
345 ah_attr->grh.dgid = grh->sgid; 386 ah_attr->grh.dgid = grh->sgid;
346 387
347 ret = ib_find_cached_gid(device, &grh->dgid, &port_num, 388 if (!rdma_cap_eth_ah(device, port_num)) {
348 &gid_index); 389 ret = ib_find_cached_gid_by_port(device, &grh->dgid,
349 if (ret) 390 port_num, NULL,
350 return ret; 391 &gid_index);
392 if (ret)
393 return ret;
394 }
351 395
352 ah_attr->grh.sgid_index = (u8) gid_index; 396 ah_attr->grh.sgid_index = (u8) gid_index;
353 flow_class = be32_to_cpu(grh->version_tclass_flow); 397 flow_class = be32_to_cpu(grh->version_tclass_flow);
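The find_gid_index() filter introduced above accepts a cached GID only when the VLAN state of its net_device matches the VLAN taken from the work completion (0xffff meaning untagged). An equivalent, more explicit form of the predicate, written as a hypothetical helper:

	static bool gid_matches_vlan(const struct ib_gid_attr *attr, u16 vlan_id)
	{
		bool want_vlan = (vlan_id != 0xffff);

		if (want_vlan != is_vlan_dev(attr->ndev))
			return false;	/* tagged vs. untagged mismatch */
		if (want_vlan && vlan_dev_vlan_id(attr->ndev) != vlan_id)
			return false;	/* different VLAN id */
		return true;
	}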
@@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
617static const struct { 661static const struct {
618 int valid; 662 int valid;
619 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 663 enum ib_qp_attr_mask req_param[IB_QPT_MAX];
620 enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
621 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 664 enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
622 enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
623} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 665} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
624 [IB_QPS_RESET] = { 666 [IB_QPS_RESET] = {
625 [IB_QPS_RESET] = { .valid = 1 }, 667 [IB_QPS_RESET] = { .valid = 1 },
@@ -700,12 +742,6 @@ static const struct {
700 IB_QP_MAX_DEST_RD_ATOMIC | 742 IB_QP_MAX_DEST_RD_ATOMIC |
701 IB_QP_MIN_RNR_TIMER), 743 IB_QP_MIN_RNR_TIMER),
702 }, 744 },
703 .req_param_add_eth = {
704 [IB_QPT_RC] = (IB_QP_SMAC),
705 [IB_QPT_UC] = (IB_QP_SMAC),
706 [IB_QPT_XRC_INI] = (IB_QP_SMAC),
707 [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
708 },
709 .opt_param = { 745 .opt_param = {
710 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 746 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
711 IB_QP_QKEY), 747 IB_QP_QKEY),
@@ -726,21 +762,7 @@ static const struct {
726 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 762 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
727 IB_QP_QKEY), 763 IB_QP_QKEY),
728 }, 764 },
729 .opt_param_add_eth = { 765 },
730 [IB_QPT_RC] = (IB_QP_ALT_SMAC |
731 IB_QP_VID |
732 IB_QP_ALT_VID),
733 [IB_QPT_UC] = (IB_QP_ALT_SMAC |
734 IB_QP_VID |
735 IB_QP_ALT_VID),
736 [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
737 IB_QP_VID |
738 IB_QP_ALT_VID),
739 [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
740 IB_QP_VID |
741 IB_QP_ALT_VID)
742 }
743 }
744 }, 766 },
745 [IB_QPS_RTR] = { 767 [IB_QPS_RTR] = {
746 [IB_QPS_RESET] = { .valid = 1 }, 768 [IB_QPS_RESET] = { .valid = 1 },
@@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
962 req_param = qp_state_table[cur_state][next_state].req_param[type]; 984 req_param = qp_state_table[cur_state][next_state].req_param[type];
963 opt_param = qp_state_table[cur_state][next_state].opt_param[type]; 985 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
964 986
965 if (ll == IB_LINK_LAYER_ETHERNET) {
966 req_param |= qp_state_table[cur_state][next_state].
967 req_param_add_eth[type];
968 opt_param |= qp_state_table[cur_state][next_state].
969 opt_param_add_eth[type];
970 }
971
972 if ((mask & req_param) != req_param) 987 if ((mask & req_param) != req_param)
973 return 0; 988 return 0;
974 989
@@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
979} 994}
980EXPORT_SYMBOL(ib_modify_qp_is_ok); 995EXPORT_SYMBOL(ib_modify_qp_is_ok);
981 996
982int ib_resolve_eth_l2_attrs(struct ib_qp *qp, 997int ib_resolve_eth_dmac(struct ib_qp *qp,
983 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 998 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
984{ 999{
985 int ret = 0; 1000 int ret = 0;
986 union ib_gid sgid;
987 1001
988 if ((*qp_attr_mask & IB_QP_AV) && 1002 if (*qp_attr_mask & IB_QP_AV) {
989 (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { 1003 if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
990 ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, 1004 qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
991 qp_attr->ah_attr.grh.sgid_index, &sgid); 1005 return -EINVAL;
992 if (ret) 1006
993 goto out; 1007 if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
1008 return 0;
1009
994 if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { 1010 if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
995 rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); 1011 rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
996 rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); 1012 qp_attr->ah_attr.dmac);
997 if (!(*qp_attr_mask & IB_QP_VID))
998 qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
999 } else { 1013 } else {
1000 ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, 1014 union ib_gid sgid;
1001 qp_attr->ah_attr.dmac, &qp_attr->vlan_id); 1015 struct ib_gid_attr sgid_attr;
1002 if (ret) 1016 int ifindex;
1003 goto out; 1017
1004 ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); 1018 ret = ib_query_gid(qp->device,
1005 if (ret) 1019 qp_attr->ah_attr.port_num,
1020 qp_attr->ah_attr.grh.sgid_index,
1021 &sgid, &sgid_attr);
1022
1023 if (ret || !sgid_attr.ndev) {
1024 if (!ret)
1025 ret = -ENXIO;
1006 goto out; 1026 goto out;
1027 }
1028
1029 ifindex = sgid_attr.ndev->ifindex;
1030
1031 ret = rdma_addr_find_dmac_by_grh(&sgid,
1032 &qp_attr->ah_attr.grh.dgid,
1033 qp_attr->ah_attr.dmac,
1034 NULL, ifindex);
1035
1036 dev_put(sgid_attr.ndev);
1007 } 1037 }
1008 *qp_attr_mask |= IB_QP_SMAC;
1009 if (qp_attr->vlan_id < 0xFFFF)
1010 *qp_attr_mask |= IB_QP_VID;
1011 } 1038 }
1012out: 1039out:
1013 return ret; 1040 return ret;
1014} 1041}
1015EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); 1042EXPORT_SYMBOL(ib_resolve_eth_dmac);
1016 1043
1017 1044
1018int ib_modify_qp(struct ib_qp *qp, 1045int ib_modify_qp(struct ib_qp *qp,
@@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
1021{ 1048{
1022 int ret; 1049 int ret;
1023 1050
1024 ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); 1051 ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
1025 if (ret) 1052 if (ret)
1026 return ret; 1053 return ret;
1027 1054
@@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
1253} 1280}
1254EXPORT_SYMBOL(ib_alloc_mr); 1281EXPORT_SYMBOL(ib_alloc_mr);
1255 1282
1256struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
1257 int max_page_list_len)
1258{
1259 struct ib_fast_reg_page_list *page_list;
1260
1261 if (!device->alloc_fast_reg_page_list)
1262 return ERR_PTR(-ENOSYS);
1263
1264 page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
1265
1266 if (!IS_ERR(page_list)) {
1267 page_list->device = device;
1268 page_list->max_page_list_len = max_page_list_len;
1269 }
1270
1271 return page_list;
1272}
1273EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
1274
1275void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1276{
1277 page_list->device->free_fast_reg_page_list(page_list);
1278}
1279EXPORT_SYMBOL(ib_free_fast_reg_page_list);
1280
1281/* Memory windows */ 1283/* Memory windows */
1282 1284
1283struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 1285struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
@@ -1469,3 +1471,110 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
1469 mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; 1471 mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
1470} 1472}
1471EXPORT_SYMBOL(ib_check_mr_status); 1473EXPORT_SYMBOL(ib_check_mr_status);
1474
1475/**
1476 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
 1477 * and set it as the memory region.
1478 * @mr: memory region
1479 * @sg: dma mapped scatterlist
1480 * @sg_nents: number of entries in sg
1481 * @page_size: page vector desired page size
1482 *
1483 * Constraints:
1484 * - The first sg element is allowed to have an offset.
1485 * - Each sg element must be aligned to page_size (or physically
1486 * contiguous to the previous element). In case an sg element has a
 1487 * non-contiguous offset, the mapping prefix will not include it.
1488 * - The last sg element is allowed to have length less than page_size.
 1489 * - If sg_nents total byte length exceeds the mr's max_num_sg * page_size
1490 * then only max_num_sg entries will be mapped.
1491 *
1492 * Returns the number of sg elements that were mapped to the memory region.
1493 *
1494 * After this completes successfully, the memory region
1495 * is ready for registration.
1496 */
1497int ib_map_mr_sg(struct ib_mr *mr,
1498 struct scatterlist *sg,
1499 int sg_nents,
1500 unsigned int page_size)
1501{
1502 if (unlikely(!mr->device->map_mr_sg))
1503 return -ENOSYS;
1504
1505 mr->page_size = page_size;
1506
1507 return mr->device->map_mr_sg(mr, sg, sg_nents);
1508}
1509EXPORT_SYMBOL(ib_map_mr_sg);
1510
1511/**
1512 * ib_sg_to_pages() - Convert the largest prefix of a sg list
1513 * to a page vector
1514 * @mr: memory region
1515 * @sgl: dma mapped scatterlist
1516 * @sg_nents: number of entries in sg
1517 * @set_page: driver page assignment function pointer
1518 *
 1519 * Core service helper for drivers to convert the largest
 1520 * prefix of the given sg list to a page vector. The sg list
 1521 * prefix converted is the prefix that meets the requirements
1522 * of ib_map_mr_sg.
1523 *
1524 * Returns the number of sg elements that were assigned to
1525 * a page vector.
1526 */
1527int ib_sg_to_pages(struct ib_mr *mr,
1528 struct scatterlist *sgl,
1529 int sg_nents,
1530 int (*set_page)(struct ib_mr *, u64))
1531{
1532 struct scatterlist *sg;
1533 u64 last_end_dma_addr = 0, last_page_addr = 0;
1534 unsigned int last_page_off = 0;
1535 u64 page_mask = ~((u64)mr->page_size - 1);
1536 int i;
1537
1538 mr->iova = sg_dma_address(&sgl[0]);
1539 mr->length = 0;
1540
1541 for_each_sg(sgl, sg, sg_nents, i) {
1542 u64 dma_addr = sg_dma_address(sg);
1543 unsigned int dma_len = sg_dma_len(sg);
1544 u64 end_dma_addr = dma_addr + dma_len;
1545 u64 page_addr = dma_addr & page_mask;
1546
1547 if (i && page_addr != dma_addr) {
1548 if (last_end_dma_addr != dma_addr) {
1549 /* gap */
1550 goto done;
1551
1552 } else if (last_page_off + dma_len <= mr->page_size) {
1553 /* chunk this fragment with the last */
1554 mr->length += dma_len;
1555 last_end_dma_addr += dma_len;
1556 last_page_off += dma_len;
1557 continue;
1558 } else {
1559 /* map starting from the next page */
1560 page_addr = last_page_addr + mr->page_size;
1561 dma_len -= mr->page_size - last_page_off;
1562 }
1563 }
1564
1565 do {
1566 if (unlikely(set_page(mr, page_addr)))
1567 goto done;
1568 page_addr += mr->page_size;
1569 } while (page_addr < end_dma_addr);
1570
1571 mr->length += dma_len;
1572 last_end_dma_addr = end_dma_addr;
1573 last_page_addr = end_dma_addr & page_mask;
1574 last_page_off = end_dma_addr & ~page_mask;
1575 }
1576
1577done:
1578 return i;
1579}
1580EXPORT_SYMBOL(ib_sg_to_pages);
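Taken together, ib_map_mr_sg() plus an IB_WR_REG_MR work request replace the old page-list based fast registration. A minimal consumer-side sketch, with error handling and completion processing omitted and pd, qp, sgl and sg_cnt assumed to exist:

	struct ib_mr *mr;
	struct ib_reg_wr reg_wr;
	struct ib_send_wr *bad_wr;
	int nents, n;

	nents = ib_dma_map_sg(pd->device, sgl, sg_cnt, DMA_TO_DEVICE);

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);

	n = ib_map_mr_sg(mr, sgl, nents, PAGE_SIZE);
	if (n < nents) {
		/* only a prefix was mapped; the rest must be registered separately */
	}

	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode     = IB_WR_REG_MR;
	reg_wr.wr.wr_id      = (uintptr_t)mr;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
	reg_wr.mr            = mr;
	reg_wr.key           = mr->rkey;
	reg_wr.access        = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	ret = ib_post_send(qp, &reg_wr.wr, &bad_wr);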
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cf5474ae68ff..cfe404925a39 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -123,7 +123,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
123 wc->opcode = IB_WC_LOCAL_INV; 123 wc->opcode = IB_WC_LOCAL_INV;
124 break; 124 break;
125 case T3_FAST_REGISTER: 125 case T3_FAST_REGISTER:
126 wc->opcode = IB_WC_FAST_REG_MR; 126 wc->opcode = IB_WC_REG_MR;
127 break; 127 break;
128 default: 128 default:
129 printk(KERN_ERR MOD "Unexpected opcode %d " 129 printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 93308c45f298..c34725ca0bb4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -463,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
463 return -EINVAL; 463 return -EINVAL;
464 464
465 mhp = to_iwch_mr(ib_mr); 465 mhp = to_iwch_mr(ib_mr);
466 kfree(mhp->pages);
466 rhp = mhp->rhp; 467 rhp = mhp->rhp;
467 mmid = mhp->attr.stag >> 8; 468 mmid = mhp->attr.stag >> 8;
468 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 469 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
@@ -821,6 +822,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
821 if (!mhp) 822 if (!mhp)
822 goto err; 823 goto err;
823 824
825 mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
826 if (!mhp->pages) {
827 ret = -ENOMEM;
828 goto pl_err;
829 }
830
824 mhp->rhp = rhp; 831 mhp->rhp = rhp;
825 ret = iwch_alloc_pbl(mhp, max_num_sg); 832 ret = iwch_alloc_pbl(mhp, max_num_sg);
826 if (ret) 833 if (ret)
@@ -847,31 +854,34 @@ err3:
847err2: 854err2:
848 iwch_free_pbl(mhp); 855 iwch_free_pbl(mhp);
849err1: 856err1:
857 kfree(mhp->pages);
858pl_err:
850 kfree(mhp); 859 kfree(mhp);
851err: 860err:
852 return ERR_PTR(ret); 861 return ERR_PTR(ret);
853} 862}
854 863
855static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( 864static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
856 struct ib_device *device,
857 int page_list_len)
858{ 865{
859 struct ib_fast_reg_page_list *page_list; 866 struct iwch_mr *mhp = to_iwch_mr(ibmr);
860 867
861 page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), 868 if (unlikely(mhp->npages == mhp->attr.pbl_size))
862 GFP_KERNEL); 869 return -ENOMEM;
863 if (!page_list)
864 return ERR_PTR(-ENOMEM);
865 870
866 page_list->page_list = (u64 *)(page_list + 1); 871 mhp->pages[mhp->npages++] = addr;
867 page_list->max_page_list_len = page_list_len;
868 872
869 return page_list; 873 return 0;
870} 874}
871 875
872static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) 876static int iwch_map_mr_sg(struct ib_mr *ibmr,
877 struct scatterlist *sg,
878 int sg_nents)
873{ 879{
874 kfree(page_list); 880 struct iwch_mr *mhp = to_iwch_mr(ibmr);
881
882 mhp->npages = 0;
883
884 return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
875} 885}
876 886
877static int iwch_destroy_qp(struct ib_qp *ib_qp) 887static int iwch_destroy_qp(struct ib_qp *ib_qp)
@@ -1450,8 +1460,7 @@ int iwch_register_device(struct iwch_dev *dev)
1450 dev->ibdev.bind_mw = iwch_bind_mw; 1460 dev->ibdev.bind_mw = iwch_bind_mw;
1451 dev->ibdev.dealloc_mw = iwch_dealloc_mw; 1461 dev->ibdev.dealloc_mw = iwch_dealloc_mw;
1452 dev->ibdev.alloc_mr = iwch_alloc_mr; 1462 dev->ibdev.alloc_mr = iwch_alloc_mr;
1453 dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; 1463 dev->ibdev.map_mr_sg = iwch_map_mr_sg;
1454 dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
1455 dev->ibdev.attach_mcast = iwch_multicast_attach; 1464 dev->ibdev.attach_mcast = iwch_multicast_attach;
1456 dev->ibdev.detach_mcast = iwch_multicast_detach; 1465 dev->ibdev.detach_mcast = iwch_multicast_detach;
1457 dev->ibdev.process_mad = iwch_process_mad; 1466 dev->ibdev.process_mad = iwch_process_mad;
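The cxgb3 conversion above is the pattern every driver follows: keep a per-MR page array, reset it in map_mr_sg, and let ib_sg_to_pages() call back once per page. A generic sketch with hypothetical mydrv_* names (not part of the patch):

	static int mydrv_set_page(struct ib_mr *ibmr, u64 page_addr)
	{
		struct mydrv_mr *mr = to_mydrv_mr(ibmr);

		if (mr->npages == mr->max_pages)
			return -ENOMEM;		/* tells the core to stop walking the sg list */

		mr->pages[mr->npages++] = page_addr;
		return 0;
	}

	static int mydrv_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
				   int sg_nents)
	{
		struct mydrv_mr *mr = to_mydrv_mr(ibmr);

		mr->npages = 0;
		return ib_sg_to_pages(ibmr, sg, sg_nents, mydrv_set_page);
	}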
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 87c14b0c5ac0..2ac85b86a680 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -77,6 +77,8 @@ struct iwch_mr {
77 struct iwch_dev *rhp; 77 struct iwch_dev *rhp;
78 u64 kva; 78 u64 kva;
79 struct tpt_attributes attr; 79 struct tpt_attributes attr;
80 u64 *pages;
81 u32 npages;
80}; 82};
81 83
82typedef struct iwch_mw iwch_mw_handle; 84typedef struct iwch_mw iwch_mw_handle;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index b57c0befd962..d0548fc6395e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
95 wqe->write.reserved[0] = 0; 95 wqe->write.reserved[0] = 0;
96 wqe->write.reserved[1] = 0; 96 wqe->write.reserved[1] = 0;
97 wqe->write.reserved[2] = 0; 97 wqe->write.reserved[2] = 0;
98 wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); 98 wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
99 wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); 99 wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
100 100
101 if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { 101 if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
102 plen = 4; 102 plen = 4;
@@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
137 wqe->read.local_inv = 0; 137 wqe->read.local_inv = 0;
138 wqe->read.reserved[0] = 0; 138 wqe->read.reserved[0] = 0;
139 wqe->read.reserved[1] = 0; 139 wqe->read.reserved[1] = 0;
140 wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); 140 wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
141 wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); 141 wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
142 wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); 142 wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
143 wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); 143 wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
144 wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); 144 wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
@@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
146 return 0; 146 return 0;
147} 147}
148 148
149static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, 149static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
150 u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) 150 u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
151{ 151{
152 struct iwch_mr *mhp = to_iwch_mr(wr->mr);
152 int i; 153 int i;
153 __be64 *p; 154 __be64 *p;
154 155
155 if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) 156 if (mhp->npages > T3_MAX_FASTREG_DEPTH)
156 return -EINVAL; 157 return -EINVAL;
157 *wr_cnt = 1; 158 *wr_cnt = 1;
158 wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); 159 wqe->fastreg.stag = cpu_to_be32(wr->key);
159 wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); 160 wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
160 wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); 161 wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
161 wqe->fastreg.va_base_lo_fbo = 162 wqe->fastreg.va_base_lo_fbo =
162 cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); 163 cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
163 wqe->fastreg.page_type_perms = cpu_to_be32( 164 wqe->fastreg.page_type_perms = cpu_to_be32(
164 V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | 165 V_FR_PAGE_COUNT(mhp->npages) |
165 V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | 166 V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
166 V_FR_TYPE(TPT_VATO) | 167 V_FR_TYPE(TPT_VATO) |
167 V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); 168 V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
168 p = &wqe->fastreg.pbl_addrs[0]; 169 p = &wqe->fastreg.pbl_addrs[0];
169 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { 170 for (i = 0; i < mhp->npages; i++, p++) {
170 171
171 /* If we need a 2nd WR, then set it up */ 172 /* If we need a 2nd WR, then set it up */
172 if (i == T3_MAX_FASTREG_FRAG) { 173 if (i == T3_MAX_FASTREG_FRAG) {
@@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
175 Q_PTR2IDX((wq->wptr+1), wq->size_log2)); 176 Q_PTR2IDX((wq->wptr+1), wq->size_log2));
176 build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, 177 build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
177 Q_GENBIT(wq->wptr + 1, wq->size_log2), 178 Q_GENBIT(wq->wptr + 1, wq->size_log2),
178 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, 179 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
179 T3_EOP); 180 T3_EOP);
180 181
181 p = &wqe->pbl_frag.pbl_addrs[0]; 182 p = &wqe->pbl_frag.pbl_addrs[0];
182 } 183 }
183 *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); 184 *p = cpu_to_be64((u64)mhp->pages[i]);
184 } 185 }
185 *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; 186 *flit_cnt = 5 + mhp->npages;
186 if (*flit_cnt > 15) 187 if (*flit_cnt > 15)
187 *flit_cnt = 15; 188 *flit_cnt = 15;
188 return 0; 189 return 0;
@@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
414 if (!qhp->wq.oldest_read) 415 if (!qhp->wq.oldest_read)
415 qhp->wq.oldest_read = sqp; 416 qhp->wq.oldest_read = sqp;
416 break; 417 break;
417 case IB_WR_FAST_REG_MR: 418 case IB_WR_REG_MR:
418 t3_wr_opcode = T3_WR_FASTREG; 419 t3_wr_opcode = T3_WR_FASTREG;
419 err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, 420 err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
420 &wr_cnt, &qhp->wq); 421 &wr_cnt, &qhp->wq);
421 break; 422 break;
422 case IB_WR_LOCAL_INV: 423 case IB_WR_LOCAL_INV:
423 if (wr->send_flags & IB_SEND_FENCE) 424 if (wr->send_flags & IB_SEND_FENCE)
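The rdma_wr(), ud_wr() and reg_wr() helpers used in the converted drivers recover the typed request from the embedded base WR; they are plain container_of() accessors, along the lines of:

	static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_rdma_wr, wr);
	}

	static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_reg_wr, wr);
	}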
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index debc39d2cbc2..c9cffced00ca 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu,
632 632
633static int send_connect(struct c4iw_ep *ep) 633static int send_connect(struct c4iw_ep *ep)
634{ 634{
635 struct cpl_act_open_req *req; 635 struct cpl_act_open_req *req = NULL;
636 struct cpl_t5_act_open_req *t5_req; 636 struct cpl_t5_act_open_req *t5req = NULL;
637 struct cpl_act_open_req6 *req6; 637 struct cpl_t6_act_open_req *t6req = NULL;
638 struct cpl_t5_act_open_req6 *t5_req6; 638 struct cpl_act_open_req6 *req6 = NULL;
639 struct cpl_t5_act_open_req6 *t5req6 = NULL;
640 struct cpl_t6_act_open_req6 *t6req6 = NULL;
639 struct sk_buff *skb; 641 struct sk_buff *skb;
640 u64 opt0; 642 u64 opt0;
641 u32 opt2; 643 u32 opt2;
642 unsigned int mtu_idx; 644 unsigned int mtu_idx;
643 int wscale; 645 int wscale;
644 int wrlen; 646 int win, sizev4, sizev6, wrlen;
645 int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
646 sizeof(struct cpl_act_open_req) :
647 sizeof(struct cpl_t5_act_open_req);
648 int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
649 sizeof(struct cpl_act_open_req6) :
650 sizeof(struct cpl_t5_act_open_req6);
651 struct sockaddr_in *la = (struct sockaddr_in *) 647 struct sockaddr_in *la = (struct sockaddr_in *)
652 &ep->com.mapped_local_addr; 648 &ep->com.mapped_local_addr;
653 struct sockaddr_in *ra = (struct sockaddr_in *) 649 struct sockaddr_in *ra = (struct sockaddr_in *)
@@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep)
656 &ep->com.mapped_local_addr; 652 &ep->com.mapped_local_addr;
657 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) 653 struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
658 &ep->com.mapped_remote_addr; 654 &ep->com.mapped_remote_addr;
659 int win;
660 int ret; 655 int ret;
656 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
657 u32 isn = (prandom_u32() & ~7UL) - 1;
658
659 switch (CHELSIO_CHIP_VERSION(adapter_type)) {
660 case CHELSIO_T4:
661 sizev4 = sizeof(struct cpl_act_open_req);
662 sizev6 = sizeof(struct cpl_act_open_req6);
663 break;
664 case CHELSIO_T5:
665 sizev4 = sizeof(struct cpl_t5_act_open_req);
666 sizev6 = sizeof(struct cpl_t5_act_open_req6);
667 break;
668 case CHELSIO_T6:
669 sizev4 = sizeof(struct cpl_t6_act_open_req);
670 sizev6 = sizeof(struct cpl_t6_act_open_req6);
671 break;
672 default:
673 pr_err("T%d Chip is not supported\n",
674 CHELSIO_CHIP_VERSION(adapter_type));
675 return -EINVAL;
676 }
661 677
662 wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? 678 wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
663 roundup(sizev4, 16) : 679 roundup(sizev4, 16) :
@@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep)
706 opt2 |= SACK_EN_F; 722 opt2 |= SACK_EN_F;
707 if (wscale && enable_tcp_window_scaling) 723 if (wscale && enable_tcp_window_scaling)
708 opt2 |= WND_SCALE_EN_F; 724 opt2 |= WND_SCALE_EN_F;
709 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 725 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
726 if (peer2peer)
727 isn += 4;
728
710 opt2 |= T5_OPT_2_VALID_F; 729 opt2 |= T5_OPT_2_VALID_F;
711 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); 730 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
712 opt2 |= T5_ISS_F; 731 opt2 |= T5_ISS_F;
@@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep)
718 737
719 t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); 738 t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
720 739
721 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { 740 if (ep->com.remote_addr.ss_family == AF_INET) {
722 if (ep->com.remote_addr.ss_family == AF_INET) { 741 switch (CHELSIO_CHIP_VERSION(adapter_type)) {
723 req = (struct cpl_act_open_req *) skb_put(skb, wrlen); 742 case CHELSIO_T4:
743 req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
724 INIT_TP_WR(req, 0); 744 INIT_TP_WR(req, 0);
725 OPCODE_TID(req) = cpu_to_be32( 745 break;
726 MK_OPCODE_TID(CPL_ACT_OPEN_REQ, 746 case CHELSIO_T5:
727 ((ep->rss_qid << 14) | ep->atid))); 747 t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
728 req->local_port = la->sin_port; 748 wrlen);
729 req->peer_port = ra->sin_port; 749 INIT_TP_WR(t5req, 0);
730 req->local_ip = la->sin_addr.s_addr; 750 req = (struct cpl_act_open_req *)t5req;
731 req->peer_ip = ra->sin_addr.s_addr; 751 break;
732 req->opt0 = cpu_to_be64(opt0); 752 case CHELSIO_T6:
753 t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
754 wrlen);
755 INIT_TP_WR(t6req, 0);
756 req = (struct cpl_act_open_req *)t6req;
757 t5req = (struct cpl_t5_act_open_req *)t6req;
758 break;
759 default:
760 pr_err("T%d Chip is not supported\n",
761 CHELSIO_CHIP_VERSION(adapter_type));
762 ret = -EINVAL;
763 goto clip_release;
764 }
765
766 OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
767 ((ep->rss_qid<<14) | ep->atid)));
768 req->local_port = la->sin_port;
769 req->peer_port = ra->sin_port;
770 req->local_ip = la->sin_addr.s_addr;
771 req->peer_ip = ra->sin_addr.s_addr;
772 req->opt0 = cpu_to_be64(opt0);
773
774 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
733 req->params = cpu_to_be32(cxgb4_select_ntuple( 775 req->params = cpu_to_be32(cxgb4_select_ntuple(
734 ep->com.dev->rdev.lldi.ports[0], 776 ep->com.dev->rdev.lldi.ports[0],
735 ep->l2t)); 777 ep->l2t));
736 req->opt2 = cpu_to_be32(opt2); 778 req->opt2 = cpu_to_be32(opt2);
737 } else { 779 } else {
780 t5req->params = cpu_to_be64(FILTER_TUPLE_V(
781 cxgb4_select_ntuple(
782 ep->com.dev->rdev.lldi.ports[0],
783 ep->l2t)));
784 t5req->rsvd = cpu_to_be32(isn);
785 PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
786 t5req->opt2 = cpu_to_be32(opt2);
787 }
788 } else {
789 switch (CHELSIO_CHIP_VERSION(adapter_type)) {
790 case CHELSIO_T4:
738 req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); 791 req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
739
740 INIT_TP_WR(req6, 0); 792 INIT_TP_WR(req6, 0);
741 OPCODE_TID(req6) = cpu_to_be32( 793 break;
742 MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, 794 case CHELSIO_T5:
743 ((ep->rss_qid<<14)|ep->atid))); 795 t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
744 req6->local_port = la6->sin6_port; 796 wrlen);
745 req6->peer_port = ra6->sin6_port; 797 INIT_TP_WR(t5req6, 0);
746 req6->local_ip_hi = *((__be64 *) 798 req6 = (struct cpl_act_open_req6 *)t5req6;
747 (la6->sin6_addr.s6_addr)); 799 break;
748 req6->local_ip_lo = *((__be64 *) 800 case CHELSIO_T6:
749 (la6->sin6_addr.s6_addr + 8)); 801 t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
750 req6->peer_ip_hi = *((__be64 *) 802 wrlen);
751 (ra6->sin6_addr.s6_addr)); 803 INIT_TP_WR(t6req6, 0);
752 req6->peer_ip_lo = *((__be64 *) 804 req6 = (struct cpl_act_open_req6 *)t6req6;
753 (ra6->sin6_addr.s6_addr + 8)); 805 t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
754 req6->opt0 = cpu_to_be64(opt0); 806 break;
807 default:
808 pr_err("T%d Chip is not supported\n",
809 CHELSIO_CHIP_VERSION(adapter_type));
810 ret = -EINVAL;
811 goto clip_release;
812 }
813
814 OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
815 ((ep->rss_qid<<14)|ep->atid)));
816 req6->local_port = la6->sin6_port;
817 req6->peer_port = ra6->sin6_port;
818 req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
819 req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
820 req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
821 req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
822 req6->opt0 = cpu_to_be64(opt0);
823
824 if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
755 req6->params = cpu_to_be32(cxgb4_select_ntuple( 825 req6->params = cpu_to_be32(cxgb4_select_ntuple(
756 ep->com.dev->rdev.lldi.ports[0], 826 ep->com.dev->rdev.lldi.ports[0],
757 ep->l2t)); 827 ep->l2t));
758 req6->opt2 = cpu_to_be32(opt2); 828 req6->opt2 = cpu_to_be32(opt2);
759 }
760 } else {
761 u32 isn = (prandom_u32() & ~7UL) - 1;
762
763 if (peer2peer)
764 isn += 4;
765
766 if (ep->com.remote_addr.ss_family == AF_INET) {
767 t5_req = (struct cpl_t5_act_open_req *)
768 skb_put(skb, wrlen);
769 INIT_TP_WR(t5_req, 0);
770 OPCODE_TID(t5_req) = cpu_to_be32(
771 MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
772 ((ep->rss_qid << 14) | ep->atid)));
773 t5_req->local_port = la->sin_port;
774 t5_req->peer_port = ra->sin_port;
775 t5_req->local_ip = la->sin_addr.s_addr;
776 t5_req->peer_ip = ra->sin_addr.s_addr;
777 t5_req->opt0 = cpu_to_be64(opt0);
778 t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
779 cxgb4_select_ntuple(
780 ep->com.dev->rdev.lldi.ports[0],
781 ep->l2t)));
782 t5_req->rsvd = cpu_to_be32(isn);
783 PDBG("%s snd_isn %u\n", __func__,
784 be32_to_cpu(t5_req->rsvd));
785 t5_req->opt2 = cpu_to_be32(opt2);
786 } else { 829 } else {
787 t5_req6 = (struct cpl_t5_act_open_req6 *) 830 t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
788 skb_put(skb, wrlen); 831 cxgb4_select_ntuple(
789 INIT_TP_WR(t5_req6, 0);
790 OPCODE_TID(t5_req6) = cpu_to_be32(
791 MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
792 ((ep->rss_qid<<14)|ep->atid)));
793 t5_req6->local_port = la6->sin6_port;
794 t5_req6->peer_port = ra6->sin6_port;
795 t5_req6->local_ip_hi = *((__be64 *)
796 (la6->sin6_addr.s6_addr));
797 t5_req6->local_ip_lo = *((__be64 *)
798 (la6->sin6_addr.s6_addr + 8));
799 t5_req6->peer_ip_hi = *((__be64 *)
800 (ra6->sin6_addr.s6_addr));
801 t5_req6->peer_ip_lo = *((__be64 *)
802 (ra6->sin6_addr.s6_addr + 8));
803 t5_req6->opt0 = cpu_to_be64(opt0);
804 t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
805 cxgb4_select_ntuple(
806 ep->com.dev->rdev.lldi.ports[0], 832 ep->com.dev->rdev.lldi.ports[0],
807 ep->l2t))); 833 ep->l2t)));
808 t5_req6->rsvd = cpu_to_be32(isn); 834 t5req6->rsvd = cpu_to_be32(isn);
809 PDBG("%s snd_isn %u\n", __func__, 835 PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
810 be32_to_cpu(t5_req6->rsvd)); 836 t5req6->opt2 = cpu_to_be32(opt2);
811 t5_req6->opt2 = cpu_to_be32(opt2);
812 } 837 }
813 } 838 }
814 839
815 set_bit(ACT_OPEN_REQ, &ep->com.history); 840 set_bit(ACT_OPEN_REQ, &ep->com.history);
816 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 841 ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
842clip_release:
817 if (ret && ep->com.remote_addr.ss_family == AF_INET6) 843 if (ret && ep->com.remote_addr.ss_family == AF_INET6)
818 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0], 844 cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
819 (const u32 *)&la6->sin6_addr.s6_addr, 1); 845 (const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -1196,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1196 if ((status == 0) || (status == -ECONNREFUSED)) { 1222 if ((status == 0) || (status == -ECONNREFUSED)) {
1197 if (!ep->tried_with_mpa_v1) { 1223 if (!ep->tried_with_mpa_v1) {
1198 /* this means MPA_v2 is used */ 1224 /* this means MPA_v2 is used */
1225 event.ord = ep->ird;
1226 event.ird = ep->ord;
1199 event.private_data_len = ep->plen - 1227 event.private_data_len = ep->plen -
1200 sizeof(struct mpa_v2_conn_params); 1228 sizeof(struct mpa_v2_conn_params);
1201 event.private_data = ep->mpa_pkt + 1229 event.private_data = ep->mpa_pkt +
@@ -1203,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1203 sizeof(struct mpa_v2_conn_params); 1231 sizeof(struct mpa_v2_conn_params);
1204 } else { 1232 } else {
1205 /* this means MPA_v1 is used */ 1233 /* this means MPA_v1 is used */
1234 event.ord = cur_max_read_depth(ep->com.dev);
1235 event.ird = cur_max_read_depth(ep->com.dev);
1206 event.private_data_len = ep->plen; 1236 event.private_data_len = ep->plen;
1207 event.private_data = ep->mpa_pkt + 1237 event.private_data = ep->mpa_pkt +
1208 sizeof(struct mpa_message); 1238 sizeof(struct mpa_message);
@@ -1265,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep)
1265 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1295 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1266 memset(&event, 0, sizeof(event)); 1296 memset(&event, 0, sizeof(event));
1267 event.event = IW_CM_EVENT_ESTABLISHED; 1297 event.event = IW_CM_EVENT_ESTABLISHED;
1268 event.ird = ep->ird; 1298 event.ird = ep->ord;
1269 event.ord = ep->ord; 1299 event.ord = ep->ird;
1270 if (ep->com.cm_id) { 1300 if (ep->com.cm_id) {
1271 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1301 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1272 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1302 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1898,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
1898 1928
1899static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, 1929static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1900 struct dst_entry *dst, struct c4iw_dev *cdev, 1930 struct dst_entry *dst, struct c4iw_dev *cdev,
1901 bool clear_mpa_v1) 1931 bool clear_mpa_v1, enum chip_type adapter_type)
1902{ 1932{
1903 struct neighbour *n; 1933 struct neighbour *n;
1904 int err, step; 1934 int err, step;
@@ -1933,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1933 goto out; 1963 goto out;
1934 ep->mtu = pdev->mtu; 1964 ep->mtu = pdev->mtu;
1935 ep->tx_chan = cxgb4_port_chan(pdev); 1965 ep->tx_chan = cxgb4_port_chan(pdev);
1936 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; 1966 ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
1967 cxgb4_port_viid(pdev));
1937 step = cdev->rdev.lldi.ntxq / 1968 step = cdev->rdev.lldi.ntxq /
1938 cdev->rdev.lldi.nchan; 1969 cdev->rdev.lldi.nchan;
1939 ep->txq_idx = cxgb4_port_idx(pdev) * step; 1970 ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -1952,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
1952 goto out; 1983 goto out;
1953 ep->mtu = dst_mtu(dst); 1984 ep->mtu = dst_mtu(dst);
1954 ep->tx_chan = cxgb4_port_chan(pdev); 1985 ep->tx_chan = cxgb4_port_chan(pdev);
1955 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; 1986 ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
1987 cxgb4_port_viid(pdev));
1956 step = cdev->rdev.lldi.ntxq / 1988 step = cdev->rdev.lldi.ntxq /
1957 cdev->rdev.lldi.nchan; 1989 cdev->rdev.lldi.nchan;
1958 ep->txq_idx = cxgb4_port_idx(pdev) * step; 1990 ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2025,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
2025 err = -EHOSTUNREACH; 2057 err = -EHOSTUNREACH;
2026 goto fail3; 2058 goto fail3;
2027 } 2059 }
2028 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); 2060 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2061 ep->com.dev->rdev.lldi.adapter_type);
2029 if (err) { 2062 if (err) {
2030 pr_err("%s - cannot alloc l2e.\n", __func__); 2063 pr_err("%s - cannot alloc l2e.\n", __func__);
2031 goto fail4; 2064 goto fail4;
@@ -2213,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2213 int wscale; 2246 int wscale;
2214 struct cpl_t5_pass_accept_rpl *rpl5 = NULL; 2247 struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2215 int win; 2248 int win;
2249 enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2216 2250
2217 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 2251 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2218 BUG_ON(skb_cloned(skb)); 2252 BUG_ON(skb_cloned(skb));
2219 2253
2220 skb_get(skb); 2254 skb_get(skb);
2221 rpl = cplhdr(skb); 2255 rpl = cplhdr(skb);
2222 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 2256 if (!is_t4(adapter_type)) {
2223 skb_trim(skb, roundup(sizeof(*rpl5), 16)); 2257 skb_trim(skb, roundup(sizeof(*rpl5), 16));
2224 rpl5 = (void *)rpl; 2258 rpl5 = (void *)rpl;
2225 INIT_TP_WR(rpl5, ep->hwtid); 2259 INIT_TP_WR(rpl5, ep->hwtid);
@@ -2266,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2266 const struct tcphdr *tcph; 2300 const struct tcphdr *tcph;
2267 u32 hlen = ntohl(req->hdr_len); 2301 u32 hlen = ntohl(req->hdr_len);
2268 2302
2269 tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) + 2303 if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2270 IP_HDR_LEN_G(hlen); 2304 tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2305 IP_HDR_LEN_G(hlen);
2306 else
2307 tcph = (const void *)(req + 1) +
2308 T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2271 if (tcph->ece && tcph->cwr) 2309 if (tcph->ece && tcph->cwr)
2272 opt2 |= CCTRL_ECN_V(1); 2310 opt2 |= CCTRL_ECN_V(1);
2273 } 2311 }
2274 if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { 2312 if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2275 u32 isn = (prandom_u32() & ~7UL) - 1; 2313 u32 isn = (prandom_u32() & ~7UL) - 1;
2276 opt2 |= T5_OPT_2_VALID_F; 2314 opt2 |= T5_OPT_2_VALID_F;
2277 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE); 2315 opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
@@ -2302,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2302 return; 2340 return;
2303} 2341}
2304 2342
2305static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, 2343static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
2306 __u8 *local_ip, __u8 *peer_ip, 2344 int *iptype, __u8 *local_ip, __u8 *peer_ip,
2307 __be16 *local_port, __be16 *peer_port) 2345 __be16 *local_port, __be16 *peer_port)
2308{ 2346{
2309 int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); 2347 int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
2310 int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); 2348 ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
2349 T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2350 int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
2351 IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
2352 T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
2311 struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); 2353 struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
2312 struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); 2354 struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
2313 struct tcphdr *tcp = (struct tcphdr *) 2355 struct tcphdr *tcp = (struct tcphdr *)
@@ -2362,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2362 goto reject; 2404 goto reject;
2363 } 2405 }
2364 2406
2365 get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); 2407 get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
2408 local_ip, peer_ip, &local_port, &peer_port);
2366 2409
2367 /* Find output route */ 2410 /* Find output route */
2368 if (iptype == 4) { 2411 if (iptype == 4) {
@@ -2397,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2397 goto reject; 2440 goto reject;
2398 } 2441 }
2399 2442
2400 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); 2443 err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2444 parent_ep->com.dev->rdev.lldi.adapter_type);
2401 if (err) { 2445 if (err) {
2402 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", 2446 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
2403 __func__); 2447 __func__);
@@ -2929,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2929 } else { 2973 } else {
2930 if (peer2peer && 2974 if (peer2peer &&
2931 (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) && 2975 (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
2932 (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0) 2976 (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
2933 ep->ird = 1; 2977 ep->ird = 1;
2934 } 2978 }
2935 2979
@@ -3189,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3189 goto fail2; 3233 goto fail2;
3190 } 3234 }
3191 3235
3192 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); 3236 err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3237 ep->com.dev->rdev.lldi.adapter_type);
3193 if (err) { 3238 if (err) {
3194 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); 3239 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
3195 goto fail3; 3240 goto fail3;
@@ -3260,6 +3305,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3260 sin->sin_addr.s_addr, sin->sin_port, 0, 3305 sin->sin_addr.s_addr, sin->sin_port, 0,
3261 ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); 3306 ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3262 if (err == -EBUSY) { 3307 if (err == -EBUSY) {
3308 if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3309 err = -EIO;
3310 break;
3311 }
3263 set_current_state(TASK_UNINTERRUPTIBLE); 3312 set_current_state(TASK_UNINTERRUPTIBLE);
3264 schedule_timeout(usecs_to_jiffies(100)); 3313 schedule_timeout(usecs_to_jiffies(100));
3265 } 3314 }
@@ -3593,20 +3642,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3593 3642
3594static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) 3643static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
3595{ 3644{
3596 u32 l2info; 3645 __be32 l2info;
3597 u16 vlantag, len, hdr_len, eth_hdr_len; 3646 __be16 hdr_len, vlantag, len;
3647 u16 eth_hdr_len;
3648 int tcp_hdr_len, ip_hdr_len;
3598 u8 intf; 3649 u8 intf;
3599 struct cpl_rx_pkt *cpl = cplhdr(skb); 3650 struct cpl_rx_pkt *cpl = cplhdr(skb);
3600 struct cpl_pass_accept_req *req; 3651 struct cpl_pass_accept_req *req;
3601 struct tcp_options_received tmp_opt; 3652 struct tcp_options_received tmp_opt;
3602 struct c4iw_dev *dev; 3653 struct c4iw_dev *dev;
3654 enum chip_type type;
3603 3655
3604 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); 3656 dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3605 /* Store values from cpl_rx_pkt in temporary location. */ 3657 /* Store values from cpl_rx_pkt in temporary location. */
3606 vlantag = (__force u16) cpl->vlan; 3658 vlantag = cpl->vlan;
3607 len = (__force u16) cpl->len; 3659 len = cpl->len;
3608 l2info = (__force u32) cpl->l2info; 3660 l2info = cpl->l2info;
3609 hdr_len = (__force u16) cpl->hdr_len; 3661 hdr_len = cpl->hdr_len;
3610 intf = cpl->iff; 3662 intf = cpl->iff;
3611 3663
3612 __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); 3664 __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
@@ -3623,20 +3675,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
3623 memset(req, 0, sizeof(*req)); 3675 memset(req, 0, sizeof(*req));
3624 req->l2info = cpu_to_be16(SYN_INTF_V(intf) | 3676 req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3625 SYN_MAC_IDX_V(RX_MACIDX_G( 3677 SYN_MAC_IDX_V(RX_MACIDX_G(
3626 (__force int) htonl(l2info))) | 3678 be32_to_cpu(l2info))) |
3627 SYN_XACT_MATCH_F); 3679 SYN_XACT_MATCH_F);
3628 eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? 3680 type = dev->rdev.lldi.adapter_type;
3629 RX_ETHHDR_LEN_G((__force int)htonl(l2info)) : 3681 tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3630 RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info)); 3682 ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3631 req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G( 3683 req->hdr_len =
3632 (__force int) htonl(l2info))) | 3684 cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3633 TCP_HDR_LEN_V(RX_TCPHDR_LEN_G( 3685 if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3634 (__force int) htons(hdr_len))) | 3686 eth_hdr_len = is_t4(type) ?
3635 IP_HDR_LEN_V(RX_IPHDR_LEN_G( 3687 RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3636 (__force int) htons(hdr_len))) | 3688 RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3637 ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len))); 3689 req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3638 req->vlan = (__force __be16) vlantag; 3690 IP_HDR_LEN_V(ip_hdr_len) |
3639 req->len = (__force __be16) len; 3691 ETH_HDR_LEN_V(eth_hdr_len));
3692 } else { /* T6 and later */
3693 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3694 req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3695 T6_IP_HDR_LEN_V(ip_hdr_len) |
3696 T6_ETH_HDR_LEN_V(eth_hdr_len));
3697 }
3698 req->vlan = vlantag;
3699 req->len = len;
3640 req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) | 3700 req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3641 PASS_OPEN_TOS_V(tos)); 3701 PASS_OPEN_TOS_V(tos));
3642 req->tcpopt.mss = htons(tmp_opt.mss_clamp); 3702 req->tcpopt.mss = htons(tmp_opt.mss_clamp);
@@ -3755,9 +3815,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
3755 goto reject; 3815 goto reject;
3756 } 3816 }
3757 3817
3758 eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? 3818 switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
3759 RX_ETHHDR_LEN_G(htonl(cpl->l2info)) : 3819 case CHELSIO_T4:
3760 RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info)); 3820 eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3821 break;
3822 case CHELSIO_T5:
3823 eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3824 break;
3825 case CHELSIO_T6:
3826 eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
3827 break;
3828 default:
3829 pr_err("T%d Chip is not supported\n",
3830 CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
3831 goto reject;
3832 }
3833
3761 if (eth_hdr_len == ETH_HLEN) { 3834 if (eth_hdr_len == ETH_HLEN) {
3762 eh = (struct ethhdr *)(req + 1); 3835 eh = (struct ethhdr *)(req + 1);
3763 iph = (struct iphdr *)(eh + 1); 3836 iph = (struct iphdr *)(eh + 1);
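
The send_connect(), accept_cr() and rx_pkt() changes above all replace is_t4()/is_t5() booleans with dispatch on CHELSIO_CHIP_VERSION() so T6 can be handled alongside T4/T5. The request-sizing part of that pattern, factored into a standalone helper for illustration (act_open_req_size() is a made-up name, not part of the patch; it assumes the cxgb4 t4_msg.h/chip-type headers already used in the hunks):

/* Illustrative only: pick the active-open CPL size per chip revision,
 * mirroring the switch added to send_connect(). */
static int act_open_req_size(enum chip_type adapter_type, bool ipv6)
{
	switch (CHELSIO_CHIP_VERSION(adapter_type)) {
	case CHELSIO_T4:
		return ipv6 ? sizeof(struct cpl_act_open_req6) :
			      sizeof(struct cpl_act_open_req);
	case CHELSIO_T5:
		return ipv6 ? sizeof(struct cpl_t5_act_open_req6) :
			      sizeof(struct cpl_t5_act_open_req);
	case CHELSIO_T6:
		return ipv6 ? sizeof(struct cpl_t6_act_open_req6) :
			      sizeof(struct cpl_t6_act_open_req);
	default:
		return -EINVAL;	/* unknown chip, as in the patch */
	}
}
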
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 92d518382a9f..de9cd6901752 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -752,7 +752,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
752 wc->opcode = IB_WC_LOCAL_INV; 752 wc->opcode = IB_WC_LOCAL_INV;
753 break; 753 break;
754 case FW_RI_FAST_REGISTER: 754 case FW_RI_FAST_REGISTER:
755 wc->opcode = IB_WC_FAST_REG_MR; 755 wc->opcode = IB_WC_REG_MR;
756 break; 756 break;
757 default: 757 default:
758 printk(KERN_ERR MOD "Unexpected opcode %d " 758 printk(KERN_ERR MOD "Unexpected opcode %d "
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 1a297391b54c..58fce1742b8d 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -962,12 +962,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
962 devp->rdev.lldi.sge_egrstatuspagesize; 962 devp->rdev.lldi.sge_egrstatuspagesize;
963 963
964 /* 964 /*
965 * For T5 devices, we map all of BAR2 with WC. 965 * For T5/T6 devices, we map all of BAR2 with WC.
966 * For T4 devices with onchip qp mem, we map only that part 966 * For T4 devices with onchip qp mem, we map only that part
967 * of BAR2 with WC. 967 * of BAR2 with WC.
968 */ 968 */
969 devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); 969 devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
970 if (is_t5(devp->rdev.lldi.adapter_type)) { 970 if (!is_t4(devp->rdev.lldi.adapter_type)) {
971 devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, 971 devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
972 pci_resource_len(devp->rdev.lldi.pdev, 2)); 972 pci_resource_len(devp->rdev.lldi.pdev, 2));
973 if (!devp->rdev.bar2_kva) { 973 if (!devp->rdev.bar2_kva) {
@@ -1267,11 +1267,9 @@ static int enable_qp_db(int id, void *p, void *data)
1267static void resume_rc_qp(struct c4iw_qp *qp) 1267static void resume_rc_qp(struct c4iw_qp *qp)
1268{ 1268{
1269 spin_lock(&qp->lock); 1269 spin_lock(&qp->lock);
1270 t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, 1270 t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
1271 is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
1272 qp->wq.sq.wq_pidx_inc = 0; 1271 qp->wq.sq.wq_pidx_inc = 0;
1273 t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, 1272 t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
1274 is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
1275 qp->wq.rq.wq_pidx_inc = 0; 1273 qp->wq.rq.wq_pidx_inc = 0;
1276 spin_unlock(&qp->lock); 1274 spin_unlock(&qp->lock);
1277} 1275}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index c7bb38c931a5..00e55faa086a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -386,6 +386,10 @@ struct c4iw_mr {
386 struct c4iw_dev *rhp; 386 struct c4iw_dev *rhp;
387 u64 kva; 387 u64 kva;
388 struct tpt_attributes attr; 388 struct tpt_attributes attr;
389 u64 *mpl;
390 dma_addr_t mpl_addr;
391 u32 max_mpl_len;
392 u32 mpl_len;
389}; 393};
390 394
391static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) 395static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
@@ -405,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
405 return container_of(ibmw, struct c4iw_mw, ibmw); 409 return container_of(ibmw, struct c4iw_mw, ibmw);
406} 410}
407 411
408struct c4iw_fr_page_list {
409 struct ib_fast_reg_page_list ibpl;
410 DEFINE_DMA_UNMAP_ADDR(mapping);
411 dma_addr_t dma_addr;
412 struct c4iw_dev *dev;
413 int pll_len;
414};
415
416static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
417 struct ib_fast_reg_page_list *ibpl)
418{
419 return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
420}
421
422struct c4iw_cq { 412struct c4iw_cq {
423 struct ib_cq ibcq; 413 struct ib_cq ibcq;
424 struct c4iw_dev *rhp; 414 struct c4iw_dev *rhp;
@@ -966,13 +956,12 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
966int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); 956int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
967void c4iw_qp_add_ref(struct ib_qp *qp); 957void c4iw_qp_add_ref(struct ib_qp *qp);
968void c4iw_qp_rem_ref(struct ib_qp *qp); 958void c4iw_qp_rem_ref(struct ib_qp *qp);
969void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
970struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
971 struct ib_device *device,
972 int page_list_len);
973struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, 959struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
974 enum ib_mr_type mr_type, 960 enum ib_mr_type mr_type,
975 u32 max_num_sg); 961 u32 max_num_sg);
962int c4iw_map_mr_sg(struct ib_mr *ibmr,
963 struct scatterlist *sg,
964 int sg_nents);
976int c4iw_dealloc_mw(struct ib_mw *mw); 965int c4iw_dealloc_mw(struct ib_mw *mw);
977struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); 966struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
978struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, 967struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 026b91ebd5e2..e1629ab58db7 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
144 if (i == (num_wqe-1)) { 144 if (i == (num_wqe-1)) {
145 req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) | 145 req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
146 FW_WR_COMPL_F); 146 FW_WR_COMPL_F);
147 req->wr.wr_lo = (__force __be64)&wr_wait; 147 req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait;
148 } else 148 } else
149 req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR)); 149 req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
150 req->wr.wr_mid = cpu_to_be32( 150 req->wr.wr_mid = cpu_to_be32(
@@ -863,6 +863,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
863 u32 mmid; 863 u32 mmid;
864 u32 stag = 0; 864 u32 stag = 0;
865 int ret = 0; 865 int ret = 0;
866 int length = roundup(max_num_sg * sizeof(u64), 32);
866 867
867 if (mr_type != IB_MR_TYPE_MEM_REG || 868 if (mr_type != IB_MR_TYPE_MEM_REG ||
868 max_num_sg > t4_max_fr_depth(use_dsgl)) 869 max_num_sg > t4_max_fr_depth(use_dsgl))
@@ -876,6 +877,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
876 goto err; 877 goto err;
877 } 878 }
878 879
880 mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
881 length, &mhp->mpl_addr, GFP_KERNEL);
882 if (!mhp->mpl) {
883 ret = -ENOMEM;
884 goto err_mpl;
885 }
886 mhp->max_mpl_len = length;
887
879 mhp->rhp = rhp; 888 mhp->rhp = rhp;
880 ret = alloc_pbl(mhp, max_num_sg); 889 ret = alloc_pbl(mhp, max_num_sg);
881 if (ret) 890 if (ret)
@@ -905,54 +914,35 @@ err2:
905 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, 914 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
906 mhp->attr.pbl_size << 3); 915 mhp->attr.pbl_size << 3);
907err1: 916err1:
917 dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
918 mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
919err_mpl:
908 kfree(mhp); 920 kfree(mhp);
909err: 921err:
910 return ERR_PTR(ret); 922 return ERR_PTR(ret);
911} 923}
912 924
913struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, 925static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
914 int page_list_len)
915{ 926{
916 struct c4iw_fr_page_list *c4pl; 927 struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
917 struct c4iw_dev *dev = to_c4iw_dev(device);
918 dma_addr_t dma_addr;
919 int pll_len = roundup(page_list_len * sizeof(u64), 32);
920
921 c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);
922 if (!c4pl)
923 return ERR_PTR(-ENOMEM);
924 928
925 c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, 929 if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
926 pll_len, &dma_addr, 930 return -ENOMEM;
927 GFP_KERNEL);
928 if (!c4pl->ibpl.page_list) {
929 kfree(c4pl);
930 return ERR_PTR(-ENOMEM);
931 }
932 dma_unmap_addr_set(c4pl, mapping, dma_addr);
933 c4pl->dma_addr = dma_addr;
934 c4pl->dev = dev;
935 c4pl->pll_len = pll_len;
936 931
937 PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", 932 mhp->mpl[mhp->mpl_len++] = addr;
938 __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
939 &c4pl->dma_addr);
940 933
941 return &c4pl->ibpl; 934 return 0;
942} 935}
943 936
944void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) 937int c4iw_map_mr_sg(struct ib_mr *ibmr,
938 struct scatterlist *sg,
939 int sg_nents)
945{ 940{
946 struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); 941 struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
947 942
948 PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", 943 mhp->mpl_len = 0;
949 __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
950 &c4pl->dma_addr);
951 944
952 dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, 945 return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
953 c4pl->pll_len,
954 c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
955 kfree(c4pl);
956} 946}
957 947
958int c4iw_dereg_mr(struct ib_mr *ib_mr) 948int c4iw_dereg_mr(struct ib_mr *ib_mr)
@@ -970,6 +960,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
970 rhp = mhp->rhp; 960 rhp = mhp->rhp;
971 mmid = mhp->attr.stag >> 8; 961 mmid = mhp->attr.stag >> 8;
972 remove_handle(rhp, &rhp->mmidr, mmid); 962 remove_handle(rhp, &rhp->mmidr, mmid);
963 if (mhp->mpl)
964 dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
965 mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
973 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 966 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
974 mhp->attr.pbl_addr); 967 mhp->attr.pbl_addr);
975 if (mhp->attr.pbl_size) 968 if (mhp->attr.pbl_size)
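
The mem.c changes above wire up the driver half of the new registration API: c4iw_set_page() collects page addresses into the DMA-coherent mpl[] array and c4iw_map_mr_sg() hands the scatterlist to ib_sg_to_pages(). For context, a kernel ULP drives this code roughly as follows (a sketch against the 4.4-era API; the qp/mr/sg arguments are placeholders, and the MR is assumed to come from ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg)):

#include <rdma/ib_verbs.h>

/* Illustrative only: map an already DMA-mapped S/G list into 'mr'
 * and post the fast registration on 'qp'.  Real ULPs normally pull
 * the MR from a pool and chain this WR ahead of the data transfer. */
static int example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
			    struct scatterlist *sg, int sg_nents)
{
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr rwr = { };
	int n;

	/* ib_map_mr_sg() ends up in the driver's map_mr_sg callback
	 * (c4iw_map_mr_sg above), which fills the page list through
	 * its set_page hook. */
	n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
	if (n < sg_nents)
		return n < 0 ? n : -EINVAL;

	rwr.wr.opcode     = IB_WR_REG_MR;
	rwr.wr.send_flags = IB_SEND_SIGNALED;
	rwr.mr            = mr;
	rwr.key           = mr->rkey;
	rwr.access        = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &rwr.wr, &bad_wr);
}
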
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7746113552e7..0a7d99818b17 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
209 if (addr >= rdev->oc_mw_pa) 209 if (addr >= rdev->oc_mw_pa)
210 vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); 210 vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
211 else { 211 else {
212 if (is_t5(rdev->lldi.adapter_type)) 212 if (!is_t4(rdev->lldi.adapter_type))
213 vma->vm_page_prot = 213 vma->vm_page_prot =
214 t4_pgprot_wc(vma->vm_page_prot); 214 t4_pgprot_wc(vma->vm_page_prot);
215 else 215 else
@@ -557,8 +557,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
557 dev->ibdev.bind_mw = c4iw_bind_mw; 557 dev->ibdev.bind_mw = c4iw_bind_mw;
558 dev->ibdev.dealloc_mw = c4iw_dealloc_mw; 558 dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
559 dev->ibdev.alloc_mr = c4iw_alloc_mr; 559 dev->ibdev.alloc_mr = c4iw_alloc_mr;
560 dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; 560 dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
561 dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
562 dev->ibdev.attach_mcast = c4iw_multicast_attach; 561 dev->ibdev.attach_mcast = c4iw_multicast_attach;
563 dev->ibdev.detach_mcast = c4iw_multicast_detach; 562 dev->ibdev.detach_mcast = c4iw_multicast_detach;
564 dev->ibdev.process_mad = c4iw_process_mad; 563 dev->ibdev.process_mad = c4iw_process_mad;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 6517e1208ccb..aa515afee724 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
528 if (wr->num_sge > T4_MAX_SEND_SGE) 528 if (wr->num_sge > T4_MAX_SEND_SGE)
529 return -EINVAL; 529 return -EINVAL;
530 wqe->write.r2 = 0; 530 wqe->write.r2 = 0;
531 wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); 531 wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
532 wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); 532 wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
533 if (wr->num_sge) { 533 if (wr->num_sge) {
534 if (wr->send_flags & IB_SEND_INLINE) { 534 if (wr->send_flags & IB_SEND_INLINE) {
535 ret = build_immd(sq, wqe->write.u.immd_src, wr, 535 ret = build_immd(sq, wqe->write.u.immd_src, wr,
@@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
566 if (wr->num_sge > 1) 566 if (wr->num_sge > 1)
567 return -EINVAL; 567 return -EINVAL;
568 if (wr->num_sge) { 568 if (wr->num_sge) {
569 wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); 569 wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
570 wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr 570 wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
571 >> 32)); 571 >> 32));
572 wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); 572 wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
573 wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); 573 wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
574 wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); 574 wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
575 wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr 575 wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
@@ -605,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
605 return 0; 605 return 0;
606} 606}
607 607
608static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, 608static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
609 struct ib_send_wr *wr, u8 *len16, u8 t5dev) 609 struct ib_reg_wr *wr, u8 *len16, u8 t5dev)
610{ 610{
611 611 struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
612 struct fw_ri_immd *imdp; 612 struct fw_ri_immd *imdp;
613 __be64 *p; 613 __be64 *p;
614 int i; 614 int i;
615 int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); 615 int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
616 int rem; 616 int rem;
617 617
618 if (wr->wr.fast_reg.page_list_len > 618 if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
619 t4_max_fr_depth(use_dsgl))
620 return -EINVAL; 619 return -EINVAL;
621 620
622 wqe->fr.qpbinde_to_dcacpu = 0; 621 wqe->fr.qpbinde_to_dcacpu = 0;
623 wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; 622 wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
624 wqe->fr.addr_type = FW_RI_VA_BASED_TO; 623 wqe->fr.addr_type = FW_RI_VA_BASED_TO;
625 wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); 624 wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
626 wqe->fr.len_hi = 0; 625 wqe->fr.len_hi = 0;
627 wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); 626 wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
628 wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); 627 wqe->fr.stag = cpu_to_be32(wr->key);
629 wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); 628 wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
630 wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 629 wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
631 0xffffffff); 630 0xffffffff);
632 631
633 if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { 632 if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
634 struct c4iw_fr_page_list *c4pl =
635 to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
636 struct fw_ri_dsgl *sglp; 633 struct fw_ri_dsgl *sglp;
637 634
638 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { 635 for (i = 0; i < mhp->mpl_len; i++)
639 wr->wr.fast_reg.page_list->page_list[i] = (__force u64) 636 mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
640 cpu_to_be64((u64)
641 wr->wr.fast_reg.page_list->page_list[i]);
642 }
643 637
644 sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); 638 sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
645 sglp->op = FW_RI_DATA_DSGL; 639 sglp->op = FW_RI_DATA_DSGL;
646 sglp->r1 = 0; 640 sglp->r1 = 0;
647 sglp->nsge = cpu_to_be16(1); 641 sglp->nsge = cpu_to_be16(1);
648 sglp->addr0 = cpu_to_be64(c4pl->dma_addr); 642 sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
649 sglp->len0 = cpu_to_be32(pbllen); 643 sglp->len0 = cpu_to_be32(pbllen);
650 644
651 *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); 645 *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
@@ -657,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
657 imdp->immdlen = cpu_to_be32(pbllen); 651 imdp->immdlen = cpu_to_be32(pbllen);
658 p = (__be64 *)(imdp + 1); 652 p = (__be64 *)(imdp + 1);
659 rem = pbllen; 653 rem = pbllen;
660 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { 654 for (i = 0; i < mhp->mpl_len; i++) {
661 *p = cpu_to_be64( 655 *p = cpu_to_be64((u64)mhp->mpl[i]);
662 (u64)wr->wr.fast_reg.page_list->page_list[i]);
663 rem -= sizeof(*p); 656 rem -= sizeof(*p);
664 if (++p == (__be64 *)&sq->queue[sq->size]) 657 if (++p == (__be64 *)&sq->queue[sq->size])
665 p = (__be64 *)sq->queue; 658 p = (__be64 *)sq->queue;
@@ -712,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
712 spin_lock_irqsave(&qhp->rhp->lock, flags); 705 spin_lock_irqsave(&qhp->rhp->lock, flags);
713 spin_lock(&qhp->lock); 706 spin_lock(&qhp->lock);
714 if (qhp->rhp->db_state == NORMAL) 707 if (qhp->rhp->db_state == NORMAL)
715 t4_ring_sq_db(&qhp->wq, inc, 708 t4_ring_sq_db(&qhp->wq, inc, NULL);
716 is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
717 else { 709 else {
718 add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); 710 add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
719 qhp->wq.sq.wq_pidx_inc += inc; 711 qhp->wq.sq.wq_pidx_inc += inc;
@@ -730,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
730 spin_lock_irqsave(&qhp->rhp->lock, flags); 722 spin_lock_irqsave(&qhp->rhp->lock, flags);
731 spin_lock(&qhp->lock); 723 spin_lock(&qhp->lock);
732 if (qhp->rhp->db_state == NORMAL) 724 if (qhp->rhp->db_state == NORMAL)
733 t4_ring_rq_db(&qhp->wq, inc, 725 t4_ring_rq_db(&qhp->wq, inc, NULL);
734 is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
735 else { 726 else {
736 add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); 727 add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
737 qhp->wq.rq.wq_pidx_inc += inc; 728 qhp->wq.rq.wq_pidx_inc += inc;
@@ -813,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
813 if (!qhp->wq.sq.oldest_read) 804 if (!qhp->wq.sq.oldest_read)
814 qhp->wq.sq.oldest_read = swsqe; 805 qhp->wq.sq.oldest_read = swsqe;
815 break; 806 break;
816 case IB_WR_FAST_REG_MR: 807 case IB_WR_REG_MR:
817 fw_opcode = FW_RI_FR_NSMR_WR; 808 fw_opcode = FW_RI_FR_NSMR_WR;
818 swsqe->opcode = FW_RI_FAST_REGISTER; 809 swsqe->opcode = FW_RI_FAST_REGISTER;
819 err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, 810 err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
820 is_t5( 811 is_t5(
821 qhp->rhp->rdev.lldi.adapter_type) ? 812 qhp->rhp->rdev.lldi.adapter_type) ?
822 1 : 0); 813 1 : 0);
823 break; 814 break;
824 case IB_WR_LOCAL_INV: 815 case IB_WR_LOCAL_INV:
825 if (wr->send_flags & IB_SEND_FENCE) 816 if (wr->send_flags & IB_SEND_FENCE)
@@ -860,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
860 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); 851 idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
861 } 852 }
862 if (!qhp->rhp->rdev.status_page->db_off) { 853 if (!qhp->rhp->rdev.status_page->db_off) {
863 t4_ring_sq_db(&qhp->wq, idx, 854 t4_ring_sq_db(&qhp->wq, idx, wqe);
864 is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
865 spin_unlock_irqrestore(&qhp->lock, flag); 855 spin_unlock_irqrestore(&qhp->lock, flag);
866 } else { 856 } else {
867 spin_unlock_irqrestore(&qhp->lock, flag); 857 spin_unlock_irqrestore(&qhp->lock, flag);
@@ -934,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
934 num_wrs--; 924 num_wrs--;
935 } 925 }
936 if (!qhp->rhp->rdev.status_page->db_off) { 926 if (!qhp->rhp->rdev.status_page->db_off) {
937 t4_ring_rq_db(&qhp->wq, idx, 927 t4_ring_rq_db(&qhp->wq, idx, wqe);
938 is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
939 spin_unlock_irqrestore(&qhp->lock, flag); 928 spin_unlock_irqrestore(&qhp->lock, flag);
940 } else { 929 } else {
941 spin_unlock_irqrestore(&qhp->lock, flag); 930 spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1875,7 +1864,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1875 attrs.rq_db_inc = attr->rq_psn; 1864 attrs.rq_db_inc = attr->rq_psn;
1876 mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; 1865 mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
1877 mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; 1866 mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
1878 if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && 1867 if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
1879 (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) 1868 (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
1880 return -EINVAL; 1869 return -EINVAL;
1881 1870
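
Besides the build_memreg() rework, the qp.c hunks above convert build_rdma_write()/build_rdma_read() from the old wr->wr.rdma union to the rdma_wr() container. On the posting side the equivalent change looks roughly like this (sketch only; the qp/sge/remote_addr/rkey arguments are placeholders):

#include <rdma/ib_verbs.h>

/* Illustrative only: post an RDMA WRITE using the new WR layout. */
static int example_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
			      u64 remote_addr, u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_rdma_wr wr = { };

	wr.wr.opcode     = IB_WR_RDMA_WRITE;
	wr.wr.send_flags = IB_SEND_SIGNALED;
	wr.wr.sg_list    = sge;
	wr.wr.num_sge    = 1;
	wr.remote_addr   = remote_addr;	/* was wr.wr.rdma.remote_addr */
	wr.rkey          = rkey;	/* was wr.wr.rdma.rkey */

	return ib_post_send(qp, &wr.wr, &bad_wr);
}
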
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 274a7ab13bef..1092a2d1f607 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
455 } 455 }
456} 456}
457 457
458static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, 458static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
459 union t4_wr *wqe)
460{ 459{
461 460
462 /* Flush host queue memory writes. */ 461 /* Flush host queue memory writes. */
@@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
482 writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db); 481 writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db);
483} 482}
484 483
485static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, 484static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
486 union t4_recv_wr *wqe) 485 union t4_recv_wr *wqe)
487{ 486{
488 487
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 1688a17de4fe..86af71351d9a 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
76 struct mlx4_dev *dev = ibdev->dev; 76 struct mlx4_dev *dev = ibdev->dev;
77 int is_mcast = 0; 77 int is_mcast = 0;
78 struct in6_addr in6; 78 struct in6_addr in6;
79 u16 vlan_tag; 79 u16 vlan_tag = 0xffff;
80 union ib_gid sgid;
81 struct ib_gid_attr gid_attr;
82 int ret;
80 83
81 memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); 84 memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
82 if (rdma_is_multicast_addr(&in6)) { 85 if (rdma_is_multicast_addr(&in6)) {
@@ -85,7 +88,17 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
85 } else { 88 } else {
86 memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); 89 memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
87 } 90 }
88 vlan_tag = ah_attr->vlan_id; 91 ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
92 ah_attr->grh.sgid_index, &sgid, &gid_attr);
93 if (ret)
94 return ERR_PTR(ret);
95 memset(ah->av.eth.s_mac, 0, ETH_ALEN);
96 if (gid_attr.ndev) {
97 if (is_vlan_dev(gid_attr.ndev))
98 vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
99 memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
100 dev_put(gid_attr.ndev);
101 }
89 if (vlan_tag < 0x1000) 102 if (vlan_tag < 0x1000)
90 vlan_tag |= (ah_attr->sl & 7) << 13; 103 vlan_tag |= (ah_attr->sl & 7) << 13;
91 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); 104 ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5fd49f9435f9..b88fc8f5ab18 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -818,7 +818,7 @@ repoll:
818 wc->opcode = IB_WC_LSO; 818 wc->opcode = IB_WC_LSO;
819 break; 819 break;
820 case MLX4_OPCODE_FMR: 820 case MLX4_OPCODE_FMR:
821 wc->opcode = IB_WC_FAST_REG_MR; 821 wc->opcode = IB_WC_REG_MR;
822 break; 822 break;
823 case MLX4_OPCODE_LOCAL_INVAL: 823 case MLX4_OPCODE_LOCAL_INVAL:
824 wc->opcode = IB_WC_LOCAL_INV; 824 wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1cd75ff02251..870e56b6b25f 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -457,7 +457,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
457 struct ib_grh *grh, struct ib_mad *mad) 457 struct ib_grh *grh, struct ib_mad *mad)
458{ 458{
459 struct ib_sge list; 459 struct ib_sge list;
460 struct ib_send_wr wr, *bad_wr; 460 struct ib_ud_wr wr;
461 struct ib_send_wr *bad_wr;
461 struct mlx4_ib_demux_pv_ctx *tun_ctx; 462 struct mlx4_ib_demux_pv_ctx *tun_ctx;
462 struct mlx4_ib_demux_pv_qp *tun_qp; 463 struct mlx4_ib_demux_pv_qp *tun_qp;
463 struct mlx4_rcv_tunnel_mad *tun_mad; 464 struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -582,18 +583,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
582 list.length = sizeof (struct mlx4_rcv_tunnel_mad); 583 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
583 list.lkey = tun_ctx->pd->local_dma_lkey; 584 list.lkey = tun_ctx->pd->local_dma_lkey;
584 585
585 wr.wr.ud.ah = ah; 586 wr.ah = ah;
586 wr.wr.ud.port_num = port; 587 wr.port_num = port;
587 wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; 588 wr.remote_qkey = IB_QP_SET_QKEY;
588 wr.wr.ud.remote_qpn = dqpn; 589 wr.remote_qpn = dqpn;
589 wr.next = NULL; 590 wr.wr.next = NULL;
590 wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); 591 wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
591 wr.sg_list = &list; 592 wr.wr.sg_list = &list;
592 wr.num_sge = 1; 593 wr.wr.num_sge = 1;
593 wr.opcode = IB_WR_SEND; 594 wr.wr.opcode = IB_WR_SEND;
594 wr.send_flags = IB_SEND_SIGNALED; 595 wr.wr.send_flags = IB_SEND_SIGNALED;
595 596
596 ret = ib_post_send(src_qp, &wr, &bad_wr); 597 ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
597out: 598out:
598 if (ret) 599 if (ret)
599 ib_destroy_ah(ah); 600 ib_destroy_ah(ah);
@@ -824,18 +825,29 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
824{ 825{
825 struct mlx4_counter counter_stats; 826 struct mlx4_counter counter_stats;
826 struct mlx4_ib_dev *dev = to_mdev(ibdev); 827 struct mlx4_ib_dev *dev = to_mdev(ibdev);
827 int err; 828 struct counter_index *tmp_counter;
829 int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
828 830
829 if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) 831 if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
830 return -EINVAL; 832 return -EINVAL;
831 833
832 memset(&counter_stats, 0, sizeof(counter_stats)); 834 memset(&counter_stats, 0, sizeof(counter_stats));
833 err = mlx4_get_counter_stats(dev->dev, 835 mutex_lock(&dev->counters_table[port_num - 1].mutex);
834 dev->counters[port_num - 1].index, 836 list_for_each_entry(tmp_counter,
835 &counter_stats, 0); 837 &dev->counters_table[port_num - 1].counters_list,
836 if (err) 838 list) {
837 err = IB_MAD_RESULT_FAILURE; 839 err = mlx4_get_counter_stats(dev->dev,
838 else { 840 tmp_counter->index,
841 &counter_stats, 0);
842 if (err) {
843 err = IB_MAD_RESULT_FAILURE;
844 stats_avail = 0;
845 break;
846 }
847 stats_avail = 1;
848 }
849 mutex_unlock(&dev->counters_table[port_num - 1].mutex);
850 if (stats_avail) {
839 memset(out_mad->data, 0, sizeof out_mad->data); 851 memset(out_mad->data, 0, sizeof out_mad->data);
840 switch (counter_stats.counter_mode & 0xf) { 852 switch (counter_stats.counter_mode & 0xf) {
841 case 0: 853 case 0:
@@ -1172,10 +1184,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1172int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, 1184int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1173 enum ib_qp_type dest_qpt, u16 pkey_index, 1185 enum ib_qp_type dest_qpt, u16 pkey_index,
1174 u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, 1186 u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
1175 u8 *s_mac, struct ib_mad *mad) 1187 u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
1176{ 1188{
1177 struct ib_sge list; 1189 struct ib_sge list;
1178 struct ib_send_wr wr, *bad_wr; 1190 struct ib_ud_wr wr;
1191 struct ib_send_wr *bad_wr;
1179 struct mlx4_ib_demux_pv_ctx *sqp_ctx; 1192 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1180 struct mlx4_ib_demux_pv_qp *sqp; 1193 struct mlx4_ib_demux_pv_qp *sqp;
1181 struct mlx4_mad_snd_buf *sqp_mad; 1194 struct mlx4_mad_snd_buf *sqp_mad;
@@ -1246,22 +1259,25 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1246 list.length = sizeof (struct mlx4_mad_snd_buf); 1259 list.length = sizeof (struct mlx4_mad_snd_buf);
1247 list.lkey = sqp_ctx->pd->local_dma_lkey; 1260 list.lkey = sqp_ctx->pd->local_dma_lkey;
1248 1261
1249 wr.wr.ud.ah = ah; 1262 wr.ah = ah;
1250 wr.wr.ud.port_num = port; 1263 wr.port_num = port;
1251 wr.wr.ud.pkey_index = wire_pkey_ix; 1264 wr.pkey_index = wire_pkey_ix;
1252 wr.wr.ud.remote_qkey = qkey; 1265 wr.remote_qkey = qkey;
1253 wr.wr.ud.remote_qpn = remote_qpn; 1266 wr.remote_qpn = remote_qpn;
1254 wr.next = NULL; 1267 wr.wr.next = NULL;
1255 wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); 1268 wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1256 wr.sg_list = &list; 1269 wr.wr.sg_list = &list;
1257 wr.num_sge = 1; 1270 wr.wr.num_sge = 1;
1258 wr.opcode = IB_WR_SEND; 1271 wr.wr.opcode = IB_WR_SEND;
1259 wr.send_flags = IB_SEND_SIGNALED; 1272 wr.wr.send_flags = IB_SEND_SIGNALED;
1260 if (s_mac) 1273 if (s_mac)
1261 memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); 1274 memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
1275 if (vlan_id < 0x1000)
1276 vlan_id |= (attr->sl & 7) << 13;
1277 to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
1262 1278
1263 1279
1264 ret = ib_post_send(send_qp, &wr, &bad_wr); 1280 ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
1265out: 1281out:
1266 if (ret) 1282 if (ret)
1267 ib_destroy_ah(ah); 1283 ib_destroy_ah(ah);
@@ -1295,6 +1311,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
1295 u8 *slave_id; 1311 u8 *slave_id;
1296 int slave; 1312 int slave;
1297 int port; 1313 int port;
1314 u16 vlan_id;
1298 1315
1299 /* Get slave that sent this packet */ 1316 /* Get slave that sent this packet */
1300 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || 1317 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1383,10 +1400,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
1383 fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); 1400 fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
1384 1401
1385 memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); 1402 memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
1386 ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); 1403 vlan_id = be16_to_cpu(tunnel->hdr.vlan);
1387 /* if slave have default vlan use it */ 1404 /* if slave have default vlan use it */
1388 mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, 1405 mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
1389 &ah_attr.vlan_id, &ah_attr.sl); 1406 &vlan_id, &ah_attr.sl);
1390 1407
1391 mlx4_ib_send_to_wire(dev, slave, ctx->port, 1408 mlx4_ib_send_to_wire(dev, slave, ctx->port,
1392 is_proxy_qp0(dev, wc->src_qp, slave) ? 1409 is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1394,7 +1411,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
1394 be16_to_cpu(tunnel->hdr.pkey_index), 1411 be16_to_cpu(tunnel->hdr.pkey_index),
1395 be32_to_cpu(tunnel->hdr.remote_qpn), 1412 be32_to_cpu(tunnel->hdr.remote_qpn),
1396 be32_to_cpu(tunnel->hdr.qkey), 1413 be32_to_cpu(tunnel->hdr.qkey),
1397 &ah_attr, wc->smac, &tunnel->mad); 1414 &ah_attr, wc->smac, vlan_id, &tunnel->mad);
1398} 1415}
1399 1416
1400static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, 1417static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
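
mlx4_ib_send_to_slave() and mlx4_ib_send_to_wire() above switch from filling wr.wr.ud.* fields of a bare ib_send_wr to building a struct ib_ud_wr and posting &wr.wr. The shape of that conversion in isolation (sketch; all arguments are placeholders):

#include <rdma/ib_verbs.h>

/* Illustrative only: build and post a UD send with the new
 * ib_ud_wr container. */
static int example_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			   struct ib_sge *sge,
			   u32 remote_qpn, u32 remote_qkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_ud_wr wr = { };

	wr.wr.opcode     = IB_WR_SEND;
	wr.wr.send_flags = IB_SEND_SIGNALED;
	wr.wr.sg_list    = sge;
	wr.wr.num_sge    = 1;
	wr.ah            = ah;		/* was wr.wr.ud.ah */
	wr.remote_qpn    = remote_qpn;	/* was wr.wr.ud.remote_qpn */
	wr.remote_qkey   = remote_qkey;	/* was wr.wr.ud.remote_qkey */

	return ib_post_send(qp, &wr.wr, &bad_wr);
}
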
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index efecdf0216d8..f567160a4a56 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -335,7 +335,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
335 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) 335 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
336 return index; 336 return index;
337 337
338 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid); 338 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
339 if (ret) 339 if (ret)
340 return ret; 340 return ret;
341 341
@@ -442,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
442 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 442 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
443 } 443 }
444 444
445 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
446
445 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 447 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
446 0xffffff; 448 0xffffff;
447 props->vendor_part_id = dev->dev->persist->pdev->device; 449 props->vendor_part_id = dev->dev->persist->pdev->device;
@@ -754,7 +756,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
754 if (!rdma_cap_roce_gid_table(ibdev, port)) 756 if (!rdma_cap_roce_gid_table(ibdev, port))
755 return -ENODEV; 757 return -ENODEV;
756 758
757 ret = ib_get_cached_gid(ibdev, port, index, gid); 759 ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
758 if (ret == -EAGAIN) { 760 if (ret == -EAGAIN) {
759 memcpy(gid, &zgid, sizeof(*gid)); 761 memcpy(gid, &zgid, sizeof(*gid));
760 return 0; 762 return 0;
@@ -1247,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1247 return 0; 1249 return 0;
1248} 1250}
1249 1251
1252static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1253 struct mlx4_ib_counters *ctr_table)
1254{
1255 struct counter_index *counter, *tmp_count;
1256
1257 mutex_lock(&ctr_table->mutex);
1258 list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1259 list) {
1260 if (counter->allocated)
1261 mlx4_counter_free(ibdev->dev, counter->index);
1262 list_del(&counter->list);
1263 kfree(counter);
1264 }
1265 mutex_unlock(&ctr_table->mutex);
1266}
1267
1250int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 1268int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1251 union ib_gid *gid) 1269 union ib_gid *gid)
1252{ 1270{
@@ -2131,6 +2149,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2131 int num_req_counters; 2149 int num_req_counters;
2132 int allocated; 2150 int allocated;
2133 u32 counter_index; 2151 u32 counter_index;
2152 struct counter_index *new_counter_index = NULL;
2134 2153
2135 pr_info_once("%s", mlx4_ib_version); 2154 pr_info_once("%s", mlx4_ib_version);
2136 2155
@@ -2247,8 +2266,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2247 ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; 2266 ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
2248 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; 2267 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
2249 ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; 2268 ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
2250 ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; 2269 ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
2251 ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list;
2252 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; 2270 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
2253 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 2271 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
2254 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 2272 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
@@ -2293,7 +2311,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2293 2311
2294 ibdev->ib_dev.uverbs_ex_cmd_mask |= 2312 ibdev->ib_dev.uverbs_ex_cmd_mask |=
2295 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 2313 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
2296 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); 2314 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
2315 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
2297 2316
2298 mlx4_ib_alloc_eqs(dev, ibdev); 2317 mlx4_ib_alloc_eqs(dev, ibdev);
2299 2318
@@ -2302,6 +2321,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2302 if (init_node_data(ibdev)) 2321 if (init_node_data(ibdev))
2303 goto err_map; 2322 goto err_map;
2304 2323
2324 for (i = 0; i < ibdev->num_ports; ++i) {
2325 mutex_init(&ibdev->counters_table[i].mutex);
2326 INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2327 }
2328
2305 num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports; 2329 num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2306 for (i = 0; i < num_req_counters; ++i) { 2330 for (i = 0; i < num_req_counters; ++i) {
2307 mutex_init(&ibdev->qp1_proxy_lock[i]); 2331 mutex_init(&ibdev->qp1_proxy_lock[i]);
@@ -2320,15 +2344,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2320 counter_index = mlx4_get_default_counter_index(dev, 2344 counter_index = mlx4_get_default_counter_index(dev,
2321 i + 1); 2345 i + 1);
2322 } 2346 }
2323 ibdev->counters[i].index = counter_index; 2347 new_counter_index = kmalloc(sizeof(*new_counter_index),
2324 ibdev->counters[i].allocated = allocated; 2348 GFP_KERNEL);
2349 if (!new_counter_index) {
2350 if (allocated)
2351 mlx4_counter_free(ibdev->dev, counter_index);
2352 goto err_counter;
2353 }
2354 new_counter_index->index = counter_index;
2355 new_counter_index->allocated = allocated;
2356 list_add_tail(&new_counter_index->list,
2357 &ibdev->counters_table[i].counters_list);
2358 ibdev->counters_table[i].default_counter = counter_index;
2325 pr_info("counter index %d for port %d allocated %d\n", 2359 pr_info("counter index %d for port %d allocated %d\n",
2326 counter_index, i + 1, allocated); 2360 counter_index, i + 1, allocated);
2327 } 2361 }
2328 if (mlx4_is_bonded(dev)) 2362 if (mlx4_is_bonded(dev))
2329 for (i = 1; i < ibdev->num_ports ; ++i) { 2363 for (i = 1; i < ibdev->num_ports ; ++i) {
2330 ibdev->counters[i].index = ibdev->counters[0].index; 2364 new_counter_index =
2331 ibdev->counters[i].allocated = 0; 2365 kmalloc(sizeof(struct counter_index),
2366 GFP_KERNEL);
2367 if (!new_counter_index)
2368 goto err_counter;
2369 new_counter_index->index = counter_index;
2370 new_counter_index->allocated = 0;
2371 list_add_tail(&new_counter_index->list,
2372 &ibdev->counters_table[i].counters_list);
2373 ibdev->counters_table[i].default_counter =
2374 counter_index;
2332 } 2375 }
2333 2376
2334 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 2377 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
@@ -2437,12 +2480,9 @@ err_steer_qp_release:
2437 mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 2480 mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2438 ibdev->steer_qpn_count); 2481 ibdev->steer_qpn_count);
2439err_counter: 2482err_counter:
2440 for (i = 0; i < ibdev->num_ports; ++i) { 2483 for (i = 0; i < ibdev->num_ports; ++i)
2441 if (ibdev->counters[i].index != -1 && 2484 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2442 ibdev->counters[i].allocated) 2485
2443 mlx4_counter_free(ibdev->dev,
2444 ibdev->counters[i].index);
2445 }
2446err_map: 2486err_map:
2447 iounmap(ibdev->uar_map); 2487 iounmap(ibdev->uar_map);
2448 2488
@@ -2546,9 +2586,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2546 2586
2547 iounmap(ibdev->uar_map); 2587 iounmap(ibdev->uar_map);
2548 for (p = 0; p < ibdev->num_ports; ++p) 2588 for (p = 0; p < ibdev->num_ports; ++p)
2549 if (ibdev->counters[p].index != -1 && 2589 mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
2550 ibdev->counters[p].allocated) 2590
2551 mlx4_counter_free(ibdev->dev, ibdev->counters[p].index);
2552 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) 2591 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
2553 mlx4_CLOSE_PORT(dev, p); 2592 mlx4_CLOSE_PORT(dev, p);
2554 2593
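
The main.c changes above replace the single per-port counter_index with a per-port mlx4_ib_counters table: a mutex-protected list of counter_index entries plus a cached default_counter, torn down by the new mlx4_ib_delete_counters_table(). A minimal sketch of the allocation side of that pattern, assuming the structures introduced in the mlx4_ib.h hunk further down (the helper name is hypothetical):

	/*
	 * Hypothetical helper: allocate one more HW counter for 'port' and
	 * park it on the per-port list so mlx4_ib_delete_counters_table()
	 * reclaims it at teardown.  Mirrors what mlx4_ib_add() and
	 * create_qp_lb_counter() do in this series.
	 */
	static int mlx4_ib_add_port_counter(struct mlx4_ib_dev *ibdev, u8 port)
	{
		struct mlx4_ib_counters *ctr_table = &ibdev->counters_table[port - 1];
		struct counter_index *entry;
		u32 idx;
		int err;

		err = mlx4_counter_alloc(ibdev->dev, &idx);
		if (err)
			return err;

		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry) {
			mlx4_counter_free(ibdev->dev, idx);
			return -ENOMEM;
		}
		entry->index = idx;
		entry->allocated = 1;	/* teardown must call mlx4_counter_free() */

		mutex_lock(&ctr_table->mutex);
		list_add_tail(&entry->list, &ctr_table->counters_list);
		mutex_unlock(&ctr_table->mutex);

		return 0;
	}

The allocated flag is what lets the bonded-port case share port 0's counter (allocated = 0) without double-freeing it when the table is deleted.
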
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 2d5bccd71fc6..99451d887266 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
222 spin_unlock_irqrestore(&dev->sm_lock, flags); 222 spin_unlock_irqrestore(&dev->sm_lock, flags);
223 return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), 223 return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
224 ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, 224 ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
225 &ah_attr, NULL, mad); 225 &ah_attr, NULL, 0xffff, mad);
226} 226}
227 227
228static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, 228static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1e7b23bb2eb0..1caa11edac03 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -129,10 +129,17 @@ struct mlx4_ib_cq {
129 struct list_head recv_qp_list; 129 struct list_head recv_qp_list;
130}; 130};
131 131
132#define MLX4_MR_PAGES_ALIGN 0x40
133
132struct mlx4_ib_mr { 134struct mlx4_ib_mr {
133 struct ib_mr ibmr; 135 struct ib_mr ibmr;
136 __be64 *pages;
137 dma_addr_t page_map;
138 u32 npages;
139 u32 max_pages;
134 struct mlx4_mr mmr; 140 struct mlx4_mr mmr;
135 struct ib_umem *umem; 141 struct ib_umem *umem;
142 void *pages_alloc;
136}; 143};
137 144
138struct mlx4_ib_mw { 145struct mlx4_ib_mw {
@@ -140,12 +147,6 @@ struct mlx4_ib_mw {
140 struct mlx4_mw mmw; 147 struct mlx4_mw mmw;
141}; 148};
142 149
143struct mlx4_ib_fast_reg_page_list {
144 struct ib_fast_reg_page_list ibfrpl;
145 __be64 *mapped_page_list;
146 dma_addr_t map;
147};
148
149struct mlx4_ib_fmr { 150struct mlx4_ib_fmr {
150 struct ib_fmr ibfmr; 151 struct ib_fmr ibfmr;
151 struct mlx4_fmr mfmr; 152 struct mlx4_fmr mfmr;
@@ -320,6 +321,7 @@ struct mlx4_ib_qp {
320 struct list_head qps_list; 321 struct list_head qps_list;
321 struct list_head cq_recv_list; 322 struct list_head cq_recv_list;
322 struct list_head cq_send_list; 323 struct list_head cq_send_list;
324 struct counter_index *counter_index;
323}; 325};
324 326
325struct mlx4_ib_srq { 327struct mlx4_ib_srq {
@@ -528,10 +530,17 @@ struct mlx4_ib_iov_port {
528}; 530};
529 531
530struct counter_index { 532struct counter_index {
533 struct list_head list;
531 u32 index; 534 u32 index;
532 u8 allocated; 535 u8 allocated;
533}; 536};
534 537
538struct mlx4_ib_counters {
539 struct list_head counters_list;
540 struct mutex mutex; /* mutex for accessing counters list */
541 u32 default_counter;
542};
543
535struct mlx4_ib_dev { 544struct mlx4_ib_dev {
536 struct ib_device ib_dev; 545 struct ib_device ib_dev;
537 struct mlx4_dev *dev; 546 struct mlx4_dev *dev;
@@ -550,7 +559,7 @@ struct mlx4_ib_dev {
550 struct mutex cap_mask_mutex; 559 struct mutex cap_mask_mutex;
551 bool ib_active; 560 bool ib_active;
552 struct mlx4_ib_iboe iboe; 561 struct mlx4_ib_iboe iboe;
553 struct counter_index counters[MLX4_MAX_PORTS]; 562 struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
554 int *eq_table; 563 int *eq_table;
555 struct kobject *iov_parent; 564 struct kobject *iov_parent;
556 struct kobject *ports_parent; 565 struct kobject *ports_parent;
@@ -638,11 +647,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
638 return container_of(ibmw, struct mlx4_ib_mw, ibmw); 647 return container_of(ibmw, struct mlx4_ib_mw, ibmw);
639} 648}
640 649
641static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
642{
643 return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
644}
645
646static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) 650static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
647{ 651{
648 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); 652 return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -706,10 +710,9 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
706struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, 710struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
707 enum ib_mr_type mr_type, 711 enum ib_mr_type mr_type,
708 u32 max_num_sg); 712 u32 max_num_sg);
709struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 713int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
710 int page_list_len); 714 struct scatterlist *sg,
711void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); 715 int sg_nents);
712
713int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); 716int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
714int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); 717int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
715struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, 718struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
@@ -813,7 +816,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
813int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, 816int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
814 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, 817 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
815 u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, 818 u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
816 struct ib_mad *mad); 819 u16 vlan_id, struct ib_mad *mad);
817 820
818__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); 821__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
819 822
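
The header diff above removes the ib_fast_reg_page_list plumbing and exports mlx4_ib_map_mr_sg(), the driver hook behind the series' new memory registration API. For orientation, a sketch of how an upper-layer protocol is expected to use that API after this series, assuming the 4.4-era core verbs (ib_alloc_mr(), ib_map_mr_sg() with a page_size argument, and IB_WR_REG_MR posted through struct ib_reg_wr); none of the names below besides the verbs themselves come from this patch:

	/* Sketch: register a DMA-mapped scatterlist and post the REG_MR WR. */
	static int fast_reg_sketch(struct ib_pd *pd, struct ib_qp *qp,
				   struct scatterlist *sg, int sg_nents,
				   u32 max_num_sg)
	{
		struct ib_reg_wr rwr;
		struct ib_send_wr *bad_wr;
		struct ib_mr *mr;
		int n;

		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		/* the driver's map_mr_sg hook fills its private page list */
		n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
		if (n < sg_nents) {
			ib_dereg_mr(mr);
			return n < 0 ? n : -EINVAL;
		}

		memset(&rwr, 0, sizeof(rwr));
		rwr.wr.opcode = IB_WR_REG_MR;
		rwr.wr.send_flags = IB_SEND_SIGNALED;
		rwr.mr = mr;
		rwr.key = mr->rkey;
		rwr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

		return ib_post_send(qp, &rwr.wr, &bad_wr);
	}

The mr.c hunk below supplies the mlx4 half of this: mlx4_alloc_priv_pages() replaces the old per-WR page list, and mlx4_ib_map_mr_sg()/mlx4_set_page() fill it when the ULP calls ib_map_mr_sg().
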
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 2542fd3c1a49..4d1e1c632603 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
59 struct mlx4_ib_mr *mr; 59 struct mlx4_ib_mr *mr;
60 int err; 60 int err;
61 61
62 mr = kmalloc(sizeof *mr, GFP_KERNEL); 62 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
63 if (!mr) 63 if (!mr)
64 return ERR_PTR(-ENOMEM); 64 return ERR_PTR(-ENOMEM);
65 65
@@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
140 int err; 140 int err;
141 int n; 141 int n;
142 142
143 mr = kmalloc(sizeof *mr, GFP_KERNEL); 143 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
144 if (!mr) 144 if (!mr)
145 return ERR_PTR(-ENOMEM); 145 return ERR_PTR(-ENOMEM);
146 146
@@ -271,11 +271,59 @@ release_mpt_entry:
271 return err; 271 return err;
272} 272}
273 273
274static int
275mlx4_alloc_priv_pages(struct ib_device *device,
276 struct mlx4_ib_mr *mr,
277 int max_pages)
278{
279 int size = max_pages * sizeof(u64);
280 int add_size;
281 int ret;
282
283 add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
284
285 mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
286 if (!mr->pages_alloc)
287 return -ENOMEM;
288
289 mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
290
291 mr->page_map = dma_map_single(device->dma_device, mr->pages,
292 size, DMA_TO_DEVICE);
293
294 if (dma_mapping_error(device->dma_device, mr->page_map)) {
295 ret = -ENOMEM;
296 goto err;
297 }
298
299 return 0;
300err:
301 kfree(mr->pages_alloc);
302
303 return ret;
304}
305
306static void
307mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
308{
309 if (mr->pages) {
310 struct ib_device *device = mr->ibmr.device;
311 int size = mr->max_pages * sizeof(u64);
312
313 dma_unmap_single(device->dma_device, mr->page_map,
314 size, DMA_TO_DEVICE);
315 kfree(mr->pages_alloc);
316 mr->pages = NULL;
317 }
318}
319
274int mlx4_ib_dereg_mr(struct ib_mr *ibmr) 320int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
275{ 321{
276 struct mlx4_ib_mr *mr = to_mmr(ibmr); 322 struct mlx4_ib_mr *mr = to_mmr(ibmr);
277 int ret; 323 int ret;
278 324
325 mlx4_free_priv_pages(mr);
326
279 ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); 327 ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
280 if (ret) 328 if (ret)
281 return ret; 329 return ret;
@@ -321,21 +369,21 @@ err_free:
321int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, 369int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
322 struct ib_mw_bind *mw_bind) 370 struct ib_mw_bind *mw_bind)
323{ 371{
324 struct ib_send_wr wr; 372 struct ib_bind_mw_wr wr;
325 struct ib_send_wr *bad_wr; 373 struct ib_send_wr *bad_wr;
326 int ret; 374 int ret;
327 375
328 memset(&wr, 0, sizeof(wr)); 376 memset(&wr, 0, sizeof(wr));
329 wr.opcode = IB_WR_BIND_MW; 377 wr.wr.opcode = IB_WR_BIND_MW;
330 wr.wr_id = mw_bind->wr_id; 378 wr.wr.wr_id = mw_bind->wr_id;
331 wr.send_flags = mw_bind->send_flags; 379 wr.wr.send_flags = mw_bind->send_flags;
332 wr.wr.bind_mw.mw = mw; 380 wr.mw = mw;
333 wr.wr.bind_mw.bind_info = mw_bind->bind_info; 381 wr.bind_info = mw_bind->bind_info;
334 wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); 382 wr.rkey = ib_inc_rkey(mw->rkey);
335 383
336 ret = mlx4_ib_post_send(qp, &wr, &bad_wr); 384 ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
337 if (!ret) 385 if (!ret)
338 mw->rkey = wr.wr.bind_mw.rkey; 386 mw->rkey = wr.rkey;
339 387
340 return ret; 388 return ret;
341} 389}
@@ -362,7 +410,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
362 max_num_sg > MLX4_MAX_FAST_REG_PAGES) 410 max_num_sg > MLX4_MAX_FAST_REG_PAGES)
363 return ERR_PTR(-EINVAL); 411 return ERR_PTR(-EINVAL);
364 412
365 mr = kmalloc(sizeof *mr, GFP_KERNEL); 413 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
366 if (!mr) 414 if (!mr)
367 return ERR_PTR(-ENOMEM); 415 return ERR_PTR(-ENOMEM);
368 416
@@ -371,71 +419,30 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
371 if (err) 419 if (err)
372 goto err_free; 420 goto err_free;
373 421
422 err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
423 if (err)
424 goto err_free_mr;
425
426 mr->max_pages = max_num_sg;
427
374 err = mlx4_mr_enable(dev->dev, &mr->mmr); 428 err = mlx4_mr_enable(dev->dev, &mr->mmr);
375 if (err) 429 if (err)
376 goto err_mr; 430 goto err_free_pl;
377 431
378 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; 432 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
379 mr->umem = NULL; 433 mr->umem = NULL;
380 434
381 return &mr->ibmr; 435 return &mr->ibmr;
382 436
383err_mr: 437err_free_pl:
438 mlx4_free_priv_pages(mr);
439err_free_mr:
384 (void) mlx4_mr_free(dev->dev, &mr->mmr); 440 (void) mlx4_mr_free(dev->dev, &mr->mmr);
385
386err_free: 441err_free:
387 kfree(mr); 442 kfree(mr);
388 return ERR_PTR(err); 443 return ERR_PTR(err);
389} 444}
390 445
391struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
392 int page_list_len)
393{
394 struct mlx4_ib_dev *dev = to_mdev(ibdev);
395 struct mlx4_ib_fast_reg_page_list *mfrpl;
396 int size = page_list_len * sizeof (u64);
397
398 if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
399 return ERR_PTR(-EINVAL);
400
401 mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
402 if (!mfrpl)
403 return ERR_PTR(-ENOMEM);
404
405 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
406 if (!mfrpl->ibfrpl.page_list)
407 goto err_free;
408
409 mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
410 pdev->dev,
411 size, &mfrpl->map,
412 GFP_KERNEL);
413 if (!mfrpl->mapped_page_list)
414 goto err_free;
415
416 WARN_ON(mfrpl->map & 0x3f);
417
418 return &mfrpl->ibfrpl;
419
420err_free:
421 kfree(mfrpl->ibfrpl.page_list);
422 kfree(mfrpl);
423 return ERR_PTR(-ENOMEM);
424}
425
426void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
427{
428 struct mlx4_ib_dev *dev = to_mdev(page_list->device);
429 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
430 int size = page_list->max_page_list_len * sizeof (u64);
431
432 dma_free_coherent(&dev->dev->persist->pdev->dev, size,
433 mfrpl->mapped_page_list,
434 mfrpl->map);
435 kfree(mfrpl->ibfrpl.page_list);
436 kfree(mfrpl);
437}
438
439struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, 446struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
440 struct ib_fmr_attr *fmr_attr) 447 struct ib_fmr_attr *fmr_attr)
441{ 448{
@@ -528,3 +535,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
528 535
529 return err; 536 return err;
530} 537}
538
539static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
540{
541 struct mlx4_ib_mr *mr = to_mmr(ibmr);
542
543 if (unlikely(mr->npages == mr->max_pages))
544 return -ENOMEM;
545
546 mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
547
548 return 0;
549}
550
551int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
552 struct scatterlist *sg,
553 int sg_nents)
554{
555 struct mlx4_ib_mr *mr = to_mmr(ibmr);
556 int rc;
557
558 mr->npages = 0;
559
560 ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
561 sizeof(u64) * mr->max_pages,
562 DMA_TO_DEVICE);
563
564 rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
565
566 ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
567 sizeof(u64) * mr->max_pages,
568 DMA_TO_DEVICE);
569
570 return rc;
571}
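
mlx4_ib_map_mr_sg() above is deliberately thin: it syncs the DMA-mapped page array and lets the core's ib_sg_to_pages() do the splitting, handing each block address to mlx4_set_page(). Roughly (a from-memory paraphrase, not the verbatim 4.4 helper, which also merges contiguous entries, detects gaps and updates ibmr->iova/length), the core loop looks like this:

	/* Simplified sketch of what ib_sg_to_pages() does with set_page(). */
	static int sg_to_pages_sketch(struct ib_mr *mr, struct scatterlist *sgl,
				      int sg_nents,
				      int (*set_page)(struct ib_mr *, u64))
	{
		u64 page_mask = ~((u64)mr->page_size - 1);
		struct scatterlist *sg;
		int i;

		for_each_sg(sgl, sg, sg_nents, i) {
			u64 addr = sg_dma_address(sg) & page_mask;
			u64 end  = sg_dma_address(sg) + sg_dma_len(sg);

			while (addr < end) {
				if (unlikely(set_page(mr, addr) < 0))
					return i;	/* driver ran out of page slots */
				addr += mr->page_size;
			}
		}
		return sg_nents;
	}

That is why mlx4_set_page() only needs a bounds check against max_pages and a cpu_to_be64() with MLX4_MTT_FLAG_PRESENT, and why set_reg_seg() in qp.c can point the FMR segment straight at mr->page_map.
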
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4ad9be3ad61c..a2e4ca56da44 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -111,7 +111,7 @@ static const __be32 mlx4_ib_opcode[] = {
111 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), 111 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
112 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), 112 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
113 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), 113 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
114 [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), 114 [IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
115 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), 115 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
116 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), 116 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
117 [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), 117 [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
@@ -617,6 +617,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
617 return 0; 617 return 0;
618} 618}
619 619
620static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
621 struct mlx4_ib_qp *qp)
622{
623 mutex_lock(&dev->counters_table[qp->port - 1].mutex);
624 mlx4_counter_free(dev->dev, qp->counter_index->index);
625 list_del(&qp->counter_index->list);
626 mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
627
628 kfree(qp->counter_index);
629 qp->counter_index = NULL;
630}
631
620static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 632static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
621 struct ib_qp_init_attr *init_attr, 633 struct ib_qp_init_attr *init_attr,
622 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, 634 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
@@ -746,9 +758,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
746 } else { 758 } else {
747 qp->sq_no_prefetch = 0; 759 qp->sq_no_prefetch = 0;
748 760
749 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
750 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
751
752 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 761 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
753 qp->flags |= MLX4_IB_QP_LSO; 762 qp->flags |= MLX4_IB_QP_LSO;
754 763
@@ -822,6 +831,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
822 goto err_proxy; 831 goto err_proxy;
823 } 832 }
824 833
834 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
835 qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
836
825 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); 837 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
826 if (err) 838 if (err)
827 goto err_qpn; 839 goto err_qpn;
@@ -1086,6 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1086{ 1098{
1087 struct mlx4_ib_qp *qp = NULL; 1099 struct mlx4_ib_qp *qp = NULL;
1088 int err; 1100 int err;
1101 int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
1089 u16 xrcdn = 0; 1102 u16 xrcdn = 0;
1090 gfp_t gfp; 1103 gfp_t gfp;
1091 1104
@@ -1109,8 +1122,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1109 } 1122 }
1110 1123
1111 if (init_attr->create_flags && 1124 if (init_attr->create_flags &&
1112 (udata || 1125 ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
1113 ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && 1126 ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
1127 MLX4_IB_QP_CREATE_USE_GFP_NOIO |
1128 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
1114 init_attr->qp_type != IB_QPT_UD) || 1129 init_attr->qp_type != IB_QPT_UD) ||
1115 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && 1130 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
1116 init_attr->qp_type > IB_QPT_GSI))) 1131 init_attr->qp_type > IB_QPT_GSI)))
@@ -1189,6 +1204,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
1189 mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); 1204 mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
1190 } 1205 }
1191 1206
1207 if (mqp->counter_index)
1208 mlx4_ib_free_qp_counter(dev, mqp);
1209
1192 pd = get_pd(mqp); 1210 pd = get_pd(mqp);
1193 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); 1211 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1194 1212
@@ -1391,11 +1409,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1391static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, 1409static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
1392 enum ib_qp_attr_mask qp_attr_mask, 1410 enum ib_qp_attr_mask qp_attr_mask,
1393 struct mlx4_ib_qp *mqp, 1411 struct mlx4_ib_qp *mqp,
1394 struct mlx4_qp_path *path, u8 port) 1412 struct mlx4_qp_path *path, u8 port,
1413 u16 vlan_id, u8 *smac)
1395{ 1414{
1396 return _mlx4_set_path(dev, &qp->ah_attr, 1415 return _mlx4_set_path(dev, &qp->ah_attr,
1397 mlx4_mac_to_u64((u8 *)qp->smac), 1416 mlx4_mac_to_u64(smac),
1398 (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, 1417 vlan_id,
1399 path, &mqp->pri, port); 1418 path, &mqp->pri, port);
1400} 1419}
1401 1420
@@ -1406,9 +1425,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
1406 struct mlx4_qp_path *path, u8 port) 1425 struct mlx4_qp_path *path, u8 port)
1407{ 1426{
1408 return _mlx4_set_path(dev, &qp->alt_ah_attr, 1427 return _mlx4_set_path(dev, &qp->alt_ah_attr,
1409 mlx4_mac_to_u64((u8 *)qp->alt_smac), 1428 0,
1410 (qp_attr_mask & IB_QP_ALT_VID) ? 1429 0xffff,
1411 qp->alt_vlan_id : 0xffff,
1412 path, &mqp->alt, port); 1430 path, &mqp->alt, port);
1413} 1431}
1414 1432
@@ -1424,7 +1442,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1424 } 1442 }
1425} 1443}
1426 1444
1427static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, 1445static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
1446 struct mlx4_ib_qp *qp,
1428 struct mlx4_qp_context *context) 1447 struct mlx4_qp_context *context)
1429{ 1448{
1430 u64 u64_mac; 1449 u64 u64_mac;
@@ -1447,6 +1466,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *
1447 return 0; 1466 return 0;
1448} 1467}
1449 1468
1469static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
1470{
1471 struct counter_index *new_counter_index;
1472 int err;
1473 u32 tmp_idx;
1474
1475 if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
1476 IB_LINK_LAYER_ETHERNET ||
1477 !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
1478 !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
1479 return 0;
1480
1481 err = mlx4_counter_alloc(dev->dev, &tmp_idx);
1482 if (err)
1483 return err;
1484
1485 new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
1486 if (!new_counter_index) {
1487 mlx4_counter_free(dev->dev, tmp_idx);
1488 return -ENOMEM;
1489 }
1490
1491 new_counter_index->index = tmp_idx;
1492 new_counter_index->allocated = 1;
1493 qp->counter_index = new_counter_index;
1494
1495 mutex_lock(&dev->counters_table[qp->port - 1].mutex);
1496 list_add_tail(&new_counter_index->list,
1497 &dev->counters_table[qp->port - 1].counters_list);
1498 mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
1499
1500 return 0;
1501}
1502
1450static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, 1503static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1451 const struct ib_qp_attr *attr, int attr_mask, 1504 const struct ib_qp_attr *attr, int attr_mask,
1452 enum ib_qp_state cur_state, enum ib_qp_state new_state) 1505 enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1460,6 +1513,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1460 int sqd_event; 1513 int sqd_event;
1461 int steer_qp = 0; 1514 int steer_qp = 0;
1462 int err = -EINVAL; 1515 int err = -EINVAL;
1516 int counter_index;
1463 1517
1464 /* APM is not supported under RoCE */ 1518 /* APM is not supported under RoCE */
1465 if (attr_mask & IB_QP_ALT_PATH && 1519 if (attr_mask & IB_QP_ALT_PATH &&
@@ -1519,6 +1573,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1519 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; 1573 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
1520 context->sq_size_stride |= qp->sq.wqe_shift - 4; 1574 context->sq_size_stride |= qp->sq.wqe_shift - 4;
1521 1575
1576 if (new_state == IB_QPS_RESET && qp->counter_index)
1577 mlx4_ib_free_qp_counter(dev, qp);
1578
1522 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 1579 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
1523 context->sq_size_stride |= !!qp->sq_no_prefetch << 7; 1580 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
1524 context->xrcd = cpu_to_be32((u32) qp->xrcdn); 1581 context->xrcd = cpu_to_be32((u32) qp->xrcdn);
@@ -1543,10 +1600,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1543 } 1600 }
1544 1601
1545 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { 1602 if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
1546 if (dev->counters[qp->port - 1].index != -1) { 1603 err = create_qp_lb_counter(dev, qp);
1547 context->pri_path.counter_index = 1604 if (err)
1548 dev->counters[qp->port - 1].index; 1605 goto out;
1606
1607 counter_index =
1608 dev->counters_table[qp->port - 1].default_counter;
1609 if (qp->counter_index)
1610 counter_index = qp->counter_index->index;
1611
1612 if (counter_index != -1) {
1613 context->pri_path.counter_index = counter_index;
1549 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; 1614 optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
1615 if (qp->counter_index) {
1616 context->pri_path.fl |=
1617 MLX4_FL_ETH_SRC_CHECK_MC_LB;
1618 context->pri_path.vlan_control |=
1619 MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
1620 }
1550 } else 1621 } else
1551 context->pri_path.counter_index = 1622 context->pri_path.counter_index =
1552 MLX4_SINK_COUNTER_INDEX(dev->dev); 1623 MLX4_SINK_COUNTER_INDEX(dev->dev);
@@ -1565,9 +1636,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1565 } 1636 }
1566 1637
1567 if (attr_mask & IB_QP_AV) { 1638 if (attr_mask & IB_QP_AV) {
1639 u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
1640 attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1641 union ib_gid gid;
1642 struct ib_gid_attr gid_attr;
1643 u16 vlan = 0xffff;
1644 u8 smac[ETH_ALEN];
1645 int status = 0;
1646
1647 if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
1648 attr->ah_attr.ah_flags & IB_AH_GRH) {
1649 int index = attr->ah_attr.grh.sgid_index;
1650
1651 status = ib_get_cached_gid(ibqp->device, port_num,
1652 index, &gid, &gid_attr);
1653 if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
1654 status = -ENOENT;
1655 if (!status && gid_attr.ndev) {
1656 vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
1657 memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
1658 dev_put(gid_attr.ndev);
1659 }
1660 }
1661 if (status)
1662 goto out;
1663
1568 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, 1664 if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
1569 attr_mask & IB_QP_PORT ? 1665 port_num, vlan, smac))
1570 attr->port_num : qp->port))
1571 goto out; 1666 goto out;
1572 1667
1573 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1668 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1704,7 +1799,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1704 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || 1799 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
1705 qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || 1800 qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
1706 qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { 1801 qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
1707 err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); 1802 err = handle_eth_ud_smac_index(dev, qp, context);
1708 if (err) { 1803 if (err) {
1709 err = -EINVAL; 1804 err = -EINVAL;
1710 goto out; 1805 goto out;
@@ -1848,6 +1943,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1848 } 1943 }
1849 } 1944 }
1850out: 1945out:
1946 if (err && qp->counter_index)
1947 mlx4_ib_free_qp_counter(dev, qp);
1851 if (err && steer_qp) 1948 if (err && steer_qp)
1852 mlx4_ib_steer_qp_reg(dev, qp, 0); 1949 mlx4_ib_steer_qp_reg(dev, qp, 0);
1853 kfree(context); 1950 kfree(context);
@@ -2036,14 +2133,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
2036} 2133}
2037 2134
2038static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, 2135static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2039 struct ib_send_wr *wr, 2136 struct ib_ud_wr *wr,
2040 void *wqe, unsigned *mlx_seg_len) 2137 void *wqe, unsigned *mlx_seg_len)
2041{ 2138{
2042 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); 2139 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
2043 struct ib_device *ib_dev = &mdev->ib_dev; 2140 struct ib_device *ib_dev = &mdev->ib_dev;
2044 struct mlx4_wqe_mlx_seg *mlx = wqe; 2141 struct mlx4_wqe_mlx_seg *mlx = wqe;
2045 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2142 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
2046 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 2143 struct mlx4_ib_ah *ah = to_mah(wr->ah);
2047 u16 pkey; 2144 u16 pkey;
2048 u32 qkey; 2145 u32 qkey;
2049 int send_size; 2146 int send_size;
@@ -2051,13 +2148,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2051 int spc; 2148 int spc;
2052 int i; 2149 int i;
2053 2150
2054 if (wr->opcode != IB_WR_SEND) 2151 if (wr->wr.opcode != IB_WR_SEND)
2055 return -EINVAL; 2152 return -EINVAL;
2056 2153
2057 send_size = 0; 2154 send_size = 0;
2058 2155
2059 for (i = 0; i < wr->num_sge; ++i) 2156 for (i = 0; i < wr->wr.num_sge; ++i)
2060 send_size += wr->sg_list[i].length; 2157 send_size += wr->wr.sg_list[i].length;
2061 2158
2062 /* for proxy-qp0 sends, need to add in size of tunnel header */ 2159 /* for proxy-qp0 sends, need to add in size of tunnel header */
2063 /* for tunnel-qp0 sends, tunnel header is already in s/g list */ 2160 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
@@ -2082,11 +2179,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
2082 mlx->rlid = sqp->ud_header.lrh.destination_lid; 2179 mlx->rlid = sqp->ud_header.lrh.destination_lid;
2083 2180
2084 sqp->ud_header.lrh.virtual_lane = 0; 2181 sqp->ud_header.lrh.virtual_lane = 0;
2085 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 2182 sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
2086 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); 2183 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
2087 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 2184 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
2088 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) 2185 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
2089 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 2186 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
2090 else 2187 else
2091 sqp->ud_header.bth.destination_qpn = 2188 sqp->ud_header.bth.destination_qpn =
2092 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); 2189 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
@@ -2158,14 +2255,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
2158 } 2255 }
2159} 2256}
2160 2257
2161static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 2258static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
2162 void *wqe, unsigned *mlx_seg_len) 2259 void *wqe, unsigned *mlx_seg_len)
2163{ 2260{
2164 struct ib_device *ib_dev = sqp->qp.ibqp.device; 2261 struct ib_device *ib_dev = sqp->qp.ibqp.device;
2165 struct mlx4_wqe_mlx_seg *mlx = wqe; 2262 struct mlx4_wqe_mlx_seg *mlx = wqe;
2166 struct mlx4_wqe_ctrl_seg *ctrl = wqe; 2263 struct mlx4_wqe_ctrl_seg *ctrl = wqe;
2167 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 2264 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
2168 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 2265 struct mlx4_ib_ah *ah = to_mah(wr->ah);
2169 union ib_gid sgid; 2266 union ib_gid sgid;
2170 u16 pkey; 2267 u16 pkey;
2171 int send_size; 2268 int send_size;
@@ -2179,8 +2276,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2179 bool is_grh; 2276 bool is_grh;
2180 2277
2181 send_size = 0; 2278 send_size = 0;
2182 for (i = 0; i < wr->num_sge; ++i) 2279 for (i = 0; i < wr->wr.num_sge; ++i)
2183 send_size += wr->sg_list[i].length; 2280 send_size += wr->wr.sg_list[i].length;
2184 2281
2185 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 2282 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
2186 is_grh = mlx4_ib_ah_grh_present(ah); 2283 is_grh = mlx4_ib_ah_grh_present(ah);
@@ -2197,7 +2294,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2197 } else { 2294 } else {
2198 err = ib_get_cached_gid(ib_dev, 2295 err = ib_get_cached_gid(ib_dev,
2199 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2296 be32_to_cpu(ah->av.ib.port_pd) >> 24,
2200 ah->av.ib.gid_index, &sgid); 2297 ah->av.ib.gid_index, &sgid,
2298 NULL);
2299 if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
2300 err = -ENOENT;
2201 if (err) 2301 if (err)
2202 return err; 2302 return err;
2203 } 2303 }
@@ -2239,7 +2339,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2239 ib_get_cached_gid(ib_dev, 2339 ib_get_cached_gid(ib_dev,
2240 be32_to_cpu(ah->av.ib.port_pd) >> 24, 2340 be32_to_cpu(ah->av.ib.port_pd) >> 24,
2241 ah->av.ib.gid_index, 2341 ah->av.ib.gid_index,
2242 &sqp->ud_header.grh.source_gid); 2342 &sqp->ud_header.grh.source_gid, NULL);
2243 } 2343 }
2244 memcpy(sqp->ud_header.grh.destination_gid.raw, 2344 memcpy(sqp->ud_header.grh.destination_gid.raw,
2245 ah->av.ib.dgid, 16); 2345 ah->av.ib.dgid, 16);
@@ -2257,7 +2357,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2257 mlx->rlid = sqp->ud_header.lrh.destination_lid; 2357 mlx->rlid = sqp->ud_header.lrh.destination_lid;
2258 } 2358 }
2259 2359
2260 switch (wr->opcode) { 2360 switch (wr->wr.opcode) {
2261 case IB_WR_SEND: 2361 case IB_WR_SEND:
2262 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; 2362 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
2263 sqp->ud_header.immediate_present = 0; 2363 sqp->ud_header.immediate_present = 0;
@@ -2265,7 +2365,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2265 case IB_WR_SEND_WITH_IMM: 2365 case IB_WR_SEND_WITH_IMM:
2266 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; 2366 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
2267 sqp->ud_header.immediate_present = 1; 2367 sqp->ud_header.immediate_present = 1;
2268 sqp->ud_header.immediate_data = wr->ex.imm_data; 2368 sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
2269 break; 2369 break;
2270 default: 2370 default:
2271 return -EINVAL; 2371 return -EINVAL;
@@ -2308,16 +2408,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
2308 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) 2408 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
2309 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 2409 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
2310 } 2410 }
2311 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 2411 sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
2312 if (!sqp->qp.ibqp.qp_num) 2412 if (!sqp->qp.ibqp.qp_num)
2313 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); 2413 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
2314 else 2414 else
2315 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); 2415 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
2316 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 2416 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
2317 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 2417 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
2318 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); 2418 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
2319 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? 2419 sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
2320 sqp->qkey : wr->wr.ud.remote_qkey); 2420 sqp->qkey : wr->remote_qkey);
2321 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); 2421 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
2322 2422
2323 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); 2423 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2405,43 +2505,39 @@ static __be32 convert_access(int acc)
2405 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); 2505 cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
2406} 2506}
2407 2507
2408static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) 2508static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
2509 struct ib_reg_wr *wr)
2409{ 2510{
2410 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); 2511 struct mlx4_ib_mr *mr = to_mmr(wr->mr);
2411 int i;
2412
2413 for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
2414 mfrpl->mapped_page_list[i] =
2415 cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
2416 MLX4_MTT_FLAG_PRESENT);
2417 2512
2418 fseg->flags = convert_access(wr->wr.fast_reg.access_flags); 2513 fseg->flags = convert_access(wr->access);
2419 fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); 2514 fseg->mem_key = cpu_to_be32(wr->key);
2420 fseg->buf_list = cpu_to_be64(mfrpl->map); 2515 fseg->buf_list = cpu_to_be64(mr->page_map);
2421 fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); 2516 fseg->start_addr = cpu_to_be64(mr->ibmr.iova);
2422 fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); 2517 fseg->reg_len = cpu_to_be64(mr->ibmr.length);
2423 fseg->offset = 0; /* XXX -- is this just for ZBVA? */ 2518 fseg->offset = 0; /* XXX -- is this just for ZBVA? */
2424 fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); 2519 fseg->page_size = cpu_to_be32(ilog2(mr->ibmr.page_size));
2425 fseg->reserved[0] = 0; 2520 fseg->reserved[0] = 0;
2426 fseg->reserved[1] = 0; 2521 fseg->reserved[1] = 0;
2427} 2522}
2428 2523
2429static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) 2524static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
2525 struct ib_bind_mw_wr *wr)
2430{ 2526{
2431 bseg->flags1 = 2527 bseg->flags1 =
2432 convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & 2528 convert_access(wr->bind_info.mw_access_flags) &
2433 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | 2529 cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
2434 MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | 2530 MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
2435 MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); 2531 MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
2436 bseg->flags2 = 0; 2532 bseg->flags2 = 0;
2437 if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) 2533 if (wr->mw->type == IB_MW_TYPE_2)
2438 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); 2534 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
2439 if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) 2535 if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
2440 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); 2536 bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
2441 bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); 2537 bseg->new_rkey = cpu_to_be32(wr->rkey);
2442 bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); 2538 bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
2443 bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); 2539 bseg->addr = cpu_to_be64(wr->bind_info.addr);
2444 bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); 2540 bseg->length = cpu_to_be64(wr->bind_info.length);
2445} 2541}
2446 2542
2447static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) 2543static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
@@ -2458,46 +2554,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
2458 rseg->reserved = 0; 2554 rseg->reserved = 0;
2459} 2555}
2460 2556
2461static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr) 2557static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
2558 struct ib_atomic_wr *wr)
2462{ 2559{
2463 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 2560 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
2464 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); 2561 aseg->swap_add = cpu_to_be64(wr->swap);
2465 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); 2562 aseg->compare = cpu_to_be64(wr->compare_add);
2466 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { 2563 } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
2467 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); 2564 aseg->swap_add = cpu_to_be64(wr->compare_add);
2468 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); 2565 aseg->compare = cpu_to_be64(wr->compare_add_mask);
2469 } else { 2566 } else {
2470 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); 2567 aseg->swap_add = cpu_to_be64(wr->compare_add);
2471 aseg->compare = 0; 2568 aseg->compare = 0;
2472 } 2569 }
2473 2570
2474} 2571}
2475 2572
2476static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, 2573static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
2477 struct ib_send_wr *wr) 2574 struct ib_atomic_wr *wr)
2478{ 2575{
2479 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); 2576 aseg->swap_add = cpu_to_be64(wr->swap);
2480 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); 2577 aseg->swap_add_mask = cpu_to_be64(wr->swap_mask);
2481 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); 2578 aseg->compare = cpu_to_be64(wr->compare_add);
2482 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); 2579 aseg->compare_mask = cpu_to_be64(wr->compare_add_mask);
2483} 2580}
2484 2581
2485static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, 2582static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
2486 struct ib_send_wr *wr) 2583 struct ib_ud_wr *wr)
2487{ 2584{
2488 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); 2585 memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
2489 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 2586 dseg->dqpn = cpu_to_be32(wr->remote_qpn);
2490 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 2587 dseg->qkey = cpu_to_be32(wr->remote_qkey);
2491 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; 2588 dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
2492 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); 2589 memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
2493} 2590}
2494 2591
2495static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, 2592static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2496 struct mlx4_wqe_datagram_seg *dseg, 2593 struct mlx4_wqe_datagram_seg *dseg,
2497 struct ib_send_wr *wr, 2594 struct ib_ud_wr *wr,
2498 enum mlx4_ib_qp_type qpt) 2595 enum mlx4_ib_qp_type qpt)
2499{ 2596{
2500 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; 2597 union mlx4_ext_av *av = &to_mah(wr->ah)->av;
2501 struct mlx4_av sqp_av = {0}; 2598 struct mlx4_av sqp_av = {0};
2502 int port = *((u8 *) &av->ib.port_pd) & 0x3; 2599 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2503 2600
@@ -2516,18 +2613,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2516 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); 2613 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2517} 2614}
2518 2615
2519static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) 2616static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
2520{ 2617{
2521 struct mlx4_wqe_inline_seg *inl = wqe; 2618 struct mlx4_wqe_inline_seg *inl = wqe;
2522 struct mlx4_ib_tunnel_header hdr; 2619 struct mlx4_ib_tunnel_header hdr;
2523 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 2620 struct mlx4_ib_ah *ah = to_mah(wr->ah);
2524 int spc; 2621 int spc;
2525 int i; 2622 int i;
2526 2623
2527 memcpy(&hdr.av, &ah->av, sizeof hdr.av); 2624 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2528 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 2625 hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
2529 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); 2626 hdr.pkey_index = cpu_to_be16(wr->pkey_index);
2530 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 2627 hdr.qkey = cpu_to_be32(wr->remote_qkey);
2531 memcpy(hdr.mac, ah->av.eth.mac, 6); 2628 memcpy(hdr.mac, ah->av.eth.mac, 6);
2532 hdr.vlan = ah->av.eth.vlan; 2629 hdr.vlan = ah->av.eth.vlan;
2533 2630
@@ -2599,22 +2696,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
2599 dseg->addr = cpu_to_be64(sg->addr); 2696 dseg->addr = cpu_to_be64(sg->addr);
2600} 2697}
2601 2698
2602static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, 2699static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
2603 struct mlx4_ib_qp *qp, unsigned *lso_seg_len, 2700 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
2604 __be32 *lso_hdr_sz, __be32 *blh) 2701 __be32 *lso_hdr_sz, __be32 *blh)
2605{ 2702{
2606 unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); 2703 unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
2607 2704
2608 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) 2705 if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
2609 *blh = cpu_to_be32(1 << 6); 2706 *blh = cpu_to_be32(1 << 6);
2610 2707
2611 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && 2708 if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
2612 wr->num_sge > qp->sq.max_gs - (halign >> 4))) 2709 wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
2613 return -EINVAL; 2710 return -EINVAL;
2614 2711
2615 memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); 2712 memcpy(wqe->header, wr->header, wr->hlen);
2616 2713
2617 *lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen); 2714 *lso_hdr_sz = cpu_to_be32(wr->mss << 16 | wr->hlen);
2618 *lso_seg_len = halign; 2715 *lso_seg_len = halign;
2619 return 0; 2716 return 0;
2620} 2717}
@@ -2713,11 +2810,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2713 case IB_WR_ATOMIC_CMP_AND_SWP: 2810 case IB_WR_ATOMIC_CMP_AND_SWP:
2714 case IB_WR_ATOMIC_FETCH_AND_ADD: 2811 case IB_WR_ATOMIC_FETCH_AND_ADD:
2715 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: 2812 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
2716 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, 2813 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
2717 wr->wr.atomic.rkey); 2814 atomic_wr(wr)->rkey);
2718 wqe += sizeof (struct mlx4_wqe_raddr_seg); 2815 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2719 2816
2720 set_atomic_seg(wqe, wr); 2817 set_atomic_seg(wqe, atomic_wr(wr));
2721 wqe += sizeof (struct mlx4_wqe_atomic_seg); 2818 wqe += sizeof (struct mlx4_wqe_atomic_seg);
2722 2819
2723 size += (sizeof (struct mlx4_wqe_raddr_seg) + 2820 size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2726,11 +2823,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2726 break; 2823 break;
2727 2824
2728 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: 2825 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2729 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, 2826 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
2730 wr->wr.atomic.rkey); 2827 atomic_wr(wr)->rkey);
2731 wqe += sizeof (struct mlx4_wqe_raddr_seg); 2828 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2732 2829
2733 set_masked_atomic_seg(wqe, wr); 2830 set_masked_atomic_seg(wqe, atomic_wr(wr));
2734 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); 2831 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
2735 2832
2736 size += (sizeof (struct mlx4_wqe_raddr_seg) + 2833 size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2741,8 +2838,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2741 case IB_WR_RDMA_READ: 2838 case IB_WR_RDMA_READ:
2742 case IB_WR_RDMA_WRITE: 2839 case IB_WR_RDMA_WRITE:
2743 case IB_WR_RDMA_WRITE_WITH_IMM: 2840 case IB_WR_RDMA_WRITE_WITH_IMM:
2744 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, 2841 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
2745 wr->wr.rdma.rkey); 2842 rdma_wr(wr)->rkey);
2746 wqe += sizeof (struct mlx4_wqe_raddr_seg); 2843 wqe += sizeof (struct mlx4_wqe_raddr_seg);
2747 size += sizeof (struct mlx4_wqe_raddr_seg) / 16; 2844 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
2748 break; 2845 break;
@@ -2755,18 +2852,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2755 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; 2852 size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
2756 break; 2853 break;
2757 2854
2758 case IB_WR_FAST_REG_MR: 2855 case IB_WR_REG_MR:
2759 ctrl->srcrb_flags |= 2856 ctrl->srcrb_flags |=
2760 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); 2857 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2761 set_fmr_seg(wqe, wr); 2858 set_reg_seg(wqe, reg_wr(wr));
2762 wqe += sizeof (struct mlx4_wqe_fmr_seg); 2859 wqe += sizeof(struct mlx4_wqe_fmr_seg);
2763 size += sizeof (struct mlx4_wqe_fmr_seg) / 16; 2860 size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
2764 break; 2861 break;
2765 2862
2766 case IB_WR_BIND_MW: 2863 case IB_WR_BIND_MW:
2767 ctrl->srcrb_flags |= 2864 ctrl->srcrb_flags |=
2768 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); 2865 cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
2769 set_bind_seg(wqe, wr); 2866 set_bind_seg(wqe, bind_mw_wr(wr));
2770 wqe += sizeof(struct mlx4_wqe_bind_seg); 2867 wqe += sizeof(struct mlx4_wqe_bind_seg);
2771 size += sizeof(struct mlx4_wqe_bind_seg) / 16; 2868 size += sizeof(struct mlx4_wqe_bind_seg) / 16;
2772 break; 2869 break;
@@ -2777,7 +2874,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2777 break; 2874 break;
2778 2875
2779 case MLX4_IB_QPT_TUN_SMI_OWNER: 2876 case MLX4_IB_QPT_TUN_SMI_OWNER:
2780 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); 2877 err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
2878 ctrl, &seglen);
2781 if (unlikely(err)) { 2879 if (unlikely(err)) {
2782 *bad_wr = wr; 2880 *bad_wr = wr;
2783 goto out; 2881 goto out;
@@ -2788,19 +2886,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2788 case MLX4_IB_QPT_TUN_SMI: 2886 case MLX4_IB_QPT_TUN_SMI:
2789 case MLX4_IB_QPT_TUN_GSI: 2887 case MLX4_IB_QPT_TUN_GSI:
2790 /* this is a UD qp used in MAD responses to slaves. */ 2888 /* this is a UD qp used in MAD responses to slaves. */
2791 set_datagram_seg(wqe, wr); 2889 set_datagram_seg(wqe, ud_wr(wr));
2792 /* set the forced-loopback bit in the data seg av */ 2890 /* set the forced-loopback bit in the data seg av */
2793 *(__be32 *) wqe |= cpu_to_be32(0x80000000); 2891 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2794 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2892 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2795 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2893 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2796 break; 2894 break;
2797 case MLX4_IB_QPT_UD: 2895 case MLX4_IB_QPT_UD:
2798 set_datagram_seg(wqe, wr); 2896 set_datagram_seg(wqe, ud_wr(wr));
2799 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2897 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2800 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2898 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2801 2899
2802 if (wr->opcode == IB_WR_LSO) { 2900 if (wr->opcode == IB_WR_LSO) {
2803 err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); 2901 err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
2902 &lso_hdr_sz, &blh);
2804 if (unlikely(err)) { 2903 if (unlikely(err)) {
2805 *bad_wr = wr; 2904 *bad_wr = wr;
2806 goto out; 2905 goto out;
@@ -2812,7 +2911,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2812 break; 2911 break;
2813 2912
2814 case MLX4_IB_QPT_PROXY_SMI_OWNER: 2913 case MLX4_IB_QPT_PROXY_SMI_OWNER:
2815 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); 2914 err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
2915 ctrl, &seglen);
2816 if (unlikely(err)) { 2916 if (unlikely(err)) {
2817 *bad_wr = wr; 2917 *bad_wr = wr;
2818 goto out; 2918 goto out;
@@ -2823,7 +2923,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2823 add_zero_len_inline(wqe); 2923 add_zero_len_inline(wqe);
2824 wqe += 16; 2924 wqe += 16;
2825 size++; 2925 size++;
2826 build_tunnel_header(wr, wqe, &seglen); 2926 build_tunnel_header(ud_wr(wr), wqe, &seglen);
2827 wqe += seglen; 2927 wqe += seglen;
2828 size += seglen / 16; 2928 size += seglen / 16;
2829 break; 2929 break;
@@ -2833,18 +2933,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2833 * In this case we first add a UD segment targeting 2933 * In this case we first add a UD segment targeting
2834 * the tunnel qp, and then add a header with address 2934 * the tunnel qp, and then add a header with address
2835 * information */ 2935 * information */
2836 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, 2936 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
2937 ud_wr(wr),
2837 qp->mlx4_ib_qp_type); 2938 qp->mlx4_ib_qp_type);
2838 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2939 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2839 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2940 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2840 build_tunnel_header(wr, wqe, &seglen); 2941 build_tunnel_header(ud_wr(wr), wqe, &seglen);
2841 wqe += seglen; 2942 wqe += seglen;
2842 size += seglen / 16; 2943 size += seglen / 16;
2843 break; 2944 break;
2844 2945
2845 case MLX4_IB_QPT_SMI: 2946 case MLX4_IB_QPT_SMI:
2846 case MLX4_IB_QPT_GSI: 2947 case MLX4_IB_QPT_GSI:
2847 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); 2948 err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
2949 &seglen);
2848 if (unlikely(err)) { 2950 if (unlikely(err)) {
2849 *bad_wr = wr; 2951 *bad_wr = wr;
2850 goto out; 2952 goto out;
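All of the mlx4 hunks above are the same Work Request cleanup conversion: the old wr->wr.ud / wr->wr.fast_reg union members are gone, and the opcode-specific fields now live in dedicated WR structures that embed struct ib_send_wr and are recovered through small container_of() accessors such as ud_wr(). A trimmed, illustrative sketch of that shape, limited to the fields the hunks above actually touch (not the full upstream definition):

/*
 * Illustrative sketch of the WR layout the converted drivers assume;
 * the field list is abbreviated and not the exact 4.4 header.
 */
struct ib_ud_wr {
	struct ib_send_wr wr;	/* generic WR embedded as the first member */
	struct ib_ah *ah;
	u32 remote_qpn;
	u32 remote_qkey;
	u16 pkey_index;
	/* ... */
};

/* Drivers still receive a generic ib_send_wr * and cast it per opcode. */
static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_ud_wr, wr);
}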
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 2d0dbbf38ceb..3dfd287256d6 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -109,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
109 case IB_WR_LOCAL_INV: 109 case IB_WR_LOCAL_INV:
110 return IB_WC_LOCAL_INV; 110 return IB_WC_LOCAL_INV;
111 111
112 case IB_WR_FAST_REG_MR: 112 case IB_WR_REG_MR:
113 return IB_WC_FAST_REG_MR; 113 return IB_WC_REG_MR;
114 114
115 default: 115 default:
116 pr_warn("unknown completion status\n"); 116 pr_warn("unknown completion status\n");
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 68508d528ba0..7e97cb55a6bf 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1425,8 +1425,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1425 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; 1425 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
1426 dev->ib_dev.process_mad = mlx5_ib_process_mad; 1426 dev->ib_dev.process_mad = mlx5_ib_process_mad;
1427 dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; 1427 dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
1428 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; 1428 dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
1429 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
1430 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; 1429 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
1431 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 1430 dev->ib_dev.get_port_immutable = mlx5_port_immutable;
1432 1431
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 22123b79d550..633347260b79 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -245,6 +245,7 @@ enum mlx5_ib_qp_flags {
245}; 245};
246 246
247struct mlx5_umr_wr { 247struct mlx5_umr_wr {
248 struct ib_send_wr wr;
248 union { 249 union {
249 u64 virt_addr; 250 u64 virt_addr;
250 u64 offset; 251 u64 offset;
@@ -257,6 +258,11 @@ struct mlx5_umr_wr {
257 u32 mkey; 258 u32 mkey;
258}; 259};
259 260
261static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
262{
263 return container_of(wr, struct mlx5_umr_wr, wr);
264}
265
260struct mlx5_shared_mr_info { 266struct mlx5_shared_mr_info {
261 int mr_id; 267 int mr_id;
262 struct ib_umem *umem; 268 struct ib_umem *umem;
@@ -313,6 +319,11 @@ enum mlx5_ib_mtt_access_flags {
313 319
314struct mlx5_ib_mr { 320struct mlx5_ib_mr {
315 struct ib_mr ibmr; 321 struct ib_mr ibmr;
322 void *descs;
323 dma_addr_t desc_map;
324 int ndescs;
325 int max_descs;
326 int desc_size;
316 struct mlx5_core_mr mmr; 327 struct mlx5_core_mr mmr;
317 struct ib_umem *umem; 328 struct ib_umem *umem;
318 struct mlx5_shared_mr_info *smr_info; 329 struct mlx5_shared_mr_info *smr_info;
@@ -324,12 +335,7 @@ struct mlx5_ib_mr {
324 struct mlx5_create_mkey_mbox_out out; 335 struct mlx5_create_mkey_mbox_out out;
325 struct mlx5_core_sig_ctx *sig; 336 struct mlx5_core_sig_ctx *sig;
326 int live; 337 int live;
327}; 338 void *descs_alloc;
328
329struct mlx5_ib_fast_reg_page_list {
330 struct ib_fast_reg_page_list ibfrpl;
331 __be64 *mapped_page_list;
332 dma_addr_t map;
333}; 339};
334 340
335struct mlx5_ib_umr_context { 341struct mlx5_ib_umr_context {
@@ -358,20 +364,6 @@ enum {
358 MLX5_FMR_BUSY, 364 MLX5_FMR_BUSY,
359}; 365};
360 366
361struct mlx5_ib_fmr {
362 struct ib_fmr ibfmr;
363 struct mlx5_core_mr mr;
364 int access_flags;
365 int state;
366 /* protect fmr state
367 */
368 spinlock_t lock;
369 u64 wrid;
370 struct ib_send_wr wr[2];
371 u8 page_shift;
372 struct ib_fast_reg_page_list page_list;
373};
374
375struct mlx5_cache_ent { 367struct mlx5_cache_ent {
376 struct list_head head; 368 struct list_head head;
377 /* sync access to the cache entry 369 /* sync access to the cache entry
@@ -456,11 +448,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
456 return container_of(ibdev, struct mlx5_ib_dev, ib_dev); 448 return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
457} 449}
458 450
459static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
460{
461 return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
462}
463
464static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) 451static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
465{ 452{
466 return container_of(ibcq, struct mlx5_ib_cq, ibcq); 453 return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -501,11 +488,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
501 return container_of(ibmr, struct mlx5_ib_mr, ibmr); 488 return container_of(ibmr, struct mlx5_ib_mr, ibmr);
502} 489}
503 490
504static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
505{
506 return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
507}
508
509struct mlx5_ib_ah { 491struct mlx5_ib_ah {
510 struct ib_ah ibah; 492 struct ib_ah ibah;
511 struct mlx5_av av; 493 struct mlx5_av av;
@@ -573,15 +555,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
573struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, 555struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
574 enum ib_mr_type mr_type, 556 enum ib_mr_type mr_type,
575 u32 max_num_sg); 557 u32 max_num_sg);
576struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 558int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
577 int page_list_len); 559 struct scatterlist *sg,
578void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); 560 int sg_nents);
579struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
580 struct ib_fmr_attr *fmr_attr);
581int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
582 int npages, u64 iova);
583int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
584int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
585int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 561int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
586 const struct ib_wc *in_wc, const struct ib_grh *in_grh, 562 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
587 const struct ib_mad_hdr *in, size_t in_mad_size, 563 const struct ib_mad_hdr *in, size_t in_mad_size,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 54a15b5d336d..ec8993a7b3be 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -687,7 +687,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
687 int access_flags) 687 int access_flags)
688{ 688{
689 struct mlx5_ib_dev *dev = to_mdev(pd->device); 689 struct mlx5_ib_dev *dev = to_mdev(pd->device);
690 struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; 690 struct mlx5_umr_wr *umrwr = umr_wr(wr);
691 691
692 sg->addr = dma; 692 sg->addr = dma;
693 sg->length = ALIGN(sizeof(u64) * n, 64); 693 sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -715,7 +715,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
715static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, 715static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
716 struct ib_send_wr *wr, u32 key) 716 struct ib_send_wr *wr, u32 key)
717{ 717{
718 struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; 718 struct mlx5_umr_wr *umrwr = umr_wr(wr);
719 719
720 wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; 720 wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
721 wr->opcode = MLX5_IB_WR_UMR; 721 wr->opcode = MLX5_IB_WR_UMR;
@@ -752,7 +752,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
752 struct device *ddev = dev->ib_dev.dma_device; 752 struct device *ddev = dev->ib_dev.dma_device;
753 struct umr_common *umrc = &dev->umrc; 753 struct umr_common *umrc = &dev->umrc;
754 struct mlx5_ib_umr_context umr_context; 754 struct mlx5_ib_umr_context umr_context;
755 struct ib_send_wr wr, *bad; 755 struct mlx5_umr_wr umrwr;
756 struct ib_send_wr *bad;
756 struct mlx5_ib_mr *mr; 757 struct mlx5_ib_mr *mr;
757 struct ib_sge sg; 758 struct ib_sge sg;
758 int size; 759 int size;
@@ -798,14 +799,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
798 goto free_pas; 799 goto free_pas;
799 } 800 }
800 801
801 memset(&wr, 0, sizeof(wr)); 802 memset(&umrwr, 0, sizeof(umrwr));
802 wr.wr_id = (u64)(unsigned long)&umr_context; 803 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
803 prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift, 804 prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
804 virt_addr, len, access_flags); 805 page_shift, virt_addr, len, access_flags);
805 806
806 mlx5_ib_init_umr_context(&umr_context); 807 mlx5_ib_init_umr_context(&umr_context);
807 down(&umrc->sem); 808 down(&umrc->sem);
808 err = ib_post_send(umrc->qp, &wr, &bad); 809 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
809 if (err) { 810 if (err) {
810 mlx5_ib_warn(dev, "post send failed, err %d\n", err); 811 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
811 goto unmap_dma; 812 goto unmap_dma;
@@ -851,8 +852,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
851 int size; 852 int size;
852 __be64 *pas; 853 __be64 *pas;
853 dma_addr_t dma; 854 dma_addr_t dma;
854 struct ib_send_wr wr, *bad; 855 struct ib_send_wr *bad;
855 struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg; 856 struct mlx5_umr_wr wr;
856 struct ib_sge sg; 857 struct ib_sge sg;
857 int err = 0; 858 int err = 0;
858 const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); 859 const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
@@ -917,26 +918,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
917 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 918 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
918 919
919 memset(&wr, 0, sizeof(wr)); 920 memset(&wr, 0, sizeof(wr));
920 wr.wr_id = (u64)(unsigned long)&umr_context; 921 wr.wr.wr_id = (u64)(unsigned long)&umr_context;
921 922
922 sg.addr = dma; 923 sg.addr = dma;
923 sg.length = ALIGN(npages * sizeof(u64), 924 sg.length = ALIGN(npages * sizeof(u64),
924 MLX5_UMR_MTT_ALIGNMENT); 925 MLX5_UMR_MTT_ALIGNMENT);
925 sg.lkey = dev->umrc.pd->local_dma_lkey; 926 sg.lkey = dev->umrc.pd->local_dma_lkey;
926 927
927 wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | 928 wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
928 MLX5_IB_SEND_UMR_UPDATE_MTT; 929 MLX5_IB_SEND_UMR_UPDATE_MTT;
929 wr.sg_list = &sg; 930 wr.wr.sg_list = &sg;
930 wr.num_sge = 1; 931 wr.wr.num_sge = 1;
931 wr.opcode = MLX5_IB_WR_UMR; 932 wr.wr.opcode = MLX5_IB_WR_UMR;
932 umrwr->npages = sg.length / sizeof(u64); 933 wr.npages = sg.length / sizeof(u64);
933 umrwr->page_shift = PAGE_SHIFT; 934 wr.page_shift = PAGE_SHIFT;
934 umrwr->mkey = mr->mmr.key; 935 wr.mkey = mr->mmr.key;
935 umrwr->target.offset = start_page_index; 936 wr.target.offset = start_page_index;
936 937
937 mlx5_ib_init_umr_context(&umr_context); 938 mlx5_ib_init_umr_context(&umr_context);
938 down(&umrc->sem); 939 down(&umrc->sem);
939 err = ib_post_send(umrc->qp, &wr, &bad); 940 err = ib_post_send(umrc->qp, &wr.wr, &bad);
940 if (err) { 941 if (err) {
941 mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); 942 mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
942 } else { 943 } else {
@@ -1122,16 +1123,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1122{ 1123{
1123 struct umr_common *umrc = &dev->umrc; 1124 struct umr_common *umrc = &dev->umrc;
1124 struct mlx5_ib_umr_context umr_context; 1125 struct mlx5_ib_umr_context umr_context;
1125 struct ib_send_wr wr, *bad; 1126 struct mlx5_umr_wr umrwr;
1127 struct ib_send_wr *bad;
1126 int err; 1128 int err;
1127 1129
1128 memset(&wr, 0, sizeof(wr)); 1130 memset(&umrwr.wr, 0, sizeof(umrwr));
1129 wr.wr_id = (u64)(unsigned long)&umr_context; 1131 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
1130 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); 1132 prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
1131 1133
1132 mlx5_ib_init_umr_context(&umr_context); 1134 mlx5_ib_init_umr_context(&umr_context);
1133 down(&umrc->sem); 1135 down(&umrc->sem);
1134 err = ib_post_send(umrc->qp, &wr, &bad); 1136 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1135 if (err) { 1137 if (err) {
1136 up(&umrc->sem); 1138 up(&umrc->sem);
1137 mlx5_ib_dbg(dev, "err %d\n", err); 1139 mlx5_ib_dbg(dev, "err %d\n", err);
@@ -1151,6 +1153,52 @@ error:
1151 return err; 1153 return err;
1152} 1154}
1153 1155
1156static int
1157mlx5_alloc_priv_descs(struct ib_device *device,
1158 struct mlx5_ib_mr *mr,
1159 int ndescs,
1160 int desc_size)
1161{
1162 int size = ndescs * desc_size;
1163 int add_size;
1164 int ret;
1165
1166 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1167
1168 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1169 if (!mr->descs_alloc)
1170 return -ENOMEM;
1171
1172 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1173
1174 mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1175 size, DMA_TO_DEVICE);
1176 if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1177 ret = -ENOMEM;
1178 goto err;
1179 }
1180
1181 return 0;
1182err:
1183 kfree(mr->descs_alloc);
1184
1185 return ret;
1186}
1187
1188static void
1189mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1190{
1191 if (mr->descs) {
1192 struct ib_device *device = mr->ibmr.device;
1193 int size = mr->max_descs * mr->desc_size;
1194
1195 dma_unmap_single(device->dma_device, mr->desc_map,
1196 size, DMA_TO_DEVICE);
1197 kfree(mr->descs_alloc);
1198 mr->descs = NULL;
1199 }
1200}
1201
1154static int clean_mr(struct mlx5_ib_mr *mr) 1202static int clean_mr(struct mlx5_ib_mr *mr)
1155{ 1203{
1156 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1204 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1170,6 +1218,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
1170 mr->sig = NULL; 1218 mr->sig = NULL;
1171 } 1219 }
1172 1220
1221 mlx5_free_priv_descs(mr);
1222
1173 if (!umred) { 1223 if (!umred) {
1174 err = destroy_mkey(dev, mr); 1224 err = destroy_mkey(dev, mr);
1175 if (err) { 1225 if (err) {
@@ -1259,6 +1309,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1259 if (mr_type == IB_MR_TYPE_MEM_REG) { 1309 if (mr_type == IB_MR_TYPE_MEM_REG) {
1260 access_mode = MLX5_ACCESS_MODE_MTT; 1310 access_mode = MLX5_ACCESS_MODE_MTT;
1261 in->seg.log2_page_size = PAGE_SHIFT; 1311 in->seg.log2_page_size = PAGE_SHIFT;
1312
1313 err = mlx5_alloc_priv_descs(pd->device, mr,
1314 ndescs, sizeof(u64));
1315 if (err)
1316 goto err_free_in;
1317
1318 mr->desc_size = sizeof(u64);
1319 mr->max_descs = ndescs;
1262 } else if (mr_type == IB_MR_TYPE_SIGNATURE) { 1320 } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1263 u32 psv_index[2]; 1321 u32 psv_index[2];
1264 1322
@@ -1315,6 +1373,7 @@ err_destroy_psv:
1315 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1373 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1316 mr->sig->psv_wire.psv_idx); 1374 mr->sig->psv_wire.psv_idx);
1317 } 1375 }
1376 mlx5_free_priv_descs(mr);
1318err_free_sig: 1377err_free_sig:
1319 kfree(mr->sig); 1378 kfree(mr->sig);
1320err_free_in: 1379err_free_in:
@@ -1324,48 +1383,6 @@ err_free:
1324 return ERR_PTR(err); 1383 return ERR_PTR(err);
1325} 1384}
1326 1385
1327struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
1328 int page_list_len)
1329{
1330 struct mlx5_ib_fast_reg_page_list *mfrpl;
1331 int size = page_list_len * sizeof(u64);
1332
1333 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
1334 if (!mfrpl)
1335 return ERR_PTR(-ENOMEM);
1336
1337 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
1338 if (!mfrpl->ibfrpl.page_list)
1339 goto err_free;
1340
1341 mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
1342 size, &mfrpl->map,
1343 GFP_KERNEL);
1344 if (!mfrpl->mapped_page_list)
1345 goto err_free;
1346
1347 WARN_ON(mfrpl->map & 0x3f);
1348
1349 return &mfrpl->ibfrpl;
1350
1351err_free:
1352 kfree(mfrpl->ibfrpl.page_list);
1353 kfree(mfrpl);
1354 return ERR_PTR(-ENOMEM);
1355}
1356
1357void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1358{
1359 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1360 struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1361 int size = page_list->max_page_list_len * sizeof(u64);
1362
1363 dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
1364 mfrpl->map);
1365 kfree(mfrpl->ibfrpl.page_list);
1366 kfree(mfrpl);
1367}
1368
1369int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1386int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1370 struct ib_mr_status *mr_status) 1387 struct ib_mr_status *mr_status)
1371{ 1388{
@@ -1406,3 +1423,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1406done: 1423done:
1407 return ret; 1424 return ret;
1408} 1425}
1426
1427static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1428{
1429 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1430 __be64 *descs;
1431
1432 if (unlikely(mr->ndescs == mr->max_descs))
1433 return -ENOMEM;
1434
1435 descs = mr->descs;
1436 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1437
1438 return 0;
1439}
1440
1441int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
1442 struct scatterlist *sg,
1443 int sg_nents)
1444{
1445 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1446 int n;
1447
1448 mr->ndescs = 0;
1449
1450 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1451 mr->desc_size * mr->max_descs,
1452 DMA_TO_DEVICE);
1453
1454 n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
1455
1456 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1457 mr->desc_size * mr->max_descs,
1458 DMA_TO_DEVICE);
1459
1460 return n;
1461}
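mlx5_ib_map_mr_sg() above is the provider half of the new memory registration API: the core walks a DMA-mapped scatterlist and hands each page address to the driver's set_page callback through ib_sg_to_pages(). For context, a minimal consumer-side sketch of how an upper-layer driver is expected to drive it; the exact ib_map_mr_sg() parameter list is an assumption here, since these hunks only show the provider callback:

/*
 * Minimal ULP-side sketch, assuming ib_map_mr_sg(mr, sg, sg_nents,
 * page_size) as the core entry point that lands in ->map_mr_sg().
 */
static int example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
			    struct scatterlist *sg, int sg_nents,
			    int access)
{
	struct ib_reg_wr reg = {};
	struct ib_send_wr *bad_wr;
	int n;

	/* Core ends up in the provider callback, e.g. mlx5_ib_map_mr_sg(). */
	n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
	if (n < sg_nents)
		return n < 0 ? n : -EINVAL;

	reg.wr.opcode = IB_WR_REG_MR;	/* replaces IB_WR_FAST_REG_MR */
	reg.mr = mr;
	reg.key = mr->rkey;
	reg.access = access;

	return ib_post_send(qp, &reg.wr, &bad_wr);
}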
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6f521a3418e8..307bdbca8938 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -64,7 +64,7 @@ static const u32 mlx5_ib_opcode[] = {
64 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, 64 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
65 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, 65 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
66 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, 66 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
67 [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, 67 [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
68 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, 68 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
69 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, 69 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
70 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, 70 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
@@ -1838,9 +1838,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
1838static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, 1838static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
1839 struct ib_send_wr *wr) 1839 struct ib_send_wr *wr)
1840{ 1840{
1841 memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); 1841 memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
1842 dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); 1842 dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
1843 dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1843 dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
1844} 1844}
1845 1845
1846static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) 1846static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
@@ -1896,22 +1896,24 @@ static __be64 sig_mkey_mask(void)
1896 return cpu_to_be64(result); 1896 return cpu_to_be64(result);
1897} 1897}
1898 1898
1899static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 1899static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
1900 struct ib_send_wr *wr, int li) 1900 struct mlx5_ib_mr *mr)
1901{ 1901{
1902 memset(umr, 0, sizeof(*umr)); 1902 int ndescs = mr->ndescs;
1903
1904 if (li) {
1905 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
1906 umr->flags = 1 << 7;
1907 return;
1908 }
1909 1903
1910 umr->flags = (1 << 5); /* fail if not free */ 1904 memset(umr, 0, sizeof(*umr));
1911 umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); 1905 umr->flags = MLX5_UMR_CHECK_NOT_FREE;
1906 umr->klm_octowords = get_klm_octo(ndescs);
1912 umr->mkey_mask = frwr_mkey_mask(); 1907 umr->mkey_mask = frwr_mkey_mask();
1913} 1908}
1914 1909
1910static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
1911{
1912 memset(umr, 0, sizeof(*umr));
1913 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
1914 umr->flags = 1 << 7;
1915}
1916
1915static __be64 get_umr_reg_mr_mask(void) 1917static __be64 get_umr_reg_mr_mask(void)
1916{ 1918{
1917 u64 result; 1919 u64 result;
@@ -1952,7 +1954,7 @@ static __be64 get_umr_update_mtt_mask(void)
1952static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 1954static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1953 struct ib_send_wr *wr) 1955 struct ib_send_wr *wr)
1954{ 1956{
1955 struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; 1957 struct mlx5_umr_wr *umrwr = umr_wr(wr);
1956 1958
1957 memset(umr, 0, sizeof(*umr)); 1959 memset(umr, 0, sizeof(*umr));
1958 1960
@@ -1987,29 +1989,31 @@ static u8 get_umr_flags(int acc)
1987 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; 1989 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
1988} 1990}
1989 1991
1990static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, 1992static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
1991 int li, int *writ) 1993 struct mlx5_ib_mr *mr,
1994 u32 key, int access)
1992{ 1995{
1993 memset(seg, 0, sizeof(*seg)); 1996 int ndescs = ALIGN(mr->ndescs, 8) >> 1;
1994 if (li) {
1995 seg->status = MLX5_MKEY_STATUS_FREE;
1996 return;
1997 }
1998 1997
1999 seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | 1998 memset(seg, 0, sizeof(*seg));
2000 MLX5_ACCESS_MODE_MTT; 1999 seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
2001 *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); 2000 seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
2002 seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
2003 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); 2001 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
2004 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); 2002 seg->start_addr = cpu_to_be64(mr->ibmr.iova);
2005 seg->len = cpu_to_be64(wr->wr.fast_reg.length); 2003 seg->len = cpu_to_be64(mr->ibmr.length);
2006 seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); 2004 seg->xlt_oct_size = cpu_to_be32(ndescs);
2007 seg->log2_page_size = wr->wr.fast_reg.page_shift; 2005 seg->log2_page_size = ilog2(mr->ibmr.page_size);
2006}
2007
2008static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
2009{
2010 memset(seg, 0, sizeof(*seg));
2011 seg->status = MLX5_MKEY_STATUS_FREE;
2008} 2012}
2009 2013
2010static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) 2014static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
2011{ 2015{
2012 struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg; 2016 struct mlx5_umr_wr *umrwr = umr_wr(wr);
2013 2017
2014 memset(seg, 0, sizeof(*seg)); 2018 memset(seg, 0, sizeof(*seg));
2015 if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { 2019 if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
@@ -2028,21 +2032,14 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
2028 mlx5_mkey_variant(umrwr->mkey)); 2032 mlx5_mkey_variant(umrwr->mkey));
2029} 2033}
2030 2034
2031static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, 2035static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
2032 struct ib_send_wr *wr, 2036 struct mlx5_ib_mr *mr,
2033 struct mlx5_core_dev *mdev, 2037 struct mlx5_ib_pd *pd)
2034 struct mlx5_ib_pd *pd,
2035 int writ)
2036{ 2038{
2037 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); 2039 int bcount = mr->desc_size * mr->ndescs;
2038 u64 *page_list = wr->wr.fast_reg.page_list->page_list;
2039 u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
2040 int i;
2041 2040
2042 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) 2041 dseg->addr = cpu_to_be64(mr->desc_map);
2043 mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); 2042 dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
2044 dseg->addr = cpu_to_be64(mfrpl->map);
2045 dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
2046 dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); 2043 dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
2047} 2044}
2048 2045
@@ -2224,22 +2221,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
2224 return 0; 2221 return 0;
2225} 2222}
2226 2223
2227static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, 2224static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
2228 void **seg, int *size) 2225 struct mlx5_ib_qp *qp, void **seg, int *size)
2229{ 2226{
2230 struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; 2227 struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
2231 struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; 2228 struct ib_mr *sig_mr = wr->sig_mr;
2232 struct mlx5_bsf *bsf; 2229 struct mlx5_bsf *bsf;
2233 u32 data_len = wr->sg_list->length; 2230 u32 data_len = wr->wr.sg_list->length;
2234 u32 data_key = wr->sg_list->lkey; 2231 u32 data_key = wr->wr.sg_list->lkey;
2235 u64 data_va = wr->sg_list->addr; 2232 u64 data_va = wr->wr.sg_list->addr;
2236 int ret; 2233 int ret;
2237 int wqe_size; 2234 int wqe_size;
2238 2235
2239 if (!wr->wr.sig_handover.prot || 2236 if (!wr->prot ||
2240 (data_key == wr->wr.sig_handover.prot->lkey && 2237 (data_key == wr->prot->lkey &&
2241 data_va == wr->wr.sig_handover.prot->addr && 2238 data_va == wr->prot->addr &&
2242 data_len == wr->wr.sig_handover.prot->length)) { 2239 data_len == wr->prot->length)) {
2243 /** 2240 /**
2244 * Source domain doesn't contain signature information 2241 * Source domain doesn't contain signature information
2245 * or data and protection are interleaved in memory. 2242 * or data and protection are interleaved in memory.
@@ -2273,8 +2270,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
2273 struct mlx5_stride_block_ctrl_seg *sblock_ctrl; 2270 struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
2274 struct mlx5_stride_block_entry *data_sentry; 2271 struct mlx5_stride_block_entry *data_sentry;
2275 struct mlx5_stride_block_entry *prot_sentry; 2272 struct mlx5_stride_block_entry *prot_sentry;
2276 u32 prot_key = wr->wr.sig_handover.prot->lkey; 2273 u32 prot_key = wr->prot->lkey;
2277 u64 prot_va = wr->wr.sig_handover.prot->addr; 2274 u64 prot_va = wr->prot->addr;
2278 u16 block_size = sig_attrs->mem.sig.dif.pi_interval; 2275 u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
2279 int prot_size; 2276 int prot_size;
2280 2277
@@ -2326,16 +2323,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
2326} 2323}
2327 2324
2328static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, 2325static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
2329 struct ib_send_wr *wr, u32 nelements, 2326 struct ib_sig_handover_wr *wr, u32 nelements,
2330 u32 length, u32 pdn) 2327 u32 length, u32 pdn)
2331{ 2328{
2332 struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; 2329 struct ib_mr *sig_mr = wr->sig_mr;
2333 u32 sig_key = sig_mr->rkey; 2330 u32 sig_key = sig_mr->rkey;
2334 u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; 2331 u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
2335 2332
2336 memset(seg, 0, sizeof(*seg)); 2333 memset(seg, 0, sizeof(*seg));
2337 2334
2338 seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | 2335 seg->flags = get_umr_flags(wr->access_flags) |
2339 MLX5_ACCESS_MODE_KLM; 2336 MLX5_ACCESS_MODE_KLM;
2340 seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); 2337 seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
2341 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | 2338 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
@@ -2346,7 +2343,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
2346} 2343}
2347 2344
2348static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, 2345static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2349 struct ib_send_wr *wr, u32 nelements) 2346 u32 nelements)
2350{ 2347{
2351 memset(umr, 0, sizeof(*umr)); 2348 memset(umr, 0, sizeof(*umr));
2352 2349
@@ -2357,37 +2354,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
2357} 2354}
2358 2355
2359 2356
2360static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, 2357static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
2361 void **seg, int *size) 2358 void **seg, int *size)
2362{ 2359{
2363 struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); 2360 struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
2361 struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
2364 u32 pdn = get_pd(qp)->pdn; 2362 u32 pdn = get_pd(qp)->pdn;
2365 u32 klm_oct_size; 2363 u32 klm_oct_size;
2366 int region_len, ret; 2364 int region_len, ret;
2367 2365
2368 if (unlikely(wr->num_sge != 1) || 2366 if (unlikely(wr->wr.num_sge != 1) ||
2369 unlikely(wr->wr.sig_handover.access_flags & 2367 unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
2370 IB_ACCESS_REMOTE_ATOMIC) ||
2371 unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || 2368 unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
2372 unlikely(!sig_mr->sig->sig_status_checked)) 2369 unlikely(!sig_mr->sig->sig_status_checked))
2373 return -EINVAL; 2370 return -EINVAL;
2374 2371
2375 /* length of the protected region, data + protection */ 2372 /* length of the protected region, data + protection */
2376 region_len = wr->sg_list->length; 2373 region_len = wr->wr.sg_list->length;
2377 if (wr->wr.sig_handover.prot && 2374 if (wr->prot &&
2378 (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || 2375 (wr->prot->lkey != wr->wr.sg_list->lkey ||
2379 wr->wr.sig_handover.prot->addr != wr->sg_list->addr || 2376 wr->prot->addr != wr->wr.sg_list->addr ||
2380 wr->wr.sig_handover.prot->length != wr->sg_list->length)) 2377 wr->prot->length != wr->wr.sg_list->length))
2381 region_len += wr->wr.sig_handover.prot->length; 2378 region_len += wr->prot->length;
2382 2379
2383 /** 2380 /**
2384 * KLM octoword size - if protection was provided 2381 * KLM octoword size - if protection was provided
2385 * then we use strided block format (3 octowords), 2382 * then we use strided block format (3 octowords),
2386 * else we use single KLM (1 octoword) 2383 * else we use single KLM (1 octoword)
2387 **/ 2384 **/
2388 klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; 2385 klm_oct_size = wr->prot ? 3 : 1;
2389 2386
2390 set_sig_umr_segment(*seg, wr, klm_oct_size); 2387 set_sig_umr_segment(*seg, klm_oct_size);
2391 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 2388 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2392 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 2389 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2393 if (unlikely((*seg == qp->sq.qend))) 2390 if (unlikely((*seg == qp->sq.qend)))
@@ -2433,38 +2430,52 @@ static int set_psv_wr(struct ib_sig_domain *domain,
2433 return 0; 2430 return 0;
2434} 2431}
2435 2432
2436static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, 2433static int set_reg_wr(struct mlx5_ib_qp *qp,
2437 struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) 2434 struct ib_reg_wr *wr,
2435 void **seg, int *size)
2438{ 2436{
2439 int writ = 0; 2437 struct mlx5_ib_mr *mr = to_mmr(wr->mr);
2440 int li; 2438 struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
2441 2439
2442 li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0; 2440 if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
2443 if (unlikely(wr->send_flags & IB_SEND_INLINE)) 2441 mlx5_ib_warn(to_mdev(qp->ibqp.device),
2442 "Invalid IB_SEND_INLINE send flag\n");
2444 return -EINVAL; 2443 return -EINVAL;
2444 }
2445 2445
2446 set_frwr_umr_segment(*seg, wr, li); 2446 set_reg_umr_seg(*seg, mr);
2447 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 2447 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2448 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 2448 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2449 if (unlikely((*seg == qp->sq.qend))) 2449 if (unlikely((*seg == qp->sq.qend)))
2450 *seg = mlx5_get_send_wqe(qp, 0); 2450 *seg = mlx5_get_send_wqe(qp, 0);
2451 set_mkey_segment(*seg, wr, li, &writ); 2451
2452 set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
2452 *seg += sizeof(struct mlx5_mkey_seg); 2453 *seg += sizeof(struct mlx5_mkey_seg);
2453 *size += sizeof(struct mlx5_mkey_seg) / 16; 2454 *size += sizeof(struct mlx5_mkey_seg) / 16;
2454 if (unlikely((*seg == qp->sq.qend))) 2455 if (unlikely((*seg == qp->sq.qend)))
2455 *seg = mlx5_get_send_wqe(qp, 0); 2456 *seg = mlx5_get_send_wqe(qp, 0);
2456 if (!li) {
2457 if (unlikely(wr->wr.fast_reg.page_list_len >
2458 wr->wr.fast_reg.page_list->max_page_list_len))
2459 return -ENOMEM;
2460 2457
2461 set_frwr_pages(*seg, wr, mdev, pd, writ); 2458 set_reg_data_seg(*seg, mr, pd);
2462 *seg += sizeof(struct mlx5_wqe_data_seg); 2459 *seg += sizeof(struct mlx5_wqe_data_seg);
2463 *size += (sizeof(struct mlx5_wqe_data_seg) / 16); 2460 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
2464 } 2461
2465 return 0; 2462 return 0;
2466} 2463}
2467 2464
2465static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
2466{
2467 set_linv_umr_seg(*seg);
2468 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2469 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2470 if (unlikely((*seg == qp->sq.qend)))
2471 *seg = mlx5_get_send_wqe(qp, 0);
2472 set_linv_mkey_seg(*seg);
2473 *seg += sizeof(struct mlx5_mkey_seg);
2474 *size += sizeof(struct mlx5_mkey_seg) / 16;
2475 if (unlikely((*seg == qp->sq.qend)))
2476 *seg = mlx5_get_send_wqe(qp, 0);
2477}
2478
2468static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) 2479static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
2469{ 2480{
2470 __be32 *p = NULL; 2481 __be32 *p = NULL;
@@ -2578,7 +2589,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2578{ 2589{
2579 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ 2590 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
2580 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2591 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2581 struct mlx5_core_dev *mdev = dev->mdev;
2582 struct mlx5_ib_qp *qp = to_mqp(ibqp); 2592 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2583 struct mlx5_ib_mr *mr; 2593 struct mlx5_ib_mr *mr;
2584 struct mlx5_wqe_data_seg *dpseg; 2594 struct mlx5_wqe_data_seg *dpseg;
@@ -2627,7 +2637,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2627 switch (ibqp->qp_type) { 2637 switch (ibqp->qp_type) {
2628 case IB_QPT_XRC_INI: 2638 case IB_QPT_XRC_INI:
2629 xrc = seg; 2639 xrc = seg;
2630 xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
2631 seg += sizeof(*xrc); 2640 seg += sizeof(*xrc);
2632 size += sizeof(*xrc) / 16; 2641 size += sizeof(*xrc) / 16;
2633 /* fall through */ 2642 /* fall through */
@@ -2636,8 +2645,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2636 case IB_WR_RDMA_READ: 2645 case IB_WR_RDMA_READ:
2637 case IB_WR_RDMA_WRITE: 2646 case IB_WR_RDMA_WRITE:
2638 case IB_WR_RDMA_WRITE_WITH_IMM: 2647 case IB_WR_RDMA_WRITE_WITH_IMM:
2639 set_raddr_seg(seg, wr->wr.rdma.remote_addr, 2648 set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
2640 wr->wr.rdma.rkey); 2649 rdma_wr(wr)->rkey);
2641 seg += sizeof(struct mlx5_wqe_raddr_seg); 2650 seg += sizeof(struct mlx5_wqe_raddr_seg);
2642 size += sizeof(struct mlx5_wqe_raddr_seg) / 16; 2651 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2643 break; 2652 break;
@@ -2654,22 +2663,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2654 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 2663 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2655 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; 2664 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
2656 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); 2665 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2657 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); 2666 set_linv_wr(qp, &seg, &size);
2658 if (err) {
2659 mlx5_ib_warn(dev, "\n");
2660 *bad_wr = wr;
2661 goto out;
2662 }
2663 num_sge = 0; 2667 num_sge = 0;
2664 break; 2668 break;
2665 2669
2666 case IB_WR_FAST_REG_MR: 2670 case IB_WR_REG_MR:
2667 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 2671 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2668 qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; 2672 qp->sq.wr_data[idx] = IB_WR_REG_MR;
2669 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); 2673 ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
2670 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); 2674 err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
2671 if (err) { 2675 if (err) {
2672 mlx5_ib_warn(dev, "\n");
2673 *bad_wr = wr; 2676 *bad_wr = wr;
2674 goto out; 2677 goto out;
2675 } 2678 }
@@ -2678,7 +2681,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2678 2681
2679 case IB_WR_REG_SIG_MR: 2682 case IB_WR_REG_SIG_MR:
2680 qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; 2683 qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
2681 mr = to_mmr(wr->wr.sig_handover.sig_mr); 2684 mr = to_mmr(sig_handover_wr(wr)->sig_mr);
2682 2685
2683 ctrl->imm = cpu_to_be32(mr->ibmr.rkey); 2686 ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
2684 err = set_sig_umr_wr(wr, qp, &seg, &size); 2687 err = set_sig_umr_wr(wr, qp, &seg, &size);
@@ -2706,7 +2709,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2706 goto out; 2709 goto out;
2707 } 2710 }
2708 2711
2709 err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, 2712 err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
2710 mr->sig->psv_memory.psv_idx, &seg, 2713 mr->sig->psv_memory.psv_idx, &seg,
2711 &size); 2714 &size);
2712 if (err) { 2715 if (err) {
@@ -2728,7 +2731,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2728 } 2731 }
2729 2732
2730 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; 2733 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2731 err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, 2734 err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
2732 mr->sig->psv_wire.psv_idx, &seg, 2735 mr->sig->psv_wire.psv_idx, &seg,
2733 &size); 2736 &size);
2734 if (err) { 2737 if (err) {
@@ -2752,8 +2755,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2752 switch (wr->opcode) { 2755 switch (wr->opcode) {
2753 case IB_WR_RDMA_WRITE: 2756 case IB_WR_RDMA_WRITE:
2754 case IB_WR_RDMA_WRITE_WITH_IMM: 2757 case IB_WR_RDMA_WRITE_WITH_IMM:
2755 set_raddr_seg(seg, wr->wr.rdma.remote_addr, 2758 set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
2756 wr->wr.rdma.rkey); 2759 rdma_wr(wr)->rkey);
2757 seg += sizeof(struct mlx5_wqe_raddr_seg); 2760 seg += sizeof(struct mlx5_wqe_raddr_seg);
2758 size += sizeof(struct mlx5_wqe_raddr_seg) / 16; 2761 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2759 break; 2762 break;
@@ -2780,7 +2783,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2780 goto out; 2783 goto out;
2781 } 2784 }
2782 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; 2785 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
2783 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); 2786 ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
2784 set_reg_umr_segment(seg, wr); 2787 set_reg_umr_segment(seg, wr);
2785 seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); 2788 seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2786 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; 2789 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
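The IB_WR_LOCAL_INV path above is split out of the old combined fast-reg/invalidate helper into set_linv_wr(); on the consumer side only the rkey carried in wr.ex.invalidate_rkey is consumed (it lands in ctrl->imm above). A hedged sketch of the matching invalidate post:

/* Sketch of the local-invalidate WR that set_linv_wr() services. */
static int example_local_inv(struct ib_qp *qp, u32 rkey)
{
	struct ib_send_wr inv_wr = {};
	struct ib_send_wr *bad_wr;

	inv_wr.opcode = IB_WR_LOCAL_INV;
	inv_wr.ex.invalidate_rkey = rkey;
	inv_wr.send_flags = IB_SEND_SIGNALED;	/* signaling policy is up to the ULP */

	return ib_post_send(qp, &inv_wr, &bad_wr);
}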
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index 32f6c6315454..bcac294042f5 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
281 ib_get_cached_gid(&dev->ib_dev, 281 ib_get_cached_gid(&dev->ib_dev,
282 be32_to_cpu(ah->av->port_pd) >> 24, 282 be32_to_cpu(ah->av->port_pd) >> 24,
283 ah->av->gid_index % dev->limits.gid_table_len, 283 ah->av->gid_index % dev->limits.gid_table_len,
284 &header->grh.source_gid); 284 &header->grh.source_gid, NULL);
285 memcpy(header->grh.destination_gid.raw, 285 memcpy(header->grh.destination_gid.raw,
286 ah->av->dgid, 16); 286 ah->av->dgid, 16);
287 } 287 }
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index e354b2f04ad9..35fe506e2cfa 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev,
1476 1476
1477/* Create UD header for an MLX send and build a data segment for it */ 1477/* Create UD header for an MLX send and build a data segment for it */
1478static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, 1478static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1479 int ind, struct ib_send_wr *wr, 1479 int ind, struct ib_ud_wr *wr,
1480 struct mthca_mlx_seg *mlx, 1480 struct mthca_mlx_seg *mlx,
1481 struct mthca_data_seg *data) 1481 struct mthca_data_seg *data)
1482{ 1482{
@@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1485 u16 pkey; 1485 u16 pkey;
1486 1486
1487 ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, 1487 ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
1488 mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, 1488 mthca_ah_grh_present(to_mah(wr->ah)), 0,
1489 &sqp->ud_header); 1489 &sqp->ud_header);
1490 1490
1491 err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); 1491 err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
1492 if (err) 1492 if (err)
1493 return err; 1493 return err;
1494 mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); 1494 mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
@@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1499 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1499 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1500 mlx->vcrc = 0; 1500 mlx->vcrc = 0;
1501 1501
1502 switch (wr->opcode) { 1502 switch (wr->wr.opcode) {
1503 case IB_WR_SEND: 1503 case IB_WR_SEND:
1504 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; 1504 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1505 sqp->ud_header.immediate_present = 0; 1505 sqp->ud_header.immediate_present = 0;
@@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1507 case IB_WR_SEND_WITH_IMM: 1507 case IB_WR_SEND_WITH_IMM:
1508 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; 1508 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
1509 sqp->ud_header.immediate_present = 1; 1509 sqp->ud_header.immediate_present = 1;
1510 sqp->ud_header.immediate_data = wr->ex.imm_data; 1510 sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
1511 break; 1511 break;
1512 default: 1512 default:
1513 return -EINVAL; 1513 return -EINVAL;
@@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
1516 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; 1516 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
1517 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) 1517 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
1518 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; 1518 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
1519 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); 1519 sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
1520 if (!sqp->qp.ibqp.qp_num) 1520 if (!sqp->qp.ibqp.qp_num)
1521 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, 1521 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
1522 sqp->pkey_index, &pkey); 1522 sqp->pkey_index, &pkey);
1523 else 1523 else
1524 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, 1524 ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
1525 wr->wr.ud.pkey_index, &pkey); 1525 wr->pkey_index, &pkey);
1526 sqp->ud_header.bth.pkey = cpu_to_be16(pkey); 1526 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1527 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1527 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
1528 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); 1528 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1529 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? 1529 sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
1530 sqp->qkey : wr->wr.ud.remote_qkey); 1530 sqp->qkey : wr->remote_qkey);
1531 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); 1531 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
1532 1532
1533 header_size = ib_ud_header_pack(&sqp->ud_header, 1533 header_size = ib_ud_header_pack(&sqp->ud_header,
@@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
1569} 1569}
1570 1570
1571static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg, 1571static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
1572 struct ib_send_wr *wr) 1572 struct ib_atomic_wr *wr)
1573{ 1573{
1574 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 1574 if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1575 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); 1575 aseg->swap_add = cpu_to_be64(wr->swap);
1576 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); 1576 aseg->compare = cpu_to_be64(wr->compare_add);
1577 } else { 1577 } else {
1578 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); 1578 aseg->swap_add = cpu_to_be64(wr->compare_add);
1579 aseg->compare = 0; 1579 aseg->compare = 0;
1580 } 1580 }
1581 1581
1582} 1582}
1583 1583
1584static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg, 1584static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
1585 struct ib_send_wr *wr) 1585 struct ib_ud_wr *wr)
1586{ 1586{
1587 useg->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); 1587 useg->lkey = cpu_to_be32(to_mah(wr->ah)->key);
1588 useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma); 1588 useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
1589 useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1589 useg->dqpn = cpu_to_be32(wr->remote_qpn);
1590 useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1590 useg->qkey = cpu_to_be32(wr->remote_qkey);
1591 1591
1592} 1592}
1593 1593
1594static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg, 1594static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
1595 struct ib_send_wr *wr) 1595 struct ib_ud_wr *wr)
1596{ 1596{
1597 memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE); 1597 memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
1598 useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1598 useg->dqpn = cpu_to_be32(wr->remote_qpn);
1599 useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1599 useg->qkey = cpu_to_be32(wr->remote_qkey);
1600} 1600}
1601 1601
1602int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1602int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1664 switch (wr->opcode) { 1664 switch (wr->opcode) {
1665 case IB_WR_ATOMIC_CMP_AND_SWP: 1665 case IB_WR_ATOMIC_CMP_AND_SWP:
1666 case IB_WR_ATOMIC_FETCH_AND_ADD: 1666 case IB_WR_ATOMIC_FETCH_AND_ADD:
1667 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, 1667 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
1668 wr->wr.atomic.rkey); 1668 atomic_wr(wr)->rkey);
1669 wqe += sizeof (struct mthca_raddr_seg); 1669 wqe += sizeof (struct mthca_raddr_seg);
1670 1670
1671 set_atomic_seg(wqe, wr); 1671 set_atomic_seg(wqe, atomic_wr(wr));
1672 wqe += sizeof (struct mthca_atomic_seg); 1672 wqe += sizeof (struct mthca_atomic_seg);
1673 size += (sizeof (struct mthca_raddr_seg) + 1673 size += (sizeof (struct mthca_raddr_seg) +
1674 sizeof (struct mthca_atomic_seg)) / 16; 1674 sizeof (struct mthca_atomic_seg)) / 16;
@@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1677 case IB_WR_RDMA_WRITE: 1677 case IB_WR_RDMA_WRITE:
1678 case IB_WR_RDMA_WRITE_WITH_IMM: 1678 case IB_WR_RDMA_WRITE_WITH_IMM:
1679 case IB_WR_RDMA_READ: 1679 case IB_WR_RDMA_READ:
1680 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, 1680 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
1681 wr->wr.rdma.rkey); 1681 rdma_wr(wr)->rkey);
1682 wqe += sizeof (struct mthca_raddr_seg); 1682 wqe += sizeof (struct mthca_raddr_seg);
1683 size += sizeof (struct mthca_raddr_seg) / 16; 1683 size += sizeof (struct mthca_raddr_seg) / 16;
1684 break; 1684 break;
@@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1694 switch (wr->opcode) { 1694 switch (wr->opcode) {
1695 case IB_WR_RDMA_WRITE: 1695 case IB_WR_RDMA_WRITE:
1696 case IB_WR_RDMA_WRITE_WITH_IMM: 1696 case IB_WR_RDMA_WRITE_WITH_IMM:
1697 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, 1697 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
1698 wr->wr.rdma.rkey); 1698 rdma_wr(wr)->rkey);
1699 wqe += sizeof (struct mthca_raddr_seg); 1699 wqe += sizeof (struct mthca_raddr_seg);
1700 size += sizeof (struct mthca_raddr_seg) / 16; 1700 size += sizeof (struct mthca_raddr_seg) / 16;
1701 break; 1701 break;
@@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1708 break; 1708 break;
1709 1709
1710 case UD: 1710 case UD:
1711 set_tavor_ud_seg(wqe, wr); 1711 set_tavor_ud_seg(wqe, ud_wr(wr));
1712 wqe += sizeof (struct mthca_tavor_ud_seg); 1712 wqe += sizeof (struct mthca_tavor_ud_seg);
1713 size += sizeof (struct mthca_tavor_ud_seg) / 16; 1713 size += sizeof (struct mthca_tavor_ud_seg) / 16;
1714 break; 1714 break;
1715 1715
1716 case MLX: 1716 case MLX:
1717 err = build_mlx_header(dev, to_msqp(qp), ind, wr, 1717 err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
1718 wqe - sizeof (struct mthca_next_seg), 1718 wqe - sizeof (struct mthca_next_seg),
1719 wqe); 1719 wqe);
1720 if (err) { 1720 if (err) {
@@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2005 switch (wr->opcode) { 2005 switch (wr->opcode) {
2006 case IB_WR_ATOMIC_CMP_AND_SWP: 2006 case IB_WR_ATOMIC_CMP_AND_SWP:
2007 case IB_WR_ATOMIC_FETCH_AND_ADD: 2007 case IB_WR_ATOMIC_FETCH_AND_ADD:
2008 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, 2008 set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
2009 wr->wr.atomic.rkey); 2009 atomic_wr(wr)->rkey);
2010 wqe += sizeof (struct mthca_raddr_seg); 2010 wqe += sizeof (struct mthca_raddr_seg);
2011 2011
2012 set_atomic_seg(wqe, wr); 2012 set_atomic_seg(wqe, atomic_wr(wr));
2013 wqe += sizeof (struct mthca_atomic_seg); 2013 wqe += sizeof (struct mthca_atomic_seg);
2014 size += (sizeof (struct mthca_raddr_seg) + 2014 size += (sizeof (struct mthca_raddr_seg) +
2015 sizeof (struct mthca_atomic_seg)) / 16; 2015 sizeof (struct mthca_atomic_seg)) / 16;
@@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2018 case IB_WR_RDMA_READ: 2018 case IB_WR_RDMA_READ:
2019 case IB_WR_RDMA_WRITE: 2019 case IB_WR_RDMA_WRITE:
2020 case IB_WR_RDMA_WRITE_WITH_IMM: 2020 case IB_WR_RDMA_WRITE_WITH_IMM:
2021 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, 2021 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
2022 wr->wr.rdma.rkey); 2022 rdma_wr(wr)->rkey);
2023 wqe += sizeof (struct mthca_raddr_seg); 2023 wqe += sizeof (struct mthca_raddr_seg);
2024 size += sizeof (struct mthca_raddr_seg) / 16; 2024 size += sizeof (struct mthca_raddr_seg) / 16;
2025 break; 2025 break;
@@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2035 switch (wr->opcode) { 2035 switch (wr->opcode) {
2036 case IB_WR_RDMA_WRITE: 2036 case IB_WR_RDMA_WRITE:
2037 case IB_WR_RDMA_WRITE_WITH_IMM: 2037 case IB_WR_RDMA_WRITE_WITH_IMM:
2038 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, 2038 set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
2039 wr->wr.rdma.rkey); 2039 rdma_wr(wr)->rkey);
2040 wqe += sizeof (struct mthca_raddr_seg); 2040 wqe += sizeof (struct mthca_raddr_seg);
2041 size += sizeof (struct mthca_raddr_seg) / 16; 2041 size += sizeof (struct mthca_raddr_seg) / 16;
2042 break; 2042 break;
@@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2049 break; 2049 break;
2050 2050
2051 case UD: 2051 case UD:
2052 set_arbel_ud_seg(wqe, wr); 2052 set_arbel_ud_seg(wqe, ud_wr(wr));
2053 wqe += sizeof (struct mthca_arbel_ud_seg); 2053 wqe += sizeof (struct mthca_arbel_ud_seg);
2054 size += sizeof (struct mthca_arbel_ud_seg) / 16; 2054 size += sizeof (struct mthca_arbel_ud_seg) / 16;
2055 break; 2055 break;
2056 2056
2057 case MLX: 2057 case MLX:
2058 err = build_mlx_header(dev, to_msqp(qp), ind, wr, 2058 err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
2059 wqe - sizeof (struct mthca_next_seg), 2059 wqe - sizeof (struct mthca_next_seg),
2060 wqe); 2060 wqe);
2061 if (err) { 2061 if (err) {
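The mthca conversion relies on the same accessor idiom for RDMA and atomic opcodes: rdma_wr(wr) and atomic_wr(wr) replace the old wr->wr.rdma and wr->wr.atomic unions. An abbreviated sketch of the atomic variant, limited to the fields set_raddr_seg() and set_atomic_seg() read above:

/* Illustrative, abbreviated layout assumed by set_atomic_seg() above. */
struct ib_atomic_wr {
	struct ib_send_wr wr;
	u64 remote_addr;
	u64 compare_add;
	u64 swap;
	u32 rkey;
	/* ... */
};

static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
{
	return container_of(wr, struct ib_atomic_wr, wr);
}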
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index d748e4b31b8d..c9080208aad2 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl {
1200 dma_addr_t paddr; 1200 dma_addr_t paddr;
1201}; 1201};
1202 1202
1203struct nes_ib_fast_reg_page_list {
1204 struct ib_fast_reg_page_list ibfrpl;
1205 struct nes_fast_mr_wqe_pbl nes_wqe_pbl;
1206 u64 pbl;
1207};
1208
1209struct nes_listener { 1203struct nes_listener {
1210 struct work_struct work; 1204 struct work_struct work;
1211 struct workqueue_struct *wq; 1205 struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 44cb513f9a87..137880a19ebe 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -51,6 +51,7 @@ atomic_t qps_created;
51atomic_t sw_qps_destroyed; 51atomic_t sw_qps_destroyed;
52 52
53static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); 53static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
54static int nes_dereg_mr(struct ib_mr *ib_mr);
54 55
55/** 56/**
56 * nes_alloc_mw 57 * nes_alloc_mw
@@ -443,79 +444,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
443 } else { 444 } else {
444 kfree(nesmr); 445 kfree(nesmr);
445 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); 446 nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
446 ibmr = ERR_PTR(-ENOMEM); 447 return ERR_PTR(-ENOMEM);
447 } 448 }
449
450 nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
451 max_num_sg * sizeof(u64),
452 &nesmr->paddr);
453 if (!nesmr->paddr)
454 goto err;
455
456 nesmr->max_pages = max_num_sg;
457
448 return ibmr; 458 return ibmr;
459
460err:
461 nes_dereg_mr(ibmr);
462
463 return ERR_PTR(-ENOMEM);
449} 464}
450 465
451/* 466static int nes_set_page(struct ib_mr *ibmr, u64 addr)
452 * nes_alloc_fast_reg_page_list
453 */
454static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
455 struct ib_device *ibdev,
456 int page_list_len)
457{ 467{
458 struct nes_vnic *nesvnic = to_nesvnic(ibdev); 468 struct nes_mr *nesmr = to_nesmr(ibmr);
459 struct nes_device *nesdev = nesvnic->nesdev;
460 struct ib_fast_reg_page_list *pifrpl;
461 struct nes_ib_fast_reg_page_list *pnesfrpl;
462 469
463 if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) 470 if (unlikely(nesmr->npages == nesmr->max_pages))
464 return ERR_PTR(-E2BIG); 471 return -ENOMEM;
465 /*
466 * Allocate the ib_fast_reg_page_list structure, the
467 * nes_fast_bpl structure, and the PLB table.
468 */
469 pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
470 page_list_len * sizeof(u64), GFP_KERNEL);
471
472 if (!pnesfrpl)
473 return ERR_PTR(-ENOMEM);
474 472
475 pifrpl = &pnesfrpl->ibfrpl; 473 nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
476 pifrpl->page_list = &pnesfrpl->pbl;
477 pifrpl->max_page_list_len = page_list_len;
478 /*
479 * Allocate the WQE PBL
480 */
481 pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
482 page_list_len * sizeof(u64),
483 &pnesfrpl->nes_wqe_pbl.paddr);
484 474
485 if (!pnesfrpl->nes_wqe_pbl.kva) { 475 return 0;
486 kfree(pnesfrpl);
487 return ERR_PTR(-ENOMEM);
488 }
489 nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
490 "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
491 "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl,
492 pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
493 (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr);
494
495 return pifrpl;
496} 476}
497 477
498/* 478static int nes_map_mr_sg(struct ib_mr *ibmr,
499 * nes_free_fast_reg_page_list 479 struct scatterlist *sg,
500 */ 480 int sg_nents)
501static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
502{ 481{
503 struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); 482 struct nes_mr *nesmr = to_nesmr(ibmr);
504 struct nes_device *nesdev = nesvnic->nesdev;
505 struct nes_ib_fast_reg_page_list *pnesfrpl;
506 483
507 pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); 484 nesmr->npages = 0;
508 /* 485
509 * Free the WQE PBL. 486 return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
510 */
511 pci_free_consistent(nesdev->pcidev,
512 pifrpl->max_page_list_len * sizeof(u64),
513 pnesfrpl->nes_wqe_pbl.kva,
514 pnesfrpl->nes_wqe_pbl.paddr);
515 /*
516 * Free the PBL structure
517 */
518 kfree(pnesfrpl);
519} 487}
520 488
521/** 489/**
@@ -2683,6 +2651,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
2683 u16 major_code; 2651 u16 major_code;
2684 u16 minor_code; 2652 u16 minor_code;
2685 2653
2654
2655 if (nesmr->pages)
2656 pci_free_consistent(nesdev->pcidev,
2657 nesmr->max_pages * sizeof(u64),
2658 nesmr->pages,
2659 nesmr->paddr);
2660
2686 if (nesmr->region) { 2661 if (nesmr->region) {
2687 ib_umem_release(nesmr->region); 2662 ib_umem_release(nesmr->region);
2688 } 2663 }
@@ -3372,9 +3347,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3372 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; 3347 wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
3373 3348
3374 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, 3349 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
3375 ib_wr->wr.rdma.rkey); 3350 rdma_wr(ib_wr)->rkey);
3376 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, 3351 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
3377 ib_wr->wr.rdma.remote_addr); 3352 rdma_wr(ib_wr)->remote_addr);
3378 3353
3379 if ((ib_wr->send_flags & IB_SEND_INLINE) && 3354 if ((ib_wr->send_flags & IB_SEND_INLINE) &&
3380 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && 3355 ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
@@ -3409,9 +3384,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3409 } 3384 }
3410 3385
3411 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, 3386 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
3412 ib_wr->wr.rdma.remote_addr); 3387 rdma_wr(ib_wr)->remote_addr);
3413 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, 3388 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
3414 ib_wr->wr.rdma.rkey); 3389 rdma_wr(ib_wr)->rkey);
3415 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, 3390 set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
3416 ib_wr->sg_list->length); 3391 ib_wr->sg_list->length);
3417 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, 3392 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
@@ -3425,19 +3400,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3425 NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, 3400 NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
3426 ib_wr->ex.invalidate_rkey); 3401 ib_wr->ex.invalidate_rkey);
3427 break; 3402 break;
3428 case IB_WR_FAST_REG_MR: 3403 case IB_WR_REG_MR:
3429 { 3404 {
3430 int i; 3405 struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
3431 int flags = ib_wr->wr.fast_reg.access_flags; 3406 int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
3432 struct nes_ib_fast_reg_page_list *pnesfrpl = 3407 int flags = reg_wr(ib_wr)->access;
3433 container_of(ib_wr->wr.fast_reg.page_list, 3408
3434 struct nes_ib_fast_reg_page_list, 3409 if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
3435 ibfrpl);
3436 u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
3437 u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
3438
3439 if (ib_wr->wr.fast_reg.page_list_len >
3440 (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
3441 nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); 3410 nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
3442 err = -EINVAL; 3411 err = -EINVAL;
3443 break; 3412 break;
@@ -3445,19 +3414,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3445 wqe_misc = NES_IWARP_SQ_OP_FAST_REG; 3414 wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
3446 set_wqe_64bit_value(wqe->wqe_words, 3415 set_wqe_64bit_value(wqe->wqe_words,
3447 NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, 3416 NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
3448 ib_wr->wr.fast_reg.iova_start); 3417 mr->ibmr.iova);
3449 set_wqe_32bit_value(wqe->wqe_words, 3418 set_wqe_32bit_value(wqe->wqe_words,
3450 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, 3419 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
3451 ib_wr->wr.fast_reg.length); 3420 mr->ibmr.length);
3452 set_wqe_32bit_value(wqe->wqe_words, 3421 set_wqe_32bit_value(wqe->wqe_words,
3453 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); 3422 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
3454 set_wqe_32bit_value(wqe->wqe_words, 3423 set_wqe_32bit_value(wqe->wqe_words,
3455 NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, 3424 NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
3456 ib_wr->wr.fast_reg.rkey); 3425 reg_wr(ib_wr)->key);
3457 /* Set page size: */ 3426
3458 if (ib_wr->wr.fast_reg.page_shift == 12) { 3427 if (page_shift == 12) {
3459 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; 3428 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
3460 } else if (ib_wr->wr.fast_reg.page_shift == 21) { 3429 } else if (page_shift == 21) {
3461 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; 3430 wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
3462 } else { 3431 } else {
3463 nes_debug(NES_DBG_IW_TX, "Invalid page shift," 3432 nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
@@ -3465,6 +3434,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3465 err = -EINVAL; 3434 err = -EINVAL;
3466 break; 3435 break;
3467 } 3436 }
3437
3468 /* Set access_flags */ 3438 /* Set access_flags */
3469 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; 3439 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
3470 if (flags & IB_ACCESS_LOCAL_WRITE) 3440 if (flags & IB_ACCESS_LOCAL_WRITE)
@@ -3480,35 +3450,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3480 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; 3450 wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
3481 3451
3482 /* Fill in PBL info: */ 3452 /* Fill in PBL info: */
3483 if (ib_wr->wr.fast_reg.page_list_len >
3484 pnesfrpl->ibfrpl.max_page_list_len) {
3485 nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
3486 " ib_wr=%p, value=%u, max=%u\n",
3487 ib_wr, ib_wr->wr.fast_reg.page_list_len,
3488 pnesfrpl->ibfrpl.max_page_list_len);
3489 err = -EINVAL;
3490 break;
3491 }
3492
3493 set_wqe_64bit_value(wqe->wqe_words, 3453 set_wqe_64bit_value(wqe->wqe_words,
3494 NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, 3454 NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
3495 pnesfrpl->nes_wqe_pbl.paddr); 3455 mr->paddr);
3496 3456
3497 set_wqe_32bit_value(wqe->wqe_words, 3457 set_wqe_32bit_value(wqe->wqe_words,
3498 NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, 3458 NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
3499 ib_wr->wr.fast_reg.page_list_len * 8); 3459 mr->npages * 8);
3500
3501 for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
3502 dst_page_list[i] = cpu_to_le64(src_page_list[i]);
3503 3460
3504 nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " 3461 nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
3505 "length: %d, rkey: %0x, pgl_paddr: %llx, " 3462 "length: %d, rkey: %0x, pgl_paddr: %llx, "
3506 "page_list_len: %u, wqe_misc: %x\n", 3463 "page_list_len: %u, wqe_misc: %x\n",
3507 (unsigned long long) ib_wr->wr.fast_reg.iova_start, 3464 (unsigned long long) mr->ibmr.iova,
3508 ib_wr->wr.fast_reg.length, 3465 mr->ibmr.length,
3509 ib_wr->wr.fast_reg.rkey, 3466 reg_wr(ib_wr)->key,
3510 (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, 3467 (unsigned long long) mr->paddr,
3511 ib_wr->wr.fast_reg.page_list_len, 3468 mr->npages,
3512 wqe_misc); 3469 wqe_misc);
3513 break; 3470 break;
3514 } 3471 }
@@ -3751,7 +3708,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3751 entry->opcode = IB_WC_LOCAL_INV; 3708 entry->opcode = IB_WC_LOCAL_INV;
3752 break; 3709 break;
3753 case NES_IWARP_SQ_OP_FAST_REG: 3710 case NES_IWARP_SQ_OP_FAST_REG:
3754 entry->opcode = IB_WC_FAST_REG_MR; 3711 entry->opcode = IB_WC_REG_MR;
3755 break; 3712 break;
3756 } 3713 }
3757 3714
@@ -3939,8 +3896,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
3939 nesibdev->ibdev.bind_mw = nes_bind_mw; 3896 nesibdev->ibdev.bind_mw = nes_bind_mw;
3940 3897
3941 nesibdev->ibdev.alloc_mr = nes_alloc_mr; 3898 nesibdev->ibdev.alloc_mr = nes_alloc_mr;
3942 nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; 3899 nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
3943 nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
3944 3900
3945 nesibdev->ibdev.attach_mcast = nes_multicast_attach; 3901 nesibdev->ibdev.attach_mcast = nes_multicast_attach;
3946 nesibdev->ibdev.detach_mcast = nes_multicast_detach; 3902 nesibdev->ibdev.detach_mcast = nes_multicast_detach;
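
nes is one of the providers converted from IB_WR_FAST_REG_MR plus a separately allocated ib_fast_reg_page_list to the new IB_WR_REG_MR flow, where the page list lives inside the MR and is filled by ->map_mr_sg(). From the consumer side the new registration sequence looks roughly like the sketch below; example_fast_register() and all of its parameters are placeholders rather than code from this series, and error paths are abbreviated.

#include <rdma/ib_verbs.h>

/* Minimal sketch of the new registration flow (4.4-era API). */
static int example_fast_register(struct ib_pd *pd, struct ib_qp *qp,
                                 struct scatterlist *sg, int sg_nents,
                                 int max_num_sg, struct ib_mr **out_mr)
{
        struct ib_reg_wr rwr = { };
        struct ib_send_wr *bad_wr;
        struct ib_mr *mr;
        int n;

        /* One-time setup: MR sized for up to max_num_sg pages. */
        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        /* Per-I/O: the driver's ->map_mr_sg() builds its page list. */
        n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
        if (n < sg_nents) {
                ib_dereg_mr(mr);
                return n < 0 ? n : -EINVAL;
        }

        /* Post the registration work request. */
        rwr.wr.opcode = IB_WR_REG_MR;
        rwr.wr.send_flags = IB_SEND_SIGNALED;
        rwr.mr = mr;
        rwr.key = mr->rkey;
        rwr.access = IB_ACCESS_LOCAL_WRITE |
                     IB_ACCESS_REMOTE_READ |
                     IB_ACCESS_REMOTE_WRITE;

        *out_mr = mr;
        return ib_post_send(qp, &rwr.wr, &bad_wr);
}

The real consumers converted in this pull (iser, srp, xprtrdma) keep such MRs in a pool and bump the key with ib_update_fast_reg_key() before reuse instead of allocating per I/O.
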
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 309b31c31ae1..a204b677af22 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -79,6 +79,10 @@ struct nes_mr {
79 u16 pbls_used; 79 u16 pbls_used;
80 u8 mode; 80 u8 mode;
81 u8 pbl_4k; 81 u8 pbl_4k;
82 __le64 *pages;
83 dma_addr_t paddr;
84 u32 max_pages;
85 u32 npages;
82}; 86};
83 87
84struct nes_hw_pb { 88struct nes_hw_pb {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index b4091ab48db0..ae80590aabdf 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -55,7 +55,7 @@
55#include <be_roce.h> 55#include <be_roce.h>
56#include "ocrdma_sli.h" 56#include "ocrdma_sli.h"
57 57
58#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0" 58#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0"
59 59
60#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" 60#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
61#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" 61#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -193,6 +193,8 @@ struct ocrdma_mr {
193 struct ib_mr ibmr; 193 struct ib_mr ibmr;
194 struct ib_umem *umem; 194 struct ib_umem *umem;
195 struct ocrdma_hw_mr hwmr; 195 struct ocrdma_hw_mr hwmr;
196 u64 *pages;
197 u32 npages;
196}; 198};
197 199
198struct ocrdma_stats { 200struct ocrdma_stats {
@@ -278,7 +280,6 @@ struct ocrdma_dev {
278 u32 hba_port_num; 280 u32 hba_port_num;
279 281
280 struct list_head entry; 282 struct list_head entry;
281 struct rcu_head rcu;
282 int id; 283 int id;
283 u64 *stag_arr; 284 u64 *stag_arr;
284 u8 sl; /* service level */ 285 u8 sl; /* service level */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 44766fee1f4e..9820074be59d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -45,6 +45,7 @@
45 45
46#include <rdma/ib_addr.h> 46#include <rdma/ib_addr.h>
47#include <rdma/ib_mad.h> 47#include <rdma/ib_mad.h>
48#include <rdma/ib_cache.h>
48 49
49#include "ocrdma.h" 50#include "ocrdma.h"
50#include "ocrdma_verbs.h" 51#include "ocrdma_verbs.h"
@@ -56,10 +57,9 @@
56 57
57static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, 58static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
58 struct ib_ah_attr *attr, union ib_gid *sgid, 59 struct ib_ah_attr *attr, union ib_gid *sgid,
59 int pdid, bool *isvlan) 60 int pdid, bool *isvlan, u16 vlan_tag)
60{ 61{
61 int status = 0; 62 int status = 0;
62 u16 vlan_tag;
63 struct ocrdma_eth_vlan eth; 63 struct ocrdma_eth_vlan eth;
64 struct ocrdma_grh grh; 64 struct ocrdma_grh grh;
65 int eth_sz; 65 int eth_sz;
@@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
68 memset(&grh, 0, sizeof(grh)); 68 memset(&grh, 0, sizeof(grh));
69 69
70 /* VLAN */ 70 /* VLAN */
71 vlan_tag = attr->vlan_id;
72 if (!vlan_tag || (vlan_tag > 0xFFF)) 71 if (!vlan_tag || (vlan_tag > 0xFFF))
73 vlan_tag = dev->pvid; 72 vlan_tag = dev->pvid;
74 if (vlan_tag || dev->pfc_state) { 73 if (vlan_tag || dev->pfc_state) {
@@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
115struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) 114struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
116{ 115{
117 u32 *ahid_addr; 116 u32 *ahid_addr;
118 bool isvlan = false;
119 int status; 117 int status;
120 struct ocrdma_ah *ah; 118 struct ocrdma_ah *ah;
119 bool isvlan = false;
120 u16 vlan_tag = 0xffff;
121 struct ib_gid_attr sgid_attr;
121 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); 122 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
122 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); 123 struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
123 union ib_gid sgid; 124 union ib_gid sgid;
@@ -135,18 +136,25 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
135 if (status) 136 if (status)
136 goto av_err; 137 goto av_err;
137 138
138 status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid); 139 status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
140 &sgid_attr);
139 if (status) { 141 if (status) {
140 pr_err("%s(): Failed to query sgid, status = %d\n", 142 pr_err("%s(): Failed to query sgid, status = %d\n",
141 __func__, status); 143 __func__, status);
142 goto av_conf_err; 144 goto av_conf_err;
143 } 145 }
146 if (sgid_attr.ndev) {
147 if (is_vlan_dev(sgid_attr.ndev))
148 vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
149 dev_put(sgid_attr.ndev);
150 }
144 151
145 if ((pd->uctx) && 152 if ((pd->uctx) &&
146 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) && 153 (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
147 (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) { 154 (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
148 status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid, 155 status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
149 attr->dmac, &attr->vlan_id); 156 attr->dmac, &vlan_tag,
157 sgid_attr.ndev->ifindex);
150 if (status) { 158 if (status) {
151 pr_err("%s(): Failed to resolve dmac from gid." 159 pr_err("%s(): Failed to resolve dmac from gid."
152 "status = %d\n", __func__, status); 160 "status = %d\n", __func__, status);
@@ -154,7 +162,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
154 } 162 }
155 } 163 }
156 164
157 status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan); 165 status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
158 if (status) 166 if (status)
159 goto av_conf_err; 167 goto av_conf_err;
160 168
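
ocrdma_create_ah() now derives the VLAN from the cached GID entry's net_device instead of the removed attr->vlan_id. Stripped of the ocrdma specifics, the lookup pattern is roughly the following fragment (ibdev, port_num and gid_index are placeholders; linux/if_vlan.h and rdma/ib_cache.h provide the helpers):

        union ib_gid sgid;
        struct ib_gid_attr sgid_attr;
        u16 vlan_tag = 0xffff;
        int status;

        status = ib_get_cached_gid(ibdev, port_num, gid_index, &sgid, &sgid_attr);
        if (status)
                return status;

        if (sgid_attr.ndev) {
                if (is_vlan_dev(sgid_attr.ndev))
                        vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
                dev_put(sgid_attr.ndev);        /* the GID cache took a reference */
        }
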
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index aab391a15db4..30f67bebffa3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -47,6 +47,7 @@
47 47
48#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
49#include <rdma/ib_user_verbs.h> 49#include <rdma/ib_user_verbs.h>
50#include <rdma/ib_cache.h>
50 51
51#include "ocrdma.h" 52#include "ocrdma.h"
52#include "ocrdma_hw.h" 53#include "ocrdma_hw.h"
@@ -678,11 +679,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
678 int dev_event = 0; 679 int dev_event = 0;
679 int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> 680 int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
680 OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; 681 OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
682 u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK;
683 u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK;
681 684
682 if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) 685 /*
683 qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK]; 686 * Some FW version returns wrong qp or cq ids in CQEs.
684 if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) 687 * Checking whether the IDs are valid
685 cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; 688 */
689
690 if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) {
691 if (qpid < dev->attr.max_qp)
692 qp = dev->qp_tbl[qpid];
693 if (qp == NULL) {
694 pr_err("ocrdma%d:Async event - qpid %u is not valid\n",
695 dev->id, qpid);
696 return;
697 }
698 }
699
700 if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) {
701 if (cqid < dev->attr.max_cq)
702 cq = dev->cq_tbl[cqid];
703 if (cq == NULL) {
704 pr_err("ocrdma%d:Async event - cqid %u is not valid\n",
705 dev->id, cqid);
706 return;
707 }
708 }
686 709
687 memset(&ib_evt, 0, sizeof(ib_evt)); 710 memset(&ib_evt, 0, sizeof(ib_evt));
688 711
@@ -2448,6 +2471,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2448 int status; 2471 int status;
2449 struct ib_ah_attr *ah_attr = &attrs->ah_attr; 2472 struct ib_ah_attr *ah_attr = &attrs->ah_attr;
2450 union ib_gid sgid, zgid; 2473 union ib_gid sgid, zgid;
2474 struct ib_gid_attr sgid_attr;
2451 u32 vlan_id = 0xFFFF; 2475 u32 vlan_id = 0xFFFF;
2452 u8 mac_addr[6]; 2476 u8 mac_addr[6];
2453 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); 2477 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
@@ -2466,10 +2490,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2466 cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; 2490 cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
2467 memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], 2491 memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
2468 sizeof(cmd->params.dgid)); 2492 sizeof(cmd->params.dgid));
2469 status = ocrdma_query_gid(&dev->ibdev, 1, 2493
2470 ah_attr->grh.sgid_index, &sgid); 2494 status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
2471 if (status) 2495 &sgid, &sgid_attr);
2472 return status; 2496 if (!status && sgid_attr.ndev) {
2497 vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
2498 memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
2499 dev_put(sgid_attr.ndev);
2500 }
2473 2501
2474 memset(&zgid, 0, sizeof(zgid)); 2502 memset(&zgid, 0, sizeof(zgid));
2475 if (!memcmp(&sgid, &zgid, sizeof(zgid))) 2503 if (!memcmp(&sgid, &zgid, sizeof(zgid)))
@@ -2486,17 +2514,15 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2486 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); 2514 ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
2487 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); 2515 ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
2488 cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); 2516 cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
2489 if (attr_mask & IB_QP_VID) {
2490 vlan_id = attrs->vlan_id;
2491 } else if (dev->pfc_state) {
2492 vlan_id = 0;
2493 pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
2494 dev->id);
2495 pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
2496 dev->id);
2497 }
2498 2517
2499 if (vlan_id < 0x1000) { 2518 if (vlan_id < 0x1000) {
2519 if (dev->pfc_state) {
2520 vlan_id = 0;
2521 pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
2522 dev->id);
2523 pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
2524 dev->id);
2525 }
2500 cmd->params.vlan_dmac_b4_to_b5 |= 2526 cmd->params.vlan_dmac_b4_to_b5 |=
2501 vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; 2527 vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
2502 cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; 2528 cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 87aa55df7c82..62b7009daa6c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
63MODULE_AUTHOR("Emulex Corporation"); 63MODULE_AUTHOR("Emulex Corporation");
64MODULE_LICENSE("Dual BSD/GPL"); 64MODULE_LICENSE("Dual BSD/GPL");
65 65
66static LIST_HEAD(ocrdma_dev_list);
67static DEFINE_SPINLOCK(ocrdma_devlist_lock);
68static DEFINE_IDR(ocrdma_dev_id); 66static DEFINE_IDR(ocrdma_dev_id);
69 67
70void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) 68void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
@@ -182,8 +180,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
182 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; 180 dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
183 181
184 dev->ibdev.alloc_mr = ocrdma_alloc_mr; 182 dev->ibdev.alloc_mr = ocrdma_alloc_mr;
185 dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; 183 dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
186 dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
187 184
188 /* mandatory to support user space verbs consumer. */ 185 /* mandatory to support user space verbs consumer. */
189 dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; 186 dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
@@ -325,9 +322,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
325 for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) 322 for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
326 if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) 323 if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
327 goto sysfs_err; 324 goto sysfs_err;
328 spin_lock(&ocrdma_devlist_lock);
329 list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
330 spin_unlock(&ocrdma_devlist_lock);
331 /* Init stats */ 325 /* Init stats */
332 ocrdma_add_port_stats(dev); 326 ocrdma_add_port_stats(dev);
333 /* Interrupt Moderation */ 327 /* Interrupt Moderation */
@@ -356,9 +350,8 @@ idr_err:
356 return NULL; 350 return NULL;
357} 351}
358 352
359static void ocrdma_remove_free(struct rcu_head *rcu) 353static void ocrdma_remove_free(struct ocrdma_dev *dev)
360{ 354{
361 struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
362 355
363 idr_remove(&ocrdma_dev_id, dev->id); 356 idr_remove(&ocrdma_dev_id, dev->id);
364 kfree(dev->mbx_cmd); 357 kfree(dev->mbx_cmd);
@@ -375,15 +368,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
375 ib_unregister_device(&dev->ibdev); 368 ib_unregister_device(&dev->ibdev);
376 369
377 ocrdma_rem_port_stats(dev); 370 ocrdma_rem_port_stats(dev);
378
379 spin_lock(&ocrdma_devlist_lock);
380 list_del_rcu(&dev->entry);
381 spin_unlock(&ocrdma_devlist_lock);
382
383 ocrdma_free_resources(dev); 371 ocrdma_free_resources(dev);
384 ocrdma_cleanup_hw(dev); 372 ocrdma_cleanup_hw(dev);
385 373 ocrdma_remove_free(dev);
386 call_rcu(&dev->rcu, ocrdma_remove_free);
387} 374}
388 375
389static int ocrdma_open(struct ocrdma_dev *dev) 376static int ocrdma_open(struct ocrdma_dev *dev)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 69334e214571..86c303a620c1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -855,9 +855,9 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
855{ 855{
856 if (!dev->dir) 856 if (!dev->dir)
857 return; 857 return;
858 debugfs_remove(dev->dir);
858 mutex_destroy(&dev->stats_lock); 859 mutex_destroy(&dev->stats_lock);
859 ocrdma_release_stats_mem(dev); 860 ocrdma_release_stats_mem(dev);
860 debugfs_remove(dev->dir);
861} 861}
862 862
863void ocrdma_init_debugfs(void) 863void ocrdma_init_debugfs(void)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 1f3affb6a477..583001bcfb8f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
73 if (index >= OCRDMA_MAX_SGID) 73 if (index >= OCRDMA_MAX_SGID)
74 return -EINVAL; 74 return -EINVAL;
75 75
76 ret = ib_get_cached_gid(ibdev, port, index, sgid); 76 ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
77 if (ret == -EAGAIN) { 77 if (ret == -EAGAIN) {
78 memcpy(sgid, &zgid, sizeof(*sgid)); 78 memcpy(sgid, &zgid, sizeof(*sgid));
79 return 0; 79 return 0;
@@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
1013 1013
1014 (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); 1014 (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
1015 1015
1016 kfree(mr->pages);
1016 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 1017 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
1017 1018
1018 /* it could be user registered memory. */ 1019 /* it could be user registered memory. */
@@ -1997,13 +1998,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1997{ 1998{
1998 struct ocrdma_ewqe_ud_hdr *ud_hdr = 1999 struct ocrdma_ewqe_ud_hdr *ud_hdr =
1999 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); 2000 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
2000 struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); 2001 struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
2001 2002
2002 ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; 2003 ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
2003 if (qp->qp_type == IB_QPT_GSI) 2004 if (qp->qp_type == IB_QPT_GSI)
2004 ud_hdr->qkey = qp->qkey; 2005 ud_hdr->qkey = qp->qkey;
2005 else 2006 else
2006 ud_hdr->qkey = wr->wr.ud.remote_qkey; 2007 ud_hdr->qkey = ud_wr(wr)->remote_qkey;
2007 ud_hdr->rsvd_ahid = ah->id; 2008 ud_hdr->rsvd_ahid = ah->id;
2008 if (ah->av->valid & OCRDMA_AV_VLAN_VALID) 2009 if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
2009 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT); 2010 hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
@@ -2106,9 +2107,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2106 status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); 2107 status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
2107 if (status) 2108 if (status)
2108 return status; 2109 return status;
2109 ext_rw->addr_lo = wr->wr.rdma.remote_addr; 2110 ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2110 ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); 2111 ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2111 ext_rw->lrkey = wr->wr.rdma.rkey; 2112 ext_rw->lrkey = rdma_wr(wr)->rkey;
2112 ext_rw->len = hdr->total_len; 2113 ext_rw->len = hdr->total_len;
2113 return 0; 2114 return 0;
2114} 2115}
@@ -2126,46 +2127,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
2126 hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); 2127 hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2127 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); 2128 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2128 2129
2129 ext_rw->addr_lo = wr->wr.rdma.remote_addr; 2130 ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2130 ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); 2131 ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2131 ext_rw->lrkey = wr->wr.rdma.rkey; 2132 ext_rw->lrkey = rdma_wr(wr)->rkey;
2132 ext_rw->len = hdr->total_len; 2133 ext_rw->len = hdr->total_len;
2133} 2134}
2134 2135
2135static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
2136 struct ocrdma_hw_mr *hwmr)
2137{
2138 int i;
2139 u64 buf_addr = 0;
2140 int num_pbes;
2141 struct ocrdma_pbe *pbe;
2142
2143 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2144 num_pbes = 0;
2145
2146 /* go through the OS phy regions & fill hw pbe entries into pbls. */
2147 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
2148 /* number of pbes can be more for one OS buf, when
2149 * buffers are of different sizes.
2150 * split the ib_buf to one or more pbes.
2151 */
2152 buf_addr = wr->wr.fast_reg.page_list->page_list[i];
2153 pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2154 pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2155 num_pbes += 1;
2156 pbe++;
2157
2158 /* if the pbl is full storing the pbes,
2159 * move to next pbl.
2160 */
2161 if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
2162 pbl_tbl++;
2163 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2164 }
2165 }
2166 return;
2167}
2168
2169static int get_encoded_page_size(int pg_sz) 2136static int get_encoded_page_size(int pg_sz)
2170{ 2137{
2171 /* Max size is 256M 4096 << 16 */ 2138 /* Max size is 256M 4096 << 16 */
@@ -2176,48 +2143,59 @@ static int get_encoded_page_size(int pg_sz)
2176 return i; 2143 return i;
2177} 2144}
2178 2145
2179 2146static int ocrdma_build_reg(struct ocrdma_qp *qp,
2180static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, 2147 struct ocrdma_hdr_wqe *hdr,
2181 struct ib_send_wr *wr) 2148 struct ib_reg_wr *wr)
2182{ 2149{
2183 u64 fbo; 2150 u64 fbo;
2184 struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); 2151 struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2185 struct ocrdma_mr *mr; 2152 struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
2186 struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device); 2153 struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
2154 struct ocrdma_pbe *pbe;
2187 u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); 2155 u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2156 int num_pbes = 0, i;
2188 2157
2189 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); 2158 wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2190 2159
2191 if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr)
2192 return -EINVAL;
2193
2194 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); 2160 hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2195 hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); 2161 hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2196 2162
2197 if (wr->wr.fast_reg.page_list_len == 0) 2163 if (wr->access & IB_ACCESS_LOCAL_WRITE)
2198 BUG();
2199 if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
2200 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; 2164 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2201 if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) 2165 if (wr->access & IB_ACCESS_REMOTE_WRITE)
2202 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; 2166 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2203 if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) 2167 if (wr->access & IB_ACCESS_REMOTE_READ)
2204 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; 2168 hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2205 hdr->lkey = wr->wr.fast_reg.rkey; 2169 hdr->lkey = wr->key;
2206 hdr->total_len = wr->wr.fast_reg.length; 2170 hdr->total_len = mr->ibmr.length;
2207 2171
2208 fbo = wr->wr.fast_reg.iova_start - 2172 fbo = mr->ibmr.iova - mr->pages[0];
2209 (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
2210 2173
2211 fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); 2174 fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
2212 fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); 2175 fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
2213 fast_reg->fbo_hi = upper_32_bits(fbo); 2176 fast_reg->fbo_hi = upper_32_bits(fbo);
2214 fast_reg->fbo_lo = (u32) fbo & 0xffffffff; 2177 fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2215 fast_reg->num_sges = wr->wr.fast_reg.page_list_len; 2178 fast_reg->num_sges = mr->npages;
2216 fast_reg->size_sge = 2179 fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
2217 get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); 2180
2218 mr = (struct ocrdma_mr *) (unsigned long) 2181 pbe = pbl_tbl->va;
2219 dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; 2182 for (i = 0; i < mr->npages; i++) {
2220 build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); 2183 u64 buf_addr = mr->pages[i];
2184
2185 pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2186 pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2187 num_pbes += 1;
2188 pbe++;
2189
2190 /* if the pbl is full storing the pbes,
2191 * move to next pbl.
2192 */
2193 if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
2194 pbl_tbl++;
2195 pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2196 }
2197 }
2198
2221 return 0; 2199 return 0;
2222} 2200}
2223 2201
@@ -2300,8 +2278,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2300 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; 2278 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2301 hdr->lkey = wr->ex.invalidate_rkey; 2279 hdr->lkey = wr->ex.invalidate_rkey;
2302 break; 2280 break;
2303 case IB_WR_FAST_REG_MR: 2281 case IB_WR_REG_MR:
2304 status = ocrdma_build_fr(qp, hdr, wr); 2282 status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
2305 break; 2283 break;
2306 default: 2284 default:
2307 status = -EINVAL; 2285 status = -EINVAL;
@@ -2567,7 +2545,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2567 ibwc->opcode = IB_WC_SEND; 2545 ibwc->opcode = IB_WC_SEND;
2568 break; 2546 break;
2569 case OCRDMA_FR_MR: 2547 case OCRDMA_FR_MR:
2570 ibwc->opcode = IB_WC_FAST_REG_MR; 2548 ibwc->opcode = IB_WC_REG_MR;
2571 break; 2549 break;
2572 case OCRDMA_LKEY_INV: 2550 case OCRDMA_LKEY_INV:
2573 ibwc->opcode = IB_WC_LOCAL_INV; 2551 ibwc->opcode = IB_WC_LOCAL_INV;
@@ -2933,16 +2911,11 @@ expand_cqe:
2933 } 2911 }
2934stop_cqe: 2912stop_cqe:
2935 cq->getp = cur_getp; 2913 cq->getp = cur_getp;
2936 if (cq->deferred_arm) { 2914 if (cq->deferred_arm || polled_hw_cqes) {
2937 ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, 2915 ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
2938 polled_hw_cqes); 2916 cq->deferred_sol, polled_hw_cqes);
2939 cq->deferred_arm = false; 2917 cq->deferred_arm = false;
2940 cq->deferred_sol = false; 2918 cq->deferred_sol = false;
2941 } else {
2942 /* We need to pop the CQE. No need to arm */
2943 ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
2944 polled_hw_cqes);
2945 cq->deferred_sol = false;
2946 } 2919 }
2947 2920
2948 return i; 2921 return i;
@@ -3058,6 +3031,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
3058 if (!mr) 3031 if (!mr)
3059 return ERR_PTR(-ENOMEM); 3032 return ERR_PTR(-ENOMEM);
3060 3033
3034 mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
3035 if (!mr->pages) {
3036 status = -ENOMEM;
3037 goto pl_err;
3038 }
3039
3061 status = ocrdma_get_pbl_info(dev, mr, max_num_sg); 3040 status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
3062 if (status) 3041 if (status)
3063 goto pbl_err; 3042 goto pbl_err;
@@ -3081,30 +3060,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
3081mbx_err: 3060mbx_err:
3082 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 3061 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
3083pbl_err: 3062pbl_err:
3063 kfree(mr->pages);
3064pl_err:
3084 kfree(mr); 3065 kfree(mr);
3085 return ERR_PTR(-ENOMEM); 3066 return ERR_PTR(-ENOMEM);
3086} 3067}
3087 3068
3088struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
3089 *ibdev,
3090 int page_list_len)
3091{
3092 struct ib_fast_reg_page_list *frmr_list;
3093 int size;
3094
3095 size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
3096 frmr_list = kzalloc(size, GFP_KERNEL);
3097 if (!frmr_list)
3098 return ERR_PTR(-ENOMEM);
3099 frmr_list->page_list = (u64 *)(frmr_list + 1);
3100 return frmr_list;
3101}
3102
3103void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
3104{
3105 kfree(page_list);
3106}
3107
3108#define MAX_KERNEL_PBE_SIZE 65536 3069#define MAX_KERNEL_PBE_SIZE 65536
3109static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, 3070static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
3110 int buf_cnt, u32 *pbe_size) 3071 int buf_cnt, u32 *pbe_size)
@@ -3267,3 +3228,26 @@ pbl_err:
3267 kfree(mr); 3228 kfree(mr);
3268 return ERR_PTR(status); 3229 return ERR_PTR(status);
3269} 3230}
3231
3232static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
3233{
3234 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
3235
3236 if (unlikely(mr->npages == mr->hwmr.num_pbes))
3237 return -ENOMEM;
3238
3239 mr->pages[mr->npages++] = addr;
3240
3241 return 0;
3242}
3243
3244int ocrdma_map_mr_sg(struct ib_mr *ibmr,
3245 struct scatterlist *sg,
3246 int sg_nents)
3247{
3248 struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
3249
3250 mr->npages = 0;
3251
3252 return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
3253}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 308c16857a5d..a2f3b4dc20b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -125,9 +125,8 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
125struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, 125struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
126 enum ib_mr_type mr_type, 126 enum ib_mr_type mr_type,
127 u32 max_num_sg); 127 u32 max_num_sg);
128struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device 128int ocrdma_map_mr_sg(struct ib_mr *ibmr,
129 *ibdev, 129 struct scatterlist *sg,
130 int page_list_len); 130 int sg_nents);
131void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
132 131
133#endif /* __OCRDMA_VERBS_H__ */ 132#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 5afaa218508d..d725c565518d 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -336,14 +336,15 @@ bail:
336} 336}
337 337
338/* 338/*
339 * Initialize the memory region specified by the work reqeust. 339 * Initialize the memory region specified by the work request.
340 */ 340 */
341int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) 341int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
342{ 342{
343 struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; 343 struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
344 struct qib_pd *pd = to_ipd(qp->ibqp.pd); 344 struct qib_pd *pd = to_ipd(qp->ibqp.pd);
345 struct qib_mregion *mr; 345 struct qib_mr *mr = to_imr(wr->mr);
346 u32 rkey = wr->wr.fast_reg.rkey; 346 struct qib_mregion *mrg;
347 u32 key = wr->key;
347 unsigned i, n, m; 348 unsigned i, n, m;
348 int ret = -EINVAL; 349 int ret = -EINVAL;
349 unsigned long flags; 350 unsigned long flags;
@@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
351 size_t ps; 352 size_t ps;
352 353
353 spin_lock_irqsave(&rkt->lock, flags); 354 spin_lock_irqsave(&rkt->lock, flags);
354 if (pd->user || rkey == 0) 355 if (pd->user || key == 0)
355 goto bail; 356 goto bail;
356 357
357 mr = rcu_dereference_protected( 358 mrg = rcu_dereference_protected(
358 rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], 359 rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
359 lockdep_is_held(&rkt->lock)); 360 lockdep_is_held(&rkt->lock));
360 if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) 361 if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
361 goto bail; 362 goto bail;
362 363
363 if (wr->wr.fast_reg.page_list_len > mr->max_segs) 364 if (mr->npages > mrg->max_segs)
364 goto bail; 365 goto bail;
365 366
366 ps = 1UL << wr->wr.fast_reg.page_shift; 367 ps = mr->ibmr.page_size;
367 if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) 368 if (mr->ibmr.length > ps * mr->npages)
368 goto bail; 369 goto bail;
369 370
370 mr->user_base = wr->wr.fast_reg.iova_start; 371 mrg->user_base = mr->ibmr.iova;
371 mr->iova = wr->wr.fast_reg.iova_start; 372 mrg->iova = mr->ibmr.iova;
372 mr->lkey = rkey; 373 mrg->lkey = key;
373 mr->length = wr->wr.fast_reg.length; 374 mrg->length = mr->ibmr.length;
374 mr->access_flags = wr->wr.fast_reg.access_flags; 375 mrg->access_flags = wr->access;
375 page_list = wr->wr.fast_reg.page_list->page_list; 376 page_list = mr->pages;
376 m = 0; 377 m = 0;
377 n = 0; 378 n = 0;
378 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { 379 for (i = 0; i < mr->npages; i++) {
379 mr->map[m]->segs[n].vaddr = (void *) page_list[i]; 380 mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
380 mr->map[m]->segs[n].length = ps; 381 mrg->map[m]->segs[n].length = ps;
381 if (++n == QIB_SEGSZ) { 382 if (++n == QIB_SEGSZ) {
382 m++; 383 m++;
383 n = 0; 384 n = 0;
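
qib_reg_mr() now takes the registration parameters from struct ib_reg_wr and from the MR that qib_map_mr_sg() populated, instead of from wr->wr.fast_reg. For reference, the WR type the provider receives is roughly this (a sketch of the ib_verbs.h addition, not quoted verbatim):

struct ib_reg_wr {
        struct ib_send_wr       wr;
        struct ib_mr            *mr;            /* MR filled in by ->map_mr_sg() */
        u32                     key;            /* key to bind to the MR */
        int                     access;         /* IB_ACCESS_* flags */
};

static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
{
        return container_of(wr, struct ib_reg_wr, wr);
}

The iova, length and page size that the old fast_reg union carried are now read from wr->mr (mr->ibmr.iova, mr->ibmr.length and mr->ibmr.page_size above), so the driver no longer consumes values that could disagree with the mapped page list.
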
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 19220dcb9a3b..294f5c706be9 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr)
303 int ret = 0; 303 int ret = 0;
304 unsigned long timeout; 304 unsigned long timeout;
305 305
306 kfree(mr->pages);
306 qib_free_lkey(&mr->mr); 307 qib_free_lkey(&mr->mr);
307 308
308 qib_put_mr(&mr->mr); /* will set completion if last */ 309 qib_put_mr(&mr->mr); /* will set completion if last */
@@ -323,7 +324,7 @@ out:
323 324
324/* 325/*
325 * Allocate a memory region usable with the 326 * Allocate a memory region usable with the
326 * IB_WR_FAST_REG_MR send work request. 327 * IB_WR_REG_MR send work request.
327 * 328 *
328 * Return the memory region on success, otherwise return an errno. 329 * Return the memory region on success, otherwise return an errno.
329 */ 330 */
@@ -340,37 +341,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
340 if (IS_ERR(mr)) 341 if (IS_ERR(mr))
341 return (struct ib_mr *)mr; 342 return (struct ib_mr *)mr;
342 343
344 mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
345 if (!mr->pages)
346 goto err;
347
343 return &mr->ibmr; 348 return &mr->ibmr;
349
350err:
351 qib_dereg_mr(&mr->ibmr);
352 return ERR_PTR(-ENOMEM);
344} 353}
345 354
346struct ib_fast_reg_page_list * 355static int qib_set_page(struct ib_mr *ibmr, u64 addr)
347qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
348{ 356{
349 unsigned size = page_list_len * sizeof(u64); 357 struct qib_mr *mr = to_imr(ibmr);
350 struct ib_fast_reg_page_list *pl;
351
352 if (size > PAGE_SIZE)
353 return ERR_PTR(-EINVAL);
354
355 pl = kzalloc(sizeof(*pl), GFP_KERNEL);
356 if (!pl)
357 return ERR_PTR(-ENOMEM);
358 358
359 pl->page_list = kzalloc(size, GFP_KERNEL); 359 if (unlikely(mr->npages == mr->mr.max_segs))
360 if (!pl->page_list) 360 return -ENOMEM;
361 goto err_free;
362 361
363 return pl; 362 mr->pages[mr->npages++] = addr;
364 363
365err_free: 364 return 0;
366 kfree(pl);
367 return ERR_PTR(-ENOMEM);
368} 365}
369 366
370void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) 367int qib_map_mr_sg(struct ib_mr *ibmr,
368 struct scatterlist *sg,
369 int sg_nents)
371{ 370{
372 kfree(pl->page_list); 371 struct qib_mr *mr = to_imr(ibmr);
373 kfree(pl); 372
373 mr->npages = 0;
374
375 return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
374} 376}
375 377
376/** 378/**
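
qib_map_mr_sg(), like the nes and ocrdma versions earlier in this diff, is a thin wrapper around ib_sg_to_pages() plus a per-driver set_page callback. The core entry point that consumers call is roughly the following (a sketch of the drivers/infiniband/core/verbs.c helper added in this merge, reconstructed from the driver-side usage and not quoted verbatim):

int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
                 unsigned int page_size)
{
        if (unlikely(!mr->device->map_mr_sg))
                return -ENOSYS;

        mr->page_size = page_size;

        /* Dispatch to qib_map_mr_sg(), nes_map_mr_sg(), ocrdma_map_mr_sg(), ... */
        return mr->device->map_mr_sg(mr, sg, sg_nents);
}

ib_sg_to_pages() then walks the scatterlist in mr->page_size chunks, invokes set_page() once per page address, and fills mr->iova and mr->length, which is why the converted drivers read those MR fields at post time instead of keeping their own copies.
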
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 4fa88ba2963e..40f85bb3e0d3 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -436,7 +436,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
436 if (qp->ibqp.qp_type == IB_QPT_UD || 436 if (qp->ibqp.qp_type == IB_QPT_UD ||
437 qp->ibqp.qp_type == IB_QPT_SMI || 437 qp->ibqp.qp_type == IB_QPT_SMI ||
438 qp->ibqp.qp_type == IB_QPT_GSI) 438 qp->ibqp.qp_type == IB_QPT_GSI)
439 atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); 439 atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
440 if (++qp->s_last >= qp->s_size) 440 if (++qp->s_last >= qp->s_size)
441 qp->s_last = 0; 441 qp->s_last = 0;
442 } 442 }
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 4544d6f88ad7..e6b7556d5221 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp)
373 qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; 373 qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
374 goto bail; 374 goto bail;
375 } 375 }
376
376 ohdr->u.rc.reth.vaddr = 377 ohdr->u.rc.reth.vaddr =
377 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 378 cpu_to_be64(wqe->rdma_wr.remote_addr);
378 ohdr->u.rc.reth.rkey = 379 ohdr->u.rc.reth.rkey =
379 cpu_to_be32(wqe->wr.wr.rdma.rkey); 380 cpu_to_be32(wqe->rdma_wr.rkey);
380 ohdr->u.rc.reth.length = cpu_to_be32(len); 381 ohdr->u.rc.reth.length = cpu_to_be32(len);
381 hwords += sizeof(struct ib_reth) / sizeof(u32); 382 hwords += sizeof(struct ib_reth) / sizeof(u32);
382 wqe->lpsn = wqe->psn; 383 wqe->lpsn = wqe->psn;
@@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp)
386 len = pmtu; 387 len = pmtu;
387 break; 388 break;
388 } 389 }
389 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) 390 if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
390 qp->s_state = OP(RDMA_WRITE_ONLY); 391 qp->s_state = OP(RDMA_WRITE_ONLY);
391 else { 392 else {
392 qp->s_state = 393 qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
393 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
394 /* Immediate data comes after RETH */ 394 /* Immediate data comes after RETH */
395 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; 395 ohdr->u.rc.imm_data =
396 wqe->rdma_wr.wr.ex.imm_data;
396 hwords += 1; 397 hwords += 1;
397 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 398 if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
398 bth0 |= IB_BTH_SOLICITED; 399 bth0 |= IB_BTH_SOLICITED;
399 } 400 }
400 bth2 |= IB_BTH_REQ_ACK; 401 bth2 |= IB_BTH_REQ_ACK;
@@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp)
424 qp->s_next_psn += (len - 1) / pmtu; 425 qp->s_next_psn += (len - 1) / pmtu;
425 wqe->lpsn = qp->s_next_psn++; 426 wqe->lpsn = qp->s_next_psn++;
426 } 427 }
428
427 ohdr->u.rc.reth.vaddr = 429 ohdr->u.rc.reth.vaddr =
428 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 430 cpu_to_be64(wqe->rdma_wr.remote_addr);
429 ohdr->u.rc.reth.rkey = 431 ohdr->u.rc.reth.rkey =
430 cpu_to_be32(wqe->wr.wr.rdma.rkey); 432 cpu_to_be32(wqe->rdma_wr.rkey);
431 ohdr->u.rc.reth.length = cpu_to_be32(len); 433 ohdr->u.rc.reth.length = cpu_to_be32(len);
432 qp->s_state = OP(RDMA_READ_REQUEST); 434 qp->s_state = OP(RDMA_READ_REQUEST);
433 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 435 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp)
455 qp->s_lsn++; 457 qp->s_lsn++;
456 wqe->lpsn = wqe->psn; 458 wqe->lpsn = wqe->psn;
457 } 459 }
458 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 460 if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
459 qp->s_state = OP(COMPARE_SWAP); 461 qp->s_state = OP(COMPARE_SWAP);
460 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 462 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
461 wqe->wr.wr.atomic.swap); 463 wqe->atomic_wr.swap);
462 ohdr->u.atomic_eth.compare_data = cpu_to_be64( 464 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
463 wqe->wr.wr.atomic.compare_add); 465 wqe->atomic_wr.compare_add);
464 } else { 466 } else {
465 qp->s_state = OP(FETCH_ADD); 467 qp->s_state = OP(FETCH_ADD);
466 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 468 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
467 wqe->wr.wr.atomic.compare_add); 469 wqe->atomic_wr.compare_add);
468 ohdr->u.atomic_eth.compare_data = 0; 470 ohdr->u.atomic_eth.compare_data = 0;
469 } 471 }
470 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( 472 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
471 wqe->wr.wr.atomic.remote_addr >> 32); 473 wqe->atomic_wr.remote_addr >> 32);
472 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( 474 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
473 wqe->wr.wr.atomic.remote_addr); 475 wqe->atomic_wr.remote_addr);
474 ohdr->u.atomic_eth.rkey = cpu_to_be32( 476 ohdr->u.atomic_eth.rkey = cpu_to_be32(
475 wqe->wr.wr.atomic.rkey); 477 wqe->atomic_wr.rkey);
476 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); 478 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
477 ss = NULL; 479 ss = NULL;
478 len = 0; 480 len = 0;
@@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp)
597 */ 599 */
598 len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; 600 len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
599 ohdr->u.rc.reth.vaddr = 601 ohdr->u.rc.reth.vaddr =
600 cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); 602 cpu_to_be64(wqe->rdma_wr.remote_addr + len);
601 ohdr->u.rc.reth.rkey = 603 ohdr->u.rc.reth.rkey =
602 cpu_to_be32(wqe->wr.wr.rdma.rkey); 604 cpu_to_be32(wqe->rdma_wr.rkey);
603 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); 605 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
604 qp->s_state = OP(RDMA_READ_REQUEST); 606 qp->s_state = OP(RDMA_READ_REQUEST);
605 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 607 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
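
qib_rc.c now reads wqe->atomic_wr.* rather than wqe->wr.wr.atomic.*. The matching consumer-side request, built with the new struct, looks roughly like this (illustrative fragment; qp, sge, remote_addr, rkey, expected and new_val are placeholders):

        struct ib_atomic_wr wr = {
                .wr.opcode      = IB_WR_ATOMIC_CMP_AND_SWP,
                .wr.send_flags  = IB_SEND_SIGNALED,
                .wr.sg_list     = &sge,         /* 8-byte buffer for the old value */
                .wr.num_sge     = 1,
                .remote_addr    = remote_addr,
                .compare_add    = expected,     /* compare value */
                .swap           = new_val,      /* written if the compare matches */
                .rkey           = rkey,
        };
        struct ib_send_wr *bad_wr;
        int ret = ib_post_send(qp, &wr.wr, &bad_wr);
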
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 22e356ca8058..b1aa21bdd484 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -459,8 +459,8 @@ again:
459 if (wqe->length == 0) 459 if (wqe->length == 0)
460 break; 460 break;
461 if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, 461 if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
462 wqe->wr.wr.rdma.remote_addr, 462 wqe->rdma_wr.remote_addr,
463 wqe->wr.wr.rdma.rkey, 463 wqe->rdma_wr.rkey,
464 IB_ACCESS_REMOTE_WRITE))) 464 IB_ACCESS_REMOTE_WRITE)))
465 goto acc_err; 465 goto acc_err;
466 qp->r_sge.sg_list = NULL; 466 qp->r_sge.sg_list = NULL;
@@ -472,8 +472,8 @@ again:
472 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 472 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
473 goto inv_err; 473 goto inv_err;
474 if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 474 if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
475 wqe->wr.wr.rdma.remote_addr, 475 wqe->rdma_wr.remote_addr,
476 wqe->wr.wr.rdma.rkey, 476 wqe->rdma_wr.rkey,
477 IB_ACCESS_REMOTE_READ))) 477 IB_ACCESS_REMOTE_READ)))
478 goto acc_err; 478 goto acc_err;
479 release = 0; 479 release = 0;
@@ -490,18 +490,18 @@ again:
490 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 490 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
491 goto inv_err; 491 goto inv_err;
492 if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 492 if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
493 wqe->wr.wr.atomic.remote_addr, 493 wqe->atomic_wr.remote_addr,
494 wqe->wr.wr.atomic.rkey, 494 wqe->atomic_wr.rkey,
495 IB_ACCESS_REMOTE_ATOMIC))) 495 IB_ACCESS_REMOTE_ATOMIC)))
496 goto acc_err; 496 goto acc_err;
497 /* Perform atomic OP and save result. */ 497 /* Perform atomic OP and save result. */
498 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 498 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
499 sdata = wqe->wr.wr.atomic.compare_add; 499 sdata = wqe->atomic_wr.compare_add;
500 *(u64 *) sqp->s_sge.sge.vaddr = 500 *(u64 *) sqp->s_sge.sge.vaddr =
501 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 501 (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
502 (u64) atomic64_add_return(sdata, maddr) - sdata : 502 (u64) atomic64_add_return(sdata, maddr) - sdata :
503 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 503 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
504 sdata, wqe->wr.wr.atomic.swap); 504 sdata, wqe->atomic_wr.swap);
505 qib_put_mr(qp->r_sge.sge.mr); 505 qib_put_mr(qp->r_sge.sge.mr);
506 qp->r_sge.num_sge = 0; 506 qp->r_sge.num_sge = 0;
507 goto send_comp; 507 goto send_comp;
@@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
785 if (qp->ibqp.qp_type == IB_QPT_UD || 785 if (qp->ibqp.qp_type == IB_QPT_UD ||
786 qp->ibqp.qp_type == IB_QPT_SMI || 786 qp->ibqp.qp_type == IB_QPT_SMI ||
787 qp->ibqp.qp_type == IB_QPT_GSI) 787 qp->ibqp.qp_type == IB_QPT_GSI)
788 atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); 788 atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
789 789
790 /* See ch. 11.2.4.1 and 10.7.3.1 */ 790 /* See ch. 11.2.4.1 and 10.7.3.1 */
791 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || 791 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
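
The wqe->rdma_wr, wqe->atomic_wr and wqe->ud_wr accesses in qib_ruc.c and qib_rc.c rely on struct qib_swqe overlaying the opcode-specific work requests on the previously embedded ib_send_wr, roughly as below (a sketch of the qib_verbs.h change, abbreviated and not the verbatim definition). Because every ib_*_wr starts with the generic ib_send_wr, wqe->wr.opcode and wqe->wr.send_flags keep working for all opcodes.

struct qib_swqe {
        union {
                struct ib_send_wr wr;           /* don't use wr.sg_list */
                struct ib_ud_wr ud_wr;
                struct ib_reg_wr reg_wr;
                struct ib_rdma_wr rdma_wr;
                struct ib_atomic_wr atomic_wr;
        };
        /* ... psn/lpsn/ssn/length and the SGE array follow as before ... */
};
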
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index aa3a8035bb68..06a564589c35 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp)
129 case IB_WR_RDMA_WRITE: 129 case IB_WR_RDMA_WRITE:
130 case IB_WR_RDMA_WRITE_WITH_IMM: 130 case IB_WR_RDMA_WRITE_WITH_IMM:
131 ohdr->u.rc.reth.vaddr = 131 ohdr->u.rc.reth.vaddr =
132 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 132 cpu_to_be64(wqe->rdma_wr.remote_addr);
133 ohdr->u.rc.reth.rkey = 133 ohdr->u.rc.reth.rkey =
134 cpu_to_be32(wqe->wr.wr.rdma.rkey); 134 cpu_to_be32(wqe->rdma_wr.rkey);
135 ohdr->u.rc.reth.length = cpu_to_be32(len); 135 ohdr->u.rc.reth.length = cpu_to_be32(len);
136 hwords += sizeof(struct ib_reth) / 4; 136 hwords += sizeof(struct ib_reth) / 4;
137 if (len > pmtu) { 137 if (len > pmtu) {
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 26243b722b5e..59193f67ea78 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
59 u32 length; 59 u32 length;
60 enum ib_qp_type sqptype, dqptype; 60 enum ib_qp_type sqptype, dqptype;
61 61
62 qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); 62 qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
63 if (!qp) { 63 if (!qp) {
64 ibp->n_pkt_drops++; 64 ibp->n_pkt_drops++;
65 return; 65 return;
@@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
76 goto drop; 76 goto drop;
77 } 77 }
78 78
79 ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; 79 ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
80 ppd = ppd_from_ibp(ibp); 80 ppd = ppd_from_ibp(ibp);
81 81
82 if (qp->ibqp.qp_num > 1) { 82 if (qp->ibqp.qp_num > 1) {
@@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
106 if (qp->ibqp.qp_num) { 106 if (qp->ibqp.qp_num) {
107 u32 qkey; 107 u32 qkey;
108 108
109 qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? 109 qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
110 sqp->qkey : swqe->wr.wr.ud.remote_qkey; 110 sqp->qkey : swqe->ud_wr.remote_qkey;
111 if (unlikely(qkey != qp->qkey)) { 111 if (unlikely(qkey != qp->qkey)) {
112 u16 lid; 112 u16 lid;
113 113
@@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
210 wc.qp = &qp->ibqp; 210 wc.qp = &qp->ibqp;
211 wc.src_qp = sqp->ibqp.qp_num; 211 wc.src_qp = sqp->ibqp.qp_num;
212 wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? 212 wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
213 swqe->wr.wr.ud.pkey_index : 0; 213 swqe->ud_wr.pkey_index : 0;
214 wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); 214 wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
215 wc.sl = ah_attr->sl; 215 wc.sl = ah_attr->sl;
216 wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); 216 wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
@@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp)
277 /* Construct the header. */ 277 /* Construct the header. */
278 ibp = to_iport(qp->ibqp.device, qp->port_num); 278 ibp = to_iport(qp->ibqp.device, qp->port_num);
279 ppd = ppd_from_ibp(ibp); 279 ppd = ppd_from_ibp(ibp);
280 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; 280 ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
281 if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { 281 if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
282 if (ah_attr->dlid != QIB_PERMISSIVE_LID) 282 if (ah_attr->dlid != QIB_PERMISSIVE_LID)
283 this_cpu_inc(ibp->pmastats->n_multicast_xmit); 283 this_cpu_inc(ibp->pmastats->n_multicast_xmit);
@@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp)
363 bth0 |= extra_bytes << 20; 363 bth0 |= extra_bytes << 20;
364 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : 364 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
365 qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? 365 qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
366 wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); 366 wqe->ud_wr.pkey_index : qp->s_pkey_index);
367 ohdr->bth[0] = cpu_to_be32(bth0); 367 ohdr->bth[0] = cpu_to_be32(bth0);
368 /* 368 /*
369 * Use the multicast QP if the destination LID is a multicast LID. 369 * Use the multicast QP if the destination LID is a multicast LID.
@@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp)
371 ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && 371 ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
372 ah_attr->dlid != QIB_PERMISSIVE_LID ? 372 ah_attr->dlid != QIB_PERMISSIVE_LID ?
373 cpu_to_be32(QIB_MULTICAST_QPN) : 373 cpu_to_be32(QIB_MULTICAST_QPN) :
374 cpu_to_be32(wqe->wr.wr.ud.remote_qpn); 374 cpu_to_be32(wqe->ud_wr.remote_qpn);
375 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); 375 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
376 /* 376 /*
377 * Qkeys with the high order bit set mean use the 377 * Qkeys with the high order bit set mean use the
378 * qkey from the QP context instead of the WR (see 10.2.5). 378 * qkey from the QP context instead of the WR (see 10.2.5).
379 */ 379 */
380 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? 380 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
381 qp->qkey : wqe->wr.wr.ud.remote_qkey); 381 qp->qkey : wqe->ud_wr.remote_qkey);
382 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 382 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
383 383
384done: 384done:
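
For readers following the conversion above: the datagram fields that used to live in the wr.wr.ud union now sit in a dedicated struct ib_ud_wr that embeds the generic send WR as its first member. A simplified sketch of the 4.4 wrapper and its accessor (abridged from include/rdma/ib_verbs.h; field order and the UD-LSO members are omitted here):

    struct ib_ud_wr {
            struct ib_send_wr wr;           /* generic send WR comes first */
            struct ib_ah *ah;
            u32 remote_qpn;
            u32 remote_qkey;
            u16 pkey_index;
            /* header/hlen/mss (UD LSO) not shown */
    };

    static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
    {
            return container_of(wr, struct ib_ud_wr, wr);
    }

Because struct ib_send_wr is the first member, a posted chain is still walked through wr->next, and a driver upcasts with ud_wr() only once it knows the QP type, which is exactly what the qib hunks above do.
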
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3dcc4985b60f..de6cb6fcda8d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -362,8 +362,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
362 * undefined operations. 362 * undefined operations.
363 * Make sure buffer is large enough to hold the result for atomics. 363 * Make sure buffer is large enough to hold the result for atomics.
364 */ 364 */
365 if (wr->opcode == IB_WR_FAST_REG_MR) { 365 if (wr->opcode == IB_WR_REG_MR) {
366 if (qib_fast_reg_mr(qp, wr)) 366 if (qib_reg_mr(qp, reg_wr(wr)))
367 goto bail_inval; 367 goto bail_inval;
368 } else if (qp->ibqp.qp_type == IB_QPT_UC) { 368 } else if (qp->ibqp.qp_type == IB_QPT_UC) {
369 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) 369 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
@@ -374,7 +374,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
374 wr->opcode != IB_WR_SEND_WITH_IMM) 374 wr->opcode != IB_WR_SEND_WITH_IMM)
375 goto bail_inval; 375 goto bail_inval;
376 /* Check UD destination address PD */ 376 /* Check UD destination address PD */
377 if (qp->ibqp.pd != wr->wr.ud.ah->pd) 377 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
378 goto bail_inval; 378 goto bail_inval;
379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) 379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380 goto bail_inval; 380 goto bail_inval;
@@ -397,7 +397,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
397 rkt = &to_idev(qp->ibqp.device)->lk_table; 397 rkt = &to_idev(qp->ibqp.device)->lk_table;
398 pd = to_ipd(qp->ibqp.pd); 398 pd = to_ipd(qp->ibqp.pd);
399 wqe = get_swqe_ptr(qp, qp->s_head); 399 wqe = get_swqe_ptr(qp, qp->s_head);
400 wqe->wr = *wr; 400
401 if (qp->ibqp.qp_type != IB_QPT_UC &&
402 qp->ibqp.qp_type != IB_QPT_RC)
403 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
404 else if (wr->opcode == IB_WR_REG_MR)
405 memcpy(&wqe->reg_wr, reg_wr(wr),
406 sizeof(wqe->reg_wr));
407 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
408 wr->opcode == IB_WR_RDMA_WRITE ||
409 wr->opcode == IB_WR_RDMA_READ)
410 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
411 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
412 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
413 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
414 else
415 memcpy(&wqe->wr, wr, sizeof(wqe->wr));
416
401 wqe->length = 0; 417 wqe->length = 0;
402 j = 0; 418 j = 0;
403 if (wr->num_sge) { 419 if (wr->num_sge) {
@@ -426,7 +442,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
426 qp->port_num - 1)->ibmtu) 442 qp->port_num - 1)->ibmtu)
427 goto bail_inval_free; 443 goto bail_inval_free;
428 else 444 else
429 atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); 445 atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
430 wqe->ssn = qp->s_ssn++; 446 wqe->ssn = qp->s_ssn++;
431 qp->s_head = next; 447 qp->s_head = next;
432 448
@@ -2244,8 +2260,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
2244 ibdev->reg_user_mr = qib_reg_user_mr; 2260 ibdev->reg_user_mr = qib_reg_user_mr;
2245 ibdev->dereg_mr = qib_dereg_mr; 2261 ibdev->dereg_mr = qib_dereg_mr;
2246 ibdev->alloc_mr = qib_alloc_mr; 2262 ibdev->alloc_mr = qib_alloc_mr;
2247 ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; 2263 ibdev->map_mr_sg = qib_map_mr_sg;
2248 ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
2249 ibdev->alloc_fmr = qib_alloc_fmr; 2264 ibdev->alloc_fmr = qib_alloc_fmr;
2250 ibdev->map_phys_fmr = qib_map_phys_fmr; 2265 ibdev->map_phys_fmr = qib_map_phys_fmr;
2251 ibdev->unmap_fmr = qib_unmap_fmr; 2266 ibdev->unmap_fmr = qib_unmap_fmr;
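
The per-opcode memcpy() added to qib_post_one_send() works because every typed work request embeds struct ib_send_wr as its first member, so copying sizeof(the wrapper) captures both the generic part and the type-specific tail. Roughly, simplified from the 4.4 include/rdma/ib_verbs.h:

    struct ib_rdma_wr {
            struct ib_send_wr wr;
            u64 remote_addr;
            u32 rkey;
    };

    struct ib_reg_wr {
            struct ib_send_wr wr;
            struct ib_mr *mr;       /* MR previously loaded with ib_map_mr_sg() */
            u32 key;
            int access;
    };

    /* rdma_wr(), atomic_wr() and reg_wr() are container_of() accessors, e.g.: */
    static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
    {
            return container_of(wr, struct ib_rdma_wr, wr);
    }
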
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a08df70e8503..2baf5ad251ed 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -330,6 +330,8 @@ struct qib_mr {
330 struct ib_mr ibmr; 330 struct ib_mr ibmr;
331 struct ib_umem *umem; 331 struct ib_umem *umem;
332 struct qib_mregion mr; /* must be last */ 332 struct qib_mregion mr; /* must be last */
333 u64 *pages;
334 u32 npages;
333}; 335};
334 336
335/* 337/*
@@ -338,7 +340,13 @@ struct qib_mr {
338 * in qp->s_max_sge. 340 * in qp->s_max_sge.
339 */ 341 */
340struct qib_swqe { 342struct qib_swqe {
341 struct ib_send_wr wr; /* don't use wr.sg_list */ 343 union {
344 struct ib_send_wr wr; /* don't use wr.sg_list */
345 struct ib_ud_wr ud_wr;
346 struct ib_reg_wr reg_wr;
347 struct ib_rdma_wr rdma_wr;
348 struct ib_atomic_wr atomic_wr;
349 };
342 u32 psn; /* first packet sequence number */ 350 u32 psn; /* first packet sequence number */
343 u32 lpsn; /* last packet sequence number */ 351 u32 lpsn; /* last packet sequence number */
344 u32 ssn; /* send sequence number */ 352 u32 ssn; /* send sequence number */
@@ -1038,12 +1046,11 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
1038 enum ib_mr_type mr_type, 1046 enum ib_mr_type mr_type,
1039 u32 max_entries); 1047 u32 max_entries);
1040 1048
1041struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( 1049int qib_map_mr_sg(struct ib_mr *ibmr,
1042 struct ib_device *ibdev, int page_list_len); 1050 struct scatterlist *sg,
1043 1051 int sg_nents);
1044void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
1045 1052
1046int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); 1053int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
1047 1054
1048struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, 1055struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
1049 struct ib_fmr_attr *fmr_attr); 1056 struct ib_fmr_attr *fmr_attr);
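
The new map_mr_sg entry point replaces the old alloc/free_fast_reg_page_list pair: the core hands the driver a scatterlist and the driver records one address per HCA page, typically by letting ib_sg_to_pages() do the walking. A minimal sketch under assumed names (example_mr, example_set_page and example_map_mr_sg are hypothetical, not the actual qib implementation):

    #include <rdma/ib_verbs.h>

    struct example_mr {
            struct ib_mr ibmr;
            u64 *pages;             /* mirrors the pages/npages fields added above */
            u32 npages;
            u32 max_pages;          /* sized when the MR is allocated */
    };

    static int example_set_page(struct ib_mr *ibmr, u64 addr)
    {
            struct example_mr *mr = container_of(ibmr, struct example_mr, ibmr);

            if (mr->npages == mr->max_pages)
                    return -ENOMEM;
            mr->pages[mr->npages++] = addr;
            return 0;
    }

    static int example_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                                 int sg_nents)
    {
            struct example_mr *mr = container_of(ibmr, struct example_mr, ibmr);

            mr->npages = 0;
            /* ib_sg_to_pages() walks the SG list and calls back once per page. */
            return ib_sg_to_pages(ibmr, sg, sg_nents, example_set_page);
    }
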
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0c15bd885035..565c881a44ba 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
343 netdev = pci_get_drvdata(dev); 343 netdev = pci_get_drvdata(dev);
344 344
345 us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); 345 us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
346 if (IS_ERR_OR_NULL(us_ibdev)) { 346 if (!us_ibdev) {
347 usnic_err("Device %s context alloc failed\n", 347 usnic_err("Device %s context alloc failed\n",
348 netdev_name(pci_get_drvdata(dev))); 348 netdev_name(pci_get_drvdata(dev)));
349 return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); 349 return ERR_PTR(-EFAULT);
350 } 350 }
351 351
352 us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); 352 us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
353 if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { 353 if (!us_ibdev->ufdev) {
354 usnic_err("Failed to alloc ufdev for %s with err %ld\n", 354 usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
355 pci_name(dev), PTR_ERR(us_ibdev->ufdev));
356 goto err_dealloc; 355 goto err_dealloc;
357 } 356 }
358 357
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 85dc3f989ff7..fcea3a24d3eb 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
236 236
237 /* Create Flow Handle */ 237 /* Create Flow Handle */
238 qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); 238 qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
239 if (IS_ERR_OR_NULL(qp_flow)) { 239 if (!qp_flow) {
240 err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; 240 err = -ENOMEM;
241 goto out_dealloc_flow; 241 goto out_dealloc_flow;
242 } 242 }
243 qp_flow->flow = flow; 243 qp_flow->flow = flow;
@@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
311 311
312 /* Create qp_flow */ 312 /* Create qp_flow */
313 qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); 313 qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
314 if (IS_ERR_OR_NULL(qp_flow)) { 314 if (!qp_flow) {
315 err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM; 315 err = -ENOMEM;
316 goto out_dealloc_flow; 316 goto out_dealloc_flow;
317 } 317 }
318 qp_flow->flow = flow; 318 qp_flow->flow = flow;
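
The two usnic hunks above fix a recurring error-handling mix-up rather than change behaviour: ib_alloc_device() and kzalloc() return NULL on failure and never an ERR_PTR()-encoded pointer, so IS_ERR_OR_NULL()/PTR_ERR() on their result is meaningless. A short stand-alone illustration of the two conventions (example_checks is hypothetical):

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <rdma/ib_verbs.h>

    static int example_checks(struct ib_pd *pd)
    {
            struct ib_mr *mr;
            void *buf;

            /* NULL-returning allocators: test for NULL, pick the errno yourself. */
            buf = kzalloc(64, GFP_KERNEL);
            if (!buf)
                    return -ENOMEM;

            /* ERR_PTR-returning APIs: test with IS_ERR(), propagate PTR_ERR(). */
            mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 16);
            if (IS_ERR(mr)) {
                    kfree(buf);
                    return PTR_ERR(mr);
            }

            ib_dereg_mr(mr);
            kfree(buf);
            return 0;
    }
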
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index edc5b8565d6d..3ede10309754 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -360,7 +360,7 @@ struct ipoib_dev_priv {
360 unsigned tx_head; 360 unsigned tx_head;
361 unsigned tx_tail; 361 unsigned tx_tail;
362 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; 362 struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
363 struct ib_send_wr tx_wr; 363 struct ib_ud_wr tx_wr;
364 unsigned tx_outstanding; 364 unsigned tx_outstanding;
365 struct ib_wc send_wc[MAX_SEND_CQE]; 365 struct ib_wc send_wc[MAX_SEND_CQE];
366 366
@@ -528,7 +528,7 @@ static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
528 priv->tx_sge[i + off].addr = mapping[i + off]; 528 priv->tx_sge[i + off].addr = mapping[i + off];
529 priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); 529 priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
530 } 530 }
531 priv->tx_wr.num_sge = nr_frags + off; 531 priv->tx_wr.wr.num_sge = nr_frags + off;
532} 532}
533 533
534#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 534#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index c78dc1638030..3ae9726efb98 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -700,9 +700,9 @@ static inline int post_send(struct ipoib_dev_priv *priv,
700 700
701 ipoib_build_sge(priv, tx_req); 701 ipoib_build_sge(priv, tx_req);
702 702
703 priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM; 703 priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;
704 704
705 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr); 705 return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
706} 706}
707 707
708void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 708void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index d266667ca9b8..5ea0c14070d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -518,19 +518,19 @@ static inline int post_send(struct ipoib_dev_priv *priv,
518 518
519 ipoib_build_sge(priv, tx_req); 519 ipoib_build_sge(priv, tx_req);
520 520
521 priv->tx_wr.wr_id = wr_id; 521 priv->tx_wr.wr.wr_id = wr_id;
522 priv->tx_wr.wr.ud.remote_qpn = qpn; 522 priv->tx_wr.remote_qpn = qpn;
523 priv->tx_wr.wr.ud.ah = address; 523 priv->tx_wr.ah = address;
524 524
525 if (head) { 525 if (head) {
526 priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size; 526 priv->tx_wr.mss = skb_shinfo(skb)->gso_size;
527 priv->tx_wr.wr.ud.header = head; 527 priv->tx_wr.header = head;
528 priv->tx_wr.wr.ud.hlen = hlen; 528 priv->tx_wr.hlen = hlen;
529 priv->tx_wr.opcode = IB_WR_LSO; 529 priv->tx_wr.wr.opcode = IB_WR_LSO;
530 } else 530 } else
531 priv->tx_wr.opcode = IB_WR_SEND; 531 priv->tx_wr.wr.opcode = IB_WR_SEND;
532 532
533 return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr); 533 return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
534} 534}
535 535
536void ipoib_send(struct net_device *dev, struct sk_buff *skb, 536void ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -583,9 +583,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
583 } 583 }
584 584
585 if (skb->ip_summed == CHECKSUM_PARTIAL) 585 if (skb->ip_summed == CHECKSUM_PARTIAL)
586 priv->tx_wr.send_flags |= IB_SEND_IP_CSUM; 586 priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
587 else 587 else
588 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 588 priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
589 589
590 if (++priv->tx_outstanding == ipoib_sendq_size) { 590 if (++priv->tx_outstanding == ipoib_sendq_size) {
591 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 591 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
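
With priv->tx_wr converted to struct ib_ud_wr, the generic send fields move under the embedded .wr member while the UD-specific ones stay at the top level, and ib_post_send() is handed the embedded generic WR. Condensed from the hunks above (not a complete send path):

    priv->tx_wr.wr.wr_id    = wr_id;
    priv->tx_wr.wr.opcode   = IB_WR_SEND;
    priv->tx_wr.remote_qpn  = qpn;
    priv->tx_wr.remote_qkey = priv->qkey;
    priv->tx_wr.ah          = address;

    rc = ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
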
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index babba05d7a0e..7d3281866ffc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -461,7 +461,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
461 netdev_update_features(dev); 461 netdev_update_features(dev);
462 dev_set_mtu(dev, ipoib_cm_max_mtu(dev)); 462 dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
463 rtnl_unlock(); 463 rtnl_unlock();
464 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; 464 priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
465 465
466 ipoib_flush_paths(dev); 466 ipoib_flush_paths(dev);
467 rtnl_lock(); 467 rtnl_lock();
@@ -1860,7 +1860,7 @@ static struct net_device *ipoib_add_port(const char *format,
1860 priv->dev->broadcast[8] = priv->pkey >> 8; 1860 priv->dev->broadcast[8] = priv->pkey >> 8;
1861 priv->dev->broadcast[9] = priv->pkey & 0xff; 1861 priv->dev->broadcast[9] = priv->pkey & 0xff;
1862 1862
1863 result = ib_query_gid(hca, port, 0, &priv->local_gid); 1863 result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
1864 if (result) { 1864 if (result) {
1865 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", 1865 printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
1866 hca->name, port, result); 1866 hca->name, port, result);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index d750a86042f3..f357ca67a41c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
245 245
246 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 246 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
247 spin_unlock_irq(&priv->lock); 247 spin_unlock_irq(&priv->lock);
248 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 248 priv->tx_wr.remote_qkey = priv->qkey;
249 set_qkey = 1; 249 set_qkey = 1;
250 } 250 }
251 251
@@ -561,7 +561,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
561 } 561 }
562 priv->local_lid = port_attr.lid; 562 priv->local_lid = port_attr.lid;
563 563
564 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) 564 if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
565 ipoib_warn(priv, "ib_query_gid() failed\n"); 565 ipoib_warn(priv, "ib_query_gid() failed\n");
566 else 566 else
567 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); 567 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 78845b6e8b81..d48c5bae7877 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -221,9 +221,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
221 for (i = 0; i < MAX_SKB_FRAGS + 1; ++i) 221 for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
222 priv->tx_sge[i].lkey = priv->pd->local_dma_lkey; 222 priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
223 223
224 priv->tx_wr.opcode = IB_WR_SEND; 224 priv->tx_wr.wr.opcode = IB_WR_SEND;
225 priv->tx_wr.sg_list = priv->tx_sge; 225 priv->tx_wr.wr.sg_list = priv->tx_sge;
226 priv->tx_wr.send_flags = IB_SEND_SIGNALED; 226 priv->tx_wr.wr.send_flags = IB_SEND_SIGNALED;
227 227
228 priv->rx_sge[0].lkey = priv->pd->local_dma_lkey; 228 priv->rx_sge[0].lkey = priv->pd->local_dma_lkey;
229 229
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index f58ff96b6cbb..9080161e01af 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -111,7 +111,7 @@ module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
111MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]"); 111MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
112 112
113/* 113/*
114 * iscsi_iser_recv() - Process a successfull recv completion 114 * iscsi_iser_recv() - Process a successful recv completion
115 * @conn: iscsi connection 115 * @conn: iscsi connection
116 * @hdr: iscsi header 116 * @hdr: iscsi header
117 * @rx_data: buffer containing receive data payload 117 * @rx_data: buffer containing receive data payload
@@ -126,7 +126,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
126{ 126{
127 int rc = 0; 127 int rc = 0;
128 int datalen; 128 int datalen;
129 int ahslen;
130 129
131 /* verify PDU length */ 130 /* verify PDU length */
132 datalen = ntoh24(hdr->dlength); 131 datalen = ntoh24(hdr->dlength);
@@ -141,9 +140,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
141 iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", 140 iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
142 datalen, rx_data_len); 141 datalen, rx_data_len);
143 142
144 /* read AHS */
145 ahslen = hdr->hlength * 4;
146
147 rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); 143 rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
148 if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) 144 if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
149 goto error; 145 goto error;
@@ -766,9 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
766 stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ 762 stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
767 stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; 763 stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
768 stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; 764 stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
769 stats->custom_length = 1; 765 stats->custom_length = 0;
770 strcpy(stats->custom[0].desc, "fmr_unalign_cnt");
771 stats->custom[0].value = conn->fmr_unalign_cnt;
772} 766}
773 767
774static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep, 768static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
@@ -973,6 +967,13 @@ static umode_t iser_attr_is_visible(int param_type, int param)
973 return 0; 967 return 0;
974} 968}
975 969
970static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
971{
972 blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
973
974 return 0;
975}
976
976static struct scsi_host_template iscsi_iser_sht = { 977static struct scsi_host_template iscsi_iser_sht = {
977 .module = THIS_MODULE, 978 .module = THIS_MODULE,
978 .name = "iSCSI Initiator over iSER", 979 .name = "iSCSI Initiator over iSER",
@@ -985,7 +986,8 @@ static struct scsi_host_template iscsi_iser_sht = {
985 .eh_device_reset_handler= iscsi_eh_device_reset, 986 .eh_device_reset_handler= iscsi_eh_device_reset,
986 .eh_target_reset_handler = iscsi_eh_recover_target, 987 .eh_target_reset_handler = iscsi_eh_recover_target,
987 .target_alloc = iscsi_target_alloc, 988 .target_alloc = iscsi_target_alloc,
988 .use_clustering = DISABLE_CLUSTERING, 989 .use_clustering = ENABLE_CLUSTERING,
990 .slave_alloc = iscsi_iser_slave_alloc,
989 .proc_name = "iscsi_iser", 991 .proc_name = "iscsi_iser",
990 .this_id = -1, 992 .this_id = -1,
991 .track_queue_depth = 1, 993 .track_queue_depth = 1,
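
The new slave_alloc hook is what lets iSER drop its bounce-buffer machinery (removed further down in iser_memory.c) and switch to ENABLE_CLUSTERING: blk_queue_virt_boundary() tells the block layer never to hand the driver a scatterlist with a gap at sub-4K granularity, so every list iSER receives is already registrable with the page-based MR API. A minimal sketch of such a hook under a hypothetical name (the iSER version uses its own ~MASK_4K constant, which evaluates to the same 4095 mask):

    #include <linux/blkdev.h>
    #include <scsi/scsi_device.h>

    static int example_slave_alloc(struct scsi_device *sdev)
    {
            /* mask = page granularity - 1: segments may not straddle an
             * unaligned 4K boundary, i.e. no intra-page gaps. */
            blk_queue_virt_boundary(sdev->request_queue, 4096 - 1);
            return 0;
    }
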
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index a5edd6ede692..8a5998e6a407 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -227,18 +227,13 @@ enum iser_data_dir {
227 * @size: num entries of this sg 227 * @size: num entries of this sg
228 * @data_len: total beffer byte len 228 * @data_len: total beffer byte len
229 * @dma_nents: returned by dma_map_sg 229 * @dma_nents: returned by dma_map_sg
230 * @orig_sg: pointer to the original sg list (in case
231 * we used a copy)
232 * @orig_size: num entris of orig sg list
233 */ 230 */
234struct iser_data_buf { 231struct iser_data_buf {
235 struct scatterlist *sg; 232 struct scatterlist *sg;
236 unsigned int size; 233 int size;
237 unsigned long data_len; 234 unsigned long data_len;
238 unsigned int dma_nents; 235 unsigned int dma_nents;
239 struct scatterlist *orig_sg; 236};
240 unsigned int orig_size;
241 };
242 237
243/* fwd declarations */ 238/* fwd declarations */
244struct iser_device; 239struct iser_device;
@@ -300,7 +295,11 @@ struct iser_tx_desc {
300 int num_sge; 295 int num_sge;
301 bool mapped; 296 bool mapped;
302 u8 wr_idx; 297 u8 wr_idx;
303 struct ib_send_wr wrs[ISER_MAX_WRS]; 298 union iser_wr {
299 struct ib_send_wr send;
300 struct ib_reg_wr fast_reg;
301 struct ib_sig_handover_wr sig;
302 } wrs[ISER_MAX_WRS];
304 struct iser_mem_reg data_reg; 303 struct iser_mem_reg data_reg;
305 struct iser_mem_reg prot_reg; 304 struct iser_mem_reg prot_reg;
306 struct ib_sig_attrs sig_attrs; 305 struct ib_sig_attrs sig_attrs;
@@ -413,7 +412,6 @@ struct iser_device {
413 * 412 *
414 * @mr: memory region 413 * @mr: memory region
415 * @fmr_pool: pool of fmrs 414 * @fmr_pool: pool of fmrs
416 * @frpl: fast reg page list used by frwrs
417 * @page_vec: fast reg page list used by fmr pool 415 * @page_vec: fast reg page list used by fmr pool
418 * @mr_valid: is mr valid indicator 416 * @mr_valid: is mr valid indicator
419 */ 417 */
@@ -422,10 +420,7 @@ struct iser_reg_resources {
422 struct ib_mr *mr; 420 struct ib_mr *mr;
423 struct ib_fmr_pool *fmr_pool; 421 struct ib_fmr_pool *fmr_pool;
424 }; 422 };
425 union { 423 struct iser_page_vec *page_vec;
426 struct ib_fast_reg_page_list *frpl;
427 struct iser_page_vec *page_vec;
428 };
429 u8 mr_valid:1; 424 u8 mr_valid:1;
430}; 425};
431 426
@@ -712,11 +707,11 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
712static inline struct ib_send_wr * 707static inline struct ib_send_wr *
713iser_tx_next_wr(struct iser_tx_desc *tx_desc) 708iser_tx_next_wr(struct iser_tx_desc *tx_desc)
714{ 709{
715 struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx]; 710 struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send;
716 struct ib_send_wr *last_wr; 711 struct ib_send_wr *last_wr;
717 712
718 if (tx_desc->wr_idx) { 713 if (tx_desc->wr_idx) {
719 last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1]; 714 last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send;
720 last_wr->next = cur_wr; 715 last_wr->next = cur_wr;
721 } 716 }
722 tx_desc->wr_idx++; 717 tx_desc->wr_idx++;
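
The wrs[] union above lets one transmit descriptor hold any of the three WR flavours the initiator posts; the signature-handover variant referenced there looks roughly as follows, simplified from the 4.4 include/rdma/ib_verbs.h (only the members this driver touches are shown):

    struct ib_sig_handover_wr {
            struct ib_send_wr wr;
            struct ib_sig_attrs *sig_attrs;
            struct ib_mr *sig_mr;
            int access_flags;
            struct ib_sge *prot;
    };

    static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
    {
            return container_of(wr, struct ib_sig_handover_wr, wr);
    }

iser_tx_next_wr() keeps chaining through the embedded .send member, which is why the union is safe: whichever flavour a slot holds, its first field is the generic struct ib_send_wr.
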
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index d511879d8cdf..ffd00c420729 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -661,48 +661,14 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
661 661
662void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) 662void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
663{ 663{
664 int is_rdma_data_aligned = 1;
665 int is_rdma_prot_aligned = 1;
666 int prot_count = scsi_prot_sg_count(iser_task->sc); 664 int prot_count = scsi_prot_sg_count(iser_task->sc);
667 665
668 /* if we were reading, copy back to unaligned sglist,
669 * anyway dma_unmap and free the copy
670 */
671 if (iser_task->data[ISER_DIR_IN].orig_sg) {
672 is_rdma_data_aligned = 0;
673 iser_finalize_rdma_unaligned_sg(iser_task,
674 &iser_task->data[ISER_DIR_IN],
675 ISER_DIR_IN);
676 }
677
678 if (iser_task->data[ISER_DIR_OUT].orig_sg) {
679 is_rdma_data_aligned = 0;
680 iser_finalize_rdma_unaligned_sg(iser_task,
681 &iser_task->data[ISER_DIR_OUT],
682 ISER_DIR_OUT);
683 }
684
685 if (iser_task->prot[ISER_DIR_IN].orig_sg) {
686 is_rdma_prot_aligned = 0;
687 iser_finalize_rdma_unaligned_sg(iser_task,
688 &iser_task->prot[ISER_DIR_IN],
689 ISER_DIR_IN);
690 }
691
692 if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
693 is_rdma_prot_aligned = 0;
694 iser_finalize_rdma_unaligned_sg(iser_task,
695 &iser_task->prot[ISER_DIR_OUT],
696 ISER_DIR_OUT);
697 }
698
699 if (iser_task->dir[ISER_DIR_IN]) { 666 if (iser_task->dir[ISER_DIR_IN]) {
700 iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); 667 iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
701 if (is_rdma_data_aligned) 668 iser_dma_unmap_task_data(iser_task,
702 iser_dma_unmap_task_data(iser_task, 669 &iser_task->data[ISER_DIR_IN],
703 &iser_task->data[ISER_DIR_IN], 670 DMA_FROM_DEVICE);
704 DMA_FROM_DEVICE); 671 if (prot_count)
705 if (prot_count && is_rdma_prot_aligned)
706 iser_dma_unmap_task_data(iser_task, 672 iser_dma_unmap_task_data(iser_task,
707 &iser_task->prot[ISER_DIR_IN], 673 &iser_task->prot[ISER_DIR_IN],
708 DMA_FROM_DEVICE); 674 DMA_FROM_DEVICE);
@@ -710,11 +676,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
710 676
711 if (iser_task->dir[ISER_DIR_OUT]) { 677 if (iser_task->dir[ISER_DIR_OUT]) {
712 iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); 678 iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
713 if (is_rdma_data_aligned) 679 iser_dma_unmap_task_data(iser_task,
714 iser_dma_unmap_task_data(iser_task, 680 &iser_task->data[ISER_DIR_OUT],
715 &iser_task->data[ISER_DIR_OUT], 681 DMA_TO_DEVICE);
716 DMA_TO_DEVICE); 682 if (prot_count)
717 if (prot_count && is_rdma_prot_aligned)
718 iser_dma_unmap_task_data(iser_task, 683 iser_dma_unmap_task_data(iser_task,
719 &iser_task->prot[ISER_DIR_OUT], 684 &iser_task->prot[ISER_DIR_OUT],
720 DMA_TO_DEVICE); 685 DMA_TO_DEVICE);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 4c46d67d37a1..ea765fb9664d 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -88,113 +88,6 @@ int iser_assign_reg_ops(struct iser_device *device)
88 return 0; 88 return 0;
89} 89}
90 90
91static void
92iser_free_bounce_sg(struct iser_data_buf *data)
93{
94 struct scatterlist *sg;
95 int count;
96
97 for_each_sg(data->sg, sg, data->size, count)
98 __free_page(sg_page(sg));
99
100 kfree(data->sg);
101
102 data->sg = data->orig_sg;
103 data->size = data->orig_size;
104 data->orig_sg = NULL;
105 data->orig_size = 0;
106}
107
108static int
109iser_alloc_bounce_sg(struct iser_data_buf *data)
110{
111 struct scatterlist *sg;
112 struct page *page;
113 unsigned long length = data->data_len;
114 int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
115
116 sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
117 if (!sg)
118 goto err;
119
120 sg_init_table(sg, nents);
121 while (length) {
122 u32 page_len = min_t(u32, length, PAGE_SIZE);
123
124 page = alloc_page(GFP_ATOMIC);
125 if (!page)
126 goto err;
127
128 sg_set_page(&sg[i], page, page_len, 0);
129 length -= page_len;
130 i++;
131 }
132
133 data->orig_sg = data->sg;
134 data->orig_size = data->size;
135 data->sg = sg;
136 data->size = nents;
137
138 return 0;
139
140err:
141 for (; i > 0; i--)
142 __free_page(sg_page(&sg[i - 1]));
143 kfree(sg);
144
145 return -ENOMEM;
146}
147
148static void
149iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
150{
151 struct scatterlist *osg, *bsg = data->sg;
152 void *oaddr, *baddr;
153 unsigned int left = data->data_len;
154 unsigned int bsg_off = 0;
155 int i;
156
157 for_each_sg(data->orig_sg, osg, data->orig_size, i) {
158 unsigned int copy_len, osg_off = 0;
159
160 oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
161 copy_len = min(left, osg->length);
162 while (copy_len) {
163 unsigned int len = min(copy_len, bsg->length - bsg_off);
164
165 baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
166 if (to_buffer)
167 memcpy(baddr + bsg_off, oaddr + osg_off, len);
168 else
169 memcpy(oaddr + osg_off, baddr + bsg_off, len);
170
171 kunmap_atomic(baddr - bsg->offset);
172 osg_off += len;
173 bsg_off += len;
174 copy_len -= len;
175
176 if (bsg_off >= bsg->length) {
177 bsg = sg_next(bsg);
178 bsg_off = 0;
179 }
180 }
181 kunmap_atomic(oaddr - osg->offset);
182 left -= osg_off;
183 }
184}
185
186static inline void
187iser_copy_from_bounce(struct iser_data_buf *data)
188{
189 iser_copy_bounce(data, false);
190}
191
192static inline void
193iser_copy_to_bounce(struct iser_data_buf *data)
194{
195 iser_copy_bounce(data, true);
196}
197
198struct iser_fr_desc * 91struct iser_fr_desc *
199iser_reg_desc_get_fr(struct ib_conn *ib_conn) 92iser_reg_desc_get_fr(struct ib_conn *ib_conn)
200{ 93{
@@ -238,62 +131,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
238{ 131{
239} 132}
240 133
241/**
242 * iser_start_rdma_unaligned_sg
243 */
244static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
245 struct iser_data_buf *data,
246 enum iser_data_dir cmd_dir)
247{
248 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
249 int rc;
250
251 rc = iser_alloc_bounce_sg(data);
252 if (rc) {
253 iser_err("Failed to allocate bounce for data len %lu\n",
254 data->data_len);
255 return rc;
256 }
257
258 if (cmd_dir == ISER_DIR_OUT)
259 iser_copy_to_bounce(data);
260
261 data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
262 (cmd_dir == ISER_DIR_OUT) ?
263 DMA_TO_DEVICE : DMA_FROM_DEVICE);
264 if (!data->dma_nents) {
265 iser_err("Got dma_nents %d, something went wrong...\n",
266 data->dma_nents);
267 rc = -ENOMEM;
268 goto err;
269 }
270
271 return 0;
272err:
273 iser_free_bounce_sg(data);
274 return rc;
275}
276
277/**
278 * iser_finalize_rdma_unaligned_sg
279 */
280
281void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
282 struct iser_data_buf *data,
283 enum iser_data_dir cmd_dir)
284{
285 struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
286
287 ib_dma_unmap_sg(dev, data->sg, data->size,
288 (cmd_dir == ISER_DIR_OUT) ?
289 DMA_TO_DEVICE : DMA_FROM_DEVICE);
290
291 if (cmd_dir == ISER_DIR_IN)
292 iser_copy_from_bounce(data);
293
294 iser_free_bounce_sg(data);
295}
296
297#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) 134#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
298 135
299/** 136/**
@@ -355,64 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
355 return cur_page; 192 return cur_page;
356} 193}
357 194
358
359/**
360 * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
361 * for RDMA sub-list of a scatter-gather list of memory buffers, and returns
362 * the number of entries which are aligned correctly. Supports the case where
363 * consecutive SG elements are actually fragments of the same physcial page.
364 */
365static int iser_data_buf_aligned_len(struct iser_data_buf *data,
366 struct ib_device *ibdev,
367 unsigned sg_tablesize)
368{
369 struct scatterlist *sg, *sgl, *next_sg = NULL;
370 u64 start_addr, end_addr;
371 int i, ret_len, start_check = 0;
372
373 if (data->dma_nents == 1)
374 return 1;
375
376 sgl = data->sg;
377 start_addr = ib_sg_dma_address(ibdev, sgl);
378
379 if (unlikely(sgl[0].offset &&
380 data->data_len >= sg_tablesize * PAGE_SIZE)) {
381 iser_dbg("can't register length %lx with offset %x "
382 "fall to bounce buffer\n", data->data_len,
383 sgl[0].offset);
384 return 0;
385 }
386
387 for_each_sg(sgl, sg, data->dma_nents, i) {
388 if (start_check && !IS_4K_ALIGNED(start_addr))
389 break;
390
391 next_sg = sg_next(sg);
392 if (!next_sg)
393 break;
394
395 end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
396 start_addr = ib_sg_dma_address(ibdev, next_sg);
397
398 if (end_addr == start_addr) {
399 start_check = 0;
400 continue;
401 } else
402 start_check = 1;
403
404 if (!IS_4K_ALIGNED(end_addr))
405 break;
406 }
407 ret_len = (next_sg) ? i : i+1;
408
409 if (unlikely(ret_len != data->dma_nents))
410 iser_warn("rdma alignment violation (%d/%d aligned)\n",
411 ret_len, data->dma_nents);
412
413 return ret_len;
414}
415
416static void iser_data_buf_dump(struct iser_data_buf *data, 195static void iser_data_buf_dump(struct iser_data_buf *data,
417 struct ib_device *ibdev) 196 struct ib_device *ibdev)
418{ 197{
@@ -483,31 +262,6 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
483 return 0; 262 return 0;
484} 263}
485 264
486static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
487 struct iser_data_buf *mem,
488 enum iser_data_dir cmd_dir)
489{
490 struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
491 struct iser_device *device = iser_task->iser_conn->ib_conn.device;
492
493 iscsi_conn->fmr_unalign_cnt++;
494
495 if (iser_debug_level > 0)
496 iser_data_buf_dump(mem, device->ib_device);
497
498 /* unmap the command data before accessing it */
499 iser_dma_unmap_task_data(iser_task, mem,
500 (cmd_dir == ISER_DIR_OUT) ?
501 DMA_TO_DEVICE : DMA_FROM_DEVICE);
502
503 /* allocate copy buf, if we are writing, copy the */
504 /* unaligned scatterlist, dma map the copy */
505 if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
506 return -ENOMEM;
507
508 return 0;
509}
510
511/** 265/**
512 * iser_reg_page_vec - Register physical memory 266 * iser_reg_page_vec - Register physical memory
513 * 267 *
@@ -683,7 +437,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
683{ 437{
684 struct iser_tx_desc *tx_desc = &iser_task->desc; 438 struct iser_tx_desc *tx_desc = &iser_task->desc;
685 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; 439 struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
686 struct ib_send_wr *wr; 440 struct ib_sig_handover_wr *wr;
687 int ret; 441 int ret;
688 442
689 memset(sig_attrs, 0, sizeof(*sig_attrs)); 443 memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -693,26 +447,24 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
693 447
694 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); 448 iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
695 449
696 if (!pi_ctx->sig_mr_valid) { 450 if (!pi_ctx->sig_mr_valid)
697 wr = iser_tx_next_wr(tx_desc); 451 iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
698 iser_inv_rkey(wr, pi_ctx->sig_mr); 452
699 } 453 wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
700 454 wr->wr.opcode = IB_WR_REG_SIG_MR;
701 wr = iser_tx_next_wr(tx_desc); 455 wr->wr.wr_id = ISER_FASTREG_LI_WRID;
702 wr->opcode = IB_WR_REG_SIG_MR; 456 wr->wr.sg_list = &data_reg->sge;
703 wr->wr_id = ISER_FASTREG_LI_WRID; 457 wr->wr.num_sge = 1;
704 wr->sg_list = &data_reg->sge; 458 wr->wr.send_flags = 0;
705 wr->num_sge = 1; 459 wr->sig_attrs = sig_attrs;
706 wr->send_flags = 0; 460 wr->sig_mr = pi_ctx->sig_mr;
707 wr->wr.sig_handover.sig_attrs = sig_attrs;
708 wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr;
709 if (scsi_prot_sg_count(iser_task->sc)) 461 if (scsi_prot_sg_count(iser_task->sc))
710 wr->wr.sig_handover.prot = &prot_reg->sge; 462 wr->prot = &prot_reg->sge;
711 else 463 else
712 wr->wr.sig_handover.prot = NULL; 464 wr->prot = NULL;
713 wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | 465 wr->access_flags = IB_ACCESS_LOCAL_WRITE |
714 IB_ACCESS_REMOTE_READ | 466 IB_ACCESS_REMOTE_READ |
715 IB_ACCESS_REMOTE_WRITE; 467 IB_ACCESS_REMOTE_WRITE;
716 pi_ctx->sig_mr_valid = 0; 468 pi_ctx->sig_mr_valid = 0;
717 469
718 sig_reg->sge.lkey = pi_ctx->sig_mr->lkey; 470 sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
@@ -720,7 +472,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
720 sig_reg->sge.addr = 0; 472 sig_reg->sge.addr = 0;
721 sig_reg->sge.length = scsi_transfer_length(iser_task->sc); 473 sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
722 474
723 iser_dbg("sig reg: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n", 475 iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
724 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr, 476 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
725 sig_reg->sge.length); 477 sig_reg->sge.length);
726err: 478err:
@@ -732,69 +484,41 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
732 struct iser_reg_resources *rsc, 484 struct iser_reg_resources *rsc,
733 struct iser_mem_reg *reg) 485 struct iser_mem_reg *reg)
734{ 486{
735 struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
736 struct iser_device *device = ib_conn->device;
737 struct ib_mr *mr = rsc->mr;
738 struct ib_fast_reg_page_list *frpl = rsc->frpl;
739 struct iser_tx_desc *tx_desc = &iser_task->desc; 487 struct iser_tx_desc *tx_desc = &iser_task->desc;
740 struct ib_send_wr *wr; 488 struct ib_mr *mr = rsc->mr;
741 int offset, size, plen; 489 struct ib_reg_wr *wr;
490 int n;
742 491
743 plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, 492 if (!rsc->mr_valid)
744 &offset, &size); 493 iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
745 if (plen * SIZE_4K < size) {
746 iser_err("fast reg page_list too short to hold this SG\n");
747 return -EINVAL;
748 }
749 494
750 if (!rsc->mr_valid) { 495 n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
751 wr = iser_tx_next_wr(tx_desc); 496 if (unlikely(n != mem->size)) {
752 iser_inv_rkey(wr, mr); 497 iser_err("failed to map sg (%d/%d)\n",
498 n, mem->size);
499 return n < 0 ? n : -EINVAL;
753 } 500 }
754 501
755 wr = iser_tx_next_wr(tx_desc); 502 wr = reg_wr(iser_tx_next_wr(tx_desc));
756 wr->opcode = IB_WR_FAST_REG_MR; 503 wr->wr.opcode = IB_WR_REG_MR;
757 wr->wr_id = ISER_FASTREG_LI_WRID; 504 wr->wr.wr_id = ISER_FASTREG_LI_WRID;
758 wr->send_flags = 0; 505 wr->wr.send_flags = 0;
759 wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset; 506 wr->wr.num_sge = 0;
760 wr->wr.fast_reg.page_list = frpl; 507 wr->mr = mr;
761 wr->wr.fast_reg.page_list_len = plen; 508 wr->key = mr->rkey;
762 wr->wr.fast_reg.page_shift = SHIFT_4K; 509 wr->access = IB_ACCESS_LOCAL_WRITE |
763 wr->wr.fast_reg.length = size; 510 IB_ACCESS_REMOTE_WRITE |
764 wr->wr.fast_reg.rkey = mr->rkey; 511 IB_ACCESS_REMOTE_READ;
765 wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | 512
766 IB_ACCESS_REMOTE_WRITE |
767 IB_ACCESS_REMOTE_READ);
768 rsc->mr_valid = 0; 513 rsc->mr_valid = 0;
769 514
770 reg->sge.lkey = mr->lkey; 515 reg->sge.lkey = mr->lkey;
771 reg->rkey = mr->rkey; 516 reg->rkey = mr->rkey;
772 reg->sge.addr = frpl->page_list[0] + offset; 517 reg->sge.addr = mr->iova;
773 reg->sge.length = size; 518 reg->sge.length = mr->length;
774 519
775 iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx," 520 iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
776 " length=0x%x\n", reg->sge.lkey, reg->rkey, 521 reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);
777 reg->sge.addr, reg->sge.length);
778
779 return 0;
780}
781
782static int
783iser_handle_unaligned_buf(struct iscsi_iser_task *task,
784 struct iser_data_buf *mem,
785 enum iser_data_dir dir)
786{
787 struct iser_conn *iser_conn = task->iser_conn;
788 struct iser_device *device = iser_conn->ib_conn.device;
789 int err, aligned_len;
790
791 aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
792 iser_conn->scsi_sg_tablesize);
793 if (aligned_len != mem->dma_nents) {
794 err = fall_to_bounce_buf(task, mem, dir);
795 if (err)
796 return err;
797 }
798 522
799 return 0; 523 return 0;
800} 524}
@@ -841,10 +565,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
841 bool use_dma_key; 565 bool use_dma_key;
842 int err; 566 int err;
843 567
844 err = iser_handle_unaligned_buf(task, mem, dir);
845 if (unlikely(err))
846 return err;
847
848 use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && 568 use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
849 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); 569 scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
850 570
@@ -867,10 +587,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
867 587
868 if (scsi_prot_sg_count(task->sc)) { 588 if (scsi_prot_sg_count(task->sc)) {
869 mem = &task->prot[dir]; 589 mem = &task->prot[dir];
870 err = iser_handle_unaligned_buf(task, mem, dir);
871 if (unlikely(err))
872 goto err_reg;
873
874 err = iser_reg_prot_sg(task, mem, desc, 590 err = iser_reg_prot_sg(task, mem, desc,
875 use_dma_key, prot_reg); 591 use_dma_key, prot_reg);
876 if (unlikely(err)) 592 if (unlikely(err))
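
Taken together, the iser_memory.c changes replace manual page-vector construction with the two-step core API: ib_map_mr_sg() loads the scatterlist into the MR (setting mr->iova and mr->length), and an IB_WR_REG_MR work request makes the mapping available under mr->rkey. A stand-alone sketch of that flow under hypothetical names (real callers, as above, also post an rkey invalidation first when the MR has been used before):

    #include <rdma/ib_verbs.h>

    static int example_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
                                struct scatterlist *sg, int sg_nents)
    {
            struct ib_reg_wr wr = {};
            struct ib_send_wr *bad_wr;
            int n;

            n = ib_map_mr_sg(mr, sg, sg_nents, 4096);    /* 4K HCA pages */
            if (n != sg_nents)
                    return n < 0 ? n : -EINVAL;

            wr.wr.opcode = IB_WR_REG_MR;
            wr.mr        = mr;
            wr.key       = mr->rkey;
            wr.access    = IB_ACCESS_LOCAL_WRITE |
                           IB_ACCESS_REMOTE_READ |
                           IB_ACCESS_REMOTE_WRITE;

            return ib_post_send(qp, &wr.wr, &bad_wr);
    }
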
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 85132d867bc8..a93070210109 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -293,35 +293,21 @@ iser_alloc_reg_res(struct ib_device *ib_device,
293{ 293{
294 int ret; 294 int ret;
295 295
296 res->frpl = ib_alloc_fast_reg_page_list(ib_device, size);
297 if (IS_ERR(res->frpl)) {
298 ret = PTR_ERR(res->frpl);
299 iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
300 ret);
301 return PTR_ERR(res->frpl);
302 }
303
304 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size); 296 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size);
305 if (IS_ERR(res->mr)) { 297 if (IS_ERR(res->mr)) {
306 ret = PTR_ERR(res->mr); 298 ret = PTR_ERR(res->mr);
307 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 299 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
308 goto fast_reg_mr_failure; 300 return ret;
309 } 301 }
310 res->mr_valid = 1; 302 res->mr_valid = 1;
311 303
312 return 0; 304 return 0;
313
314fast_reg_mr_failure:
315 ib_free_fast_reg_page_list(res->frpl);
316
317 return ret;
318} 305}
319 306
320static void 307static void
321iser_free_reg_res(struct iser_reg_resources *rsc) 308iser_free_reg_res(struct iser_reg_resources *rsc)
322{ 309{
323 ib_dereg_mr(rsc->mr); 310 ib_dereg_mr(rsc->mr);
324 ib_free_fast_reg_page_list(rsc->frpl);
325} 311}
326 312
327static int 313static int
@@ -1017,7 +1003,7 @@ int iser_connect(struct iser_conn *iser_conn,
1017 ib_conn->beacon.wr_id = ISER_BEACON_WRID; 1003 ib_conn->beacon.wr_id = ISER_BEACON_WRID;
1018 ib_conn->beacon.opcode = IB_WR_SEND; 1004 ib_conn->beacon.opcode = IB_WR_SEND;
1019 1005
1020 ib_conn->cma_id = rdma_create_id(iser_cma_handler, 1006 ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
1021 (void *)iser_conn, 1007 (void *)iser_conn,
1022 RDMA_PS_TCP, IB_QPT_RC); 1008 RDMA_PS_TCP, IB_QPT_RC);
1023 if (IS_ERR(ib_conn->cma_id)) { 1009 if (IS_ERR(ib_conn->cma_id)) {
@@ -1135,7 +1121,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
1135 wr->opcode = IB_WR_SEND; 1121 wr->opcode = IB_WR_SEND;
1136 wr->send_flags = signal ? IB_SEND_SIGNALED : 0; 1122 wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
1137 1123
1138 ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr); 1124 ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr);
1139 if (ib_ret) 1125 if (ib_ret)
1140 iser_err("ib_post_send failed, ret:%d opcode:%d\n", 1126 iser_err("ib_post_send failed, ret:%d opcode:%d\n",
1141 ib_ret, bad_wr->opcode); 1127 ib_ret, bad_wr->opcode);
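
The last iser_verbs.c hunk reflects the Network NameSpace support added to the RDMA CM in this series: rdma_create_id() now takes the namespace the CM ID belongs to as its first argument, and kernel ULPs that are not namespace-aware pass &init_net. A minimal sketch with a hypothetical handler:

    #include <net/net_namespace.h>
    #include <rdma/rdma_cm.h>

    static int example_cma_handler(struct rdma_cm_id *id,
                                   struct rdma_cm_event *event)
    {
            return 0;       /* real handlers dispatch on event->event */
    }

    static struct rdma_cm_id *example_create_id(void *context)
    {
            return rdma_create_id(&init_net, example_cma_handler, context,
                                  RDMA_PS_TCP, IB_QPT_RC);
    }
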
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index aa59037d7504..dfbbbb28090b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -473,10 +473,8 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
473 list_for_each_entry_safe(fr_desc, tmp, 473 list_for_each_entry_safe(fr_desc, tmp,
474 &isert_conn->fr_pool, list) { 474 &isert_conn->fr_pool, list) {
475 list_del(&fr_desc->list); 475 list_del(&fr_desc->list);
476 ib_free_fast_reg_page_list(fr_desc->data_frpl);
477 ib_dereg_mr(fr_desc->data_mr); 476 ib_dereg_mr(fr_desc->data_mr);
478 if (fr_desc->pi_ctx) { 477 if (fr_desc->pi_ctx) {
479 ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
480 ib_dereg_mr(fr_desc->pi_ctx->prot_mr); 478 ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
481 ib_dereg_mr(fr_desc->pi_ctx->sig_mr); 479 ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
482 kfree(fr_desc->pi_ctx); 480 kfree(fr_desc->pi_ctx);
@@ -504,22 +502,13 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
504 return -ENOMEM; 502 return -ENOMEM;
505 } 503 }
506 504
507 pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
508 ISCSI_ISER_SG_TABLESIZE);
509 if (IS_ERR(pi_ctx->prot_frpl)) {
510 isert_err("Failed to allocate prot frpl err=%ld\n",
511 PTR_ERR(pi_ctx->prot_frpl));
512 ret = PTR_ERR(pi_ctx->prot_frpl);
513 goto err_pi_ctx;
514 }
515
516 pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 505 pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
517 ISCSI_ISER_SG_TABLESIZE); 506 ISCSI_ISER_SG_TABLESIZE);
518 if (IS_ERR(pi_ctx->prot_mr)) { 507 if (IS_ERR(pi_ctx->prot_mr)) {
519 isert_err("Failed to allocate prot frmr err=%ld\n", 508 isert_err("Failed to allocate prot frmr err=%ld\n",
520 PTR_ERR(pi_ctx->prot_mr)); 509 PTR_ERR(pi_ctx->prot_mr));
521 ret = PTR_ERR(pi_ctx->prot_mr); 510 ret = PTR_ERR(pi_ctx->prot_mr);
522 goto err_prot_frpl; 511 goto err_pi_ctx;
523 } 512 }
524 desc->ind |= ISERT_PROT_KEY_VALID; 513 desc->ind |= ISERT_PROT_KEY_VALID;
525 514
@@ -539,8 +528,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
539 528
540err_prot_mr: 529err_prot_mr:
541 ib_dereg_mr(pi_ctx->prot_mr); 530 ib_dereg_mr(pi_ctx->prot_mr);
542err_prot_frpl:
543 ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
544err_pi_ctx: 531err_pi_ctx:
545 kfree(pi_ctx); 532 kfree(pi_ctx);
546 533
@@ -551,34 +538,18 @@ static int
551isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, 538isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
552 struct fast_reg_descriptor *fr_desc) 539 struct fast_reg_descriptor *fr_desc)
553{ 540{
554 int ret;
555
556 fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
557 ISCSI_ISER_SG_TABLESIZE);
558 if (IS_ERR(fr_desc->data_frpl)) {
559 isert_err("Failed to allocate data frpl err=%ld\n",
560 PTR_ERR(fr_desc->data_frpl));
561 return PTR_ERR(fr_desc->data_frpl);
562 }
563
564 fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 541 fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
565 ISCSI_ISER_SG_TABLESIZE); 542 ISCSI_ISER_SG_TABLESIZE);
566 if (IS_ERR(fr_desc->data_mr)) { 543 if (IS_ERR(fr_desc->data_mr)) {
567 isert_err("Failed to allocate data frmr err=%ld\n", 544 isert_err("Failed to allocate data frmr err=%ld\n",
568 PTR_ERR(fr_desc->data_mr)); 545 PTR_ERR(fr_desc->data_mr));
569 ret = PTR_ERR(fr_desc->data_mr); 546 return PTR_ERR(fr_desc->data_mr);
570 goto err_data_frpl;
571 } 547 }
572 fr_desc->ind |= ISERT_DATA_KEY_VALID; 548 fr_desc->ind |= ISERT_DATA_KEY_VALID;
573 549
574 isert_dbg("Created fr_desc %p\n", fr_desc); 550 isert_dbg("Created fr_desc %p\n", fr_desc);
575 551
576 return 0; 552 return 0;
577
578err_data_frpl:
579 ib_free_fast_reg_page_list(fr_desc->data_frpl);
580
581 return ret;
582} 553}
583 554
584static int 555static int
@@ -1579,7 +1550,6 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
1579 struct iser_hdr *iser_hdr = &rx_desc->iser_header; 1550 struct iser_hdr *iser_hdr = &rx_desc->iser_header;
1580 uint64_t read_va = 0, write_va = 0; 1551 uint64_t read_va = 0, write_va = 0;
1581 uint32_t read_stag = 0, write_stag = 0; 1552 uint32_t read_stag = 0, write_stag = 0;
1582 int rc;
1583 1553
1584 switch (iser_hdr->flags & 0xF0) { 1554 switch (iser_hdr->flags & 0xF0) {
1585 case ISCSI_CTRL: 1555 case ISCSI_CTRL:
@@ -1606,8 +1576,8 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
1606 break; 1576 break;
1607 } 1577 }
1608 1578
1609 rc = isert_rx_opcode(isert_conn, rx_desc, 1579 isert_rx_opcode(isert_conn, rx_desc,
1610 read_stag, read_va, write_stag, write_va); 1580 read_stag, read_va, write_stag, write_va);
1611} 1581}
1612 1582
1613static void 1583static void
@@ -1716,10 +1686,10 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
1716 isert_unmap_data_buf(isert_conn, &wr->data); 1686 isert_unmap_data_buf(isert_conn, &wr->data);
1717 } 1687 }
1718 1688
1719 if (wr->send_wr) { 1689 if (wr->rdma_wr) {
1720 isert_dbg("Cmd %p free send_wr\n", isert_cmd); 1690 isert_dbg("Cmd %p free send_wr\n", isert_cmd);
1721 kfree(wr->send_wr); 1691 kfree(wr->rdma_wr);
1722 wr->send_wr = NULL; 1692 wr->rdma_wr = NULL;
1723 } 1693 }
1724 1694
1725 if (wr->ib_sge) { 1695 if (wr->ib_sge) {
@@ -1754,7 +1724,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
1754 } 1724 }
1755 1725
1756 wr->ib_sge = NULL; 1726 wr->ib_sge = NULL;
1757 wr->send_wr = NULL; 1727 wr->rdma_wr = NULL;
1758} 1728}
1759 1729
1760static void 1730static void
@@ -1923,7 +1893,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
1923 } 1893 }
1924 1894
1925 device->unreg_rdma_mem(isert_cmd, isert_conn); 1895 device->unreg_rdma_mem(isert_cmd, isert_conn);
1926 wr->send_wr_num = 0; 1896 wr->rdma_wr_num = 0;
1927 if (ret) 1897 if (ret)
1928 transport_send_check_condition_and_sense(se_cmd, 1898 transport_send_check_condition_and_sense(se_cmd,
1929 se_cmd->pi_err, 0); 1899 se_cmd->pi_err, 0);
@@ -1951,7 +1921,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
1951 iscsit_stop_dataout_timer(cmd); 1921 iscsit_stop_dataout_timer(cmd);
1952 device->unreg_rdma_mem(isert_cmd, isert_conn); 1922 device->unreg_rdma_mem(isert_cmd, isert_conn);
1953 cmd->write_data_done = wr->data.len; 1923 cmd->write_data_done = wr->data.len;
1954 wr->send_wr_num = 0; 1924 wr->rdma_wr_num = 0;
1955 1925
1956 isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); 1926 isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
1957 spin_lock_bh(&cmd->istate_lock); 1927 spin_lock_bh(&cmd->istate_lock);
@@ -2403,7 +2373,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
2403 2373
2404static int 2374static int
2405isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, 2375isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2406 struct ib_sge *ib_sge, struct ib_send_wr *send_wr, 2376 struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
2407 u32 data_left, u32 offset) 2377 u32 data_left, u32 offset)
2408{ 2378{
2409 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; 2379 struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
@@ -2418,8 +2388,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2418 sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge); 2388 sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
2419 page_off = offset % PAGE_SIZE; 2389 page_off = offset % PAGE_SIZE;
2420 2390
2421 send_wr->sg_list = ib_sge; 2391 rdma_wr->wr.sg_list = ib_sge;
2422 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; 2392 rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
2423 /* 2393 /*
2424 * Perform mapping of TCM scatterlist memory ib_sge dma_addr. 2394 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
2425 */ 2395 */
@@ -2444,11 +2414,11 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
2444 isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge); 2414 isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
2445 } 2415 }
2446 2416
2447 send_wr->num_sge = ++i; 2417 rdma_wr->wr.num_sge = ++i;
2448 isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n", 2418 isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
2449 send_wr->sg_list, send_wr->num_sge); 2419 rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
2450 2420
2451 return send_wr->num_sge; 2421 return rdma_wr->wr.num_sge;
2452} 2422}
2453 2423
2454static int 2424static int
@@ -2459,7 +2429,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2459 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2429 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
2460 struct isert_conn *isert_conn = conn->context; 2430 struct isert_conn *isert_conn = conn->context;
2461 struct isert_data_buf *data = &wr->data; 2431 struct isert_data_buf *data = &wr->data;
2462 struct ib_send_wr *send_wr; 2432 struct ib_rdma_wr *rdma_wr;
2463 struct ib_sge *ib_sge; 2433 struct ib_sge *ib_sge;
2464 u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; 2434 u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
2465 int ret = 0, i, ib_sge_cnt; 2435 int ret = 0, i, ib_sge_cnt;
@@ -2484,11 +2454,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2484 } 2454 }
2485 wr->ib_sge = ib_sge; 2455 wr->ib_sge = ib_sge;
2486 2456
2487 wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); 2457 wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
2488 wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, 2458 wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num,
2489 GFP_KERNEL); 2459 GFP_KERNEL);
2490 if (!wr->send_wr) { 2460 if (!wr->rdma_wr) {
2491 isert_dbg("Unable to allocate wr->send_wr\n"); 2461 isert_dbg("Unable to allocate wr->rdma_wr\n");
2492 ret = -ENOMEM; 2462 ret = -ENOMEM;
2493 goto unmap_cmd; 2463 goto unmap_cmd;
2494 } 2464 }
@@ -2496,31 +2466,31 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2496 wr->isert_cmd = isert_cmd; 2466 wr->isert_cmd = isert_cmd;
2497 rdma_write_max = isert_conn->max_sge * PAGE_SIZE; 2467 rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
2498 2468
2499 for (i = 0; i < wr->send_wr_num; i++) { 2469 for (i = 0; i < wr->rdma_wr_num; i++) {
2500 send_wr = &isert_cmd->rdma_wr.send_wr[i]; 2470 rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i];
2501 data_len = min(data_left, rdma_write_max); 2471 data_len = min(data_left, rdma_write_max);
2502 2472
2503 send_wr->send_flags = 0; 2473 rdma_wr->wr.send_flags = 0;
2504 if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { 2474 if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
2505 send_wr->opcode = IB_WR_RDMA_WRITE; 2475 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
2506 send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset; 2476 rdma_wr->remote_addr = isert_cmd->read_va + offset;
2507 send_wr->wr.rdma.rkey = isert_cmd->read_stag; 2477 rdma_wr->rkey = isert_cmd->read_stag;
2508 if (i + 1 == wr->send_wr_num) 2478 if (i + 1 == wr->rdma_wr_num)
2509 send_wr->next = &isert_cmd->tx_desc.send_wr; 2479 rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
2510 else 2480 else
2511 send_wr->next = &wr->send_wr[i + 1]; 2481 rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
2512 } else { 2482 } else {
2513 send_wr->opcode = IB_WR_RDMA_READ; 2483 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
2514 send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset; 2484 rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
2515 send_wr->wr.rdma.rkey = isert_cmd->write_stag; 2485 rdma_wr->rkey = isert_cmd->write_stag;
2516 if (i + 1 == wr->send_wr_num) 2486 if (i + 1 == wr->rdma_wr_num)
2517 send_wr->send_flags = IB_SEND_SIGNALED; 2487 rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
2518 else 2488 else
2519 send_wr->next = &wr->send_wr[i + 1]; 2489 rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
2520 } 2490 }
2521 2491
2522 ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge, 2492 ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
2523 send_wr, data_len, offset); 2493 rdma_wr, data_len, offset);
2524 ib_sge += ib_sge_cnt; 2494 ib_sge += ib_sge_cnt;
2525 2495
2526 offset += data_len; 2496 offset += data_len;
@@ -2535,45 +2505,6 @@ unmap_cmd:
2535 return ret; 2505 return ret;
2536} 2506}
2537 2507
2538static int
2539isert_map_fr_pagelist(struct ib_device *ib_dev,
2540 struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
2541{
2542 u64 start_addr, end_addr, page, chunk_start = 0;
2543 struct scatterlist *tmp_sg;
2544 int i = 0, new_chunk, last_ent, n_pages;
2545
2546 n_pages = 0;
2547 new_chunk = 1;
2548 last_ent = sg_nents - 1;
2549 for_each_sg(sg_start, tmp_sg, sg_nents, i) {
2550 start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
2551 if (new_chunk)
2552 chunk_start = start_addr;
2553 end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
2554
2555 isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
2556 i, (unsigned long long)tmp_sg->dma_address,
2557 tmp_sg->length);
2558
2559 if ((end_addr & ~PAGE_MASK) && i < last_ent) {
2560 new_chunk = 0;
2561 continue;
2562 }
2563 new_chunk = 1;
2564
2565 page = chunk_start & PAGE_MASK;
2566 do {
2567 fr_pl[n_pages++] = page;
2568 isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
2569 n_pages - 1, page);
2570 page += PAGE_SIZE;
2571 } while (page < end_addr);
2572 }
2573
2574 return n_pages;
2575}
2576
2577static inline void 2508static inline void
2578isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) 2509isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
2579{ 2510{
@@ -2599,11 +2530,9 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
2599 struct isert_device *device = isert_conn->device; 2530 struct isert_device *device = isert_conn->device;
2600 struct ib_device *ib_dev = device->ib_device; 2531 struct ib_device *ib_dev = device->ib_device;
2601 struct ib_mr *mr; 2532 struct ib_mr *mr;
2602 struct ib_fast_reg_page_list *frpl; 2533 struct ib_reg_wr reg_wr;
2603 struct ib_send_wr fr_wr, inv_wr; 2534 struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
2604 struct ib_send_wr *bad_wr, *wr = NULL; 2535 int ret, n;
2605 int ret, pagelist_len;
2606 u32 page_off;
2607 2536
2608 if (mem->dma_nents == 1) { 2537 if (mem->dma_nents == 1) {
2609 sge->lkey = device->pd->local_dma_lkey; 2538 sge->lkey = device->pd->local_dma_lkey;
@@ -2614,45 +2543,41 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
2614 return 0; 2543 return 0;
2615 } 2544 }
2616 2545
2617 if (ind == ISERT_DATA_KEY_VALID) { 2546 if (ind == ISERT_DATA_KEY_VALID)
2618 /* Registering data buffer */ 2547 /* Registering data buffer */
2619 mr = fr_desc->data_mr; 2548 mr = fr_desc->data_mr;
2620 frpl = fr_desc->data_frpl; 2549 else
2621 } else {
2622 /* Registering protection buffer */ 2550 /* Registering protection buffer */
2623 mr = fr_desc->pi_ctx->prot_mr; 2551 mr = fr_desc->pi_ctx->prot_mr;
2624 frpl = fr_desc->pi_ctx->prot_frpl;
2625 }
2626
2627 page_off = mem->offset % PAGE_SIZE;
2628
2629 isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
2630 fr_desc, mem->nents, mem->offset);
2631
2632 pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
2633 &frpl->page_list[0]);
2634 2552
2635 if (!(fr_desc->ind & ind)) { 2553 if (!(fr_desc->ind & ind)) {
2636 isert_inv_rkey(&inv_wr, mr); 2554 isert_inv_rkey(&inv_wr, mr);
2637 wr = &inv_wr; 2555 wr = &inv_wr;
2638 } 2556 }
2639 2557
2640 /* Prepare FASTREG WR */ 2558 n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
2641 memset(&fr_wr, 0, sizeof(fr_wr)); 2559 if (unlikely(n != mem->nents)) {
2642 fr_wr.wr_id = ISER_FASTREG_LI_WRID; 2560 isert_err("failed to map mr sg (%d/%d)\n",
2643 fr_wr.opcode = IB_WR_FAST_REG_MR; 2561 n, mem->nents);
2644 fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off; 2562 return n < 0 ? n : -EINVAL;
2645 fr_wr.wr.fast_reg.page_list = frpl; 2563 }
2646 fr_wr.wr.fast_reg.page_list_len = pagelist_len; 2564
2647 fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 2565 isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
2648 fr_wr.wr.fast_reg.length = mem->len; 2566 fr_desc, mem->nents, mem->offset);
2649 fr_wr.wr.fast_reg.rkey = mr->rkey; 2567
2650 fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; 2568 reg_wr.wr.next = NULL;
2569 reg_wr.wr.opcode = IB_WR_REG_MR;
2570 reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
2571 reg_wr.wr.send_flags = 0;
2572 reg_wr.wr.num_sge = 0;
2573 reg_wr.mr = mr;
2574 reg_wr.key = mr->lkey;
2575 reg_wr.access = IB_ACCESS_LOCAL_WRITE;
2651 2576
2652 if (!wr) 2577 if (!wr)
2653 wr = &fr_wr; 2578 wr = &reg_wr.wr;
2654 else 2579 else
2655 wr->next = &fr_wr; 2580 wr->next = &reg_wr.wr;
2656 2581
2657 ret = ib_post_send(isert_conn->qp, wr, &bad_wr); 2582 ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
2658 if (ret) { 2583 if (ret) {
@@ -2662,8 +2587,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
2662 fr_desc->ind &= ~ind; 2587 fr_desc->ind &= ~ind;
2663 2588
2664 sge->lkey = mr->lkey; 2589 sge->lkey = mr->lkey;
2665 sge->addr = frpl->page_list[0] + page_off; 2590 sge->addr = mr->iova;
2666 sge->length = mem->len; 2591 sge->length = mr->length;
2667 2592
2668 isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n", 2593 isert_dbg("sge: addr: 0x%llx length: %u lkey: %x\n",
2669 sge->addr, sge->length, sge->lkey); 2594 sge->addr, sge->length, sge->lkey);
@@ -2733,8 +2658,8 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
2733 struct isert_rdma_wr *rdma_wr, 2658 struct isert_rdma_wr *rdma_wr,
2734 struct fast_reg_descriptor *fr_desc) 2659 struct fast_reg_descriptor *fr_desc)
2735{ 2660{
2736 struct ib_send_wr sig_wr, inv_wr; 2661 struct ib_sig_handover_wr sig_wr;
2737 struct ib_send_wr *bad_wr, *wr = NULL; 2662 struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
2738 struct pi_context *pi_ctx = fr_desc->pi_ctx; 2663 struct pi_context *pi_ctx = fr_desc->pi_ctx;
2739 struct ib_sig_attrs sig_attrs; 2664 struct ib_sig_attrs sig_attrs;
2740 int ret; 2665 int ret;
@@ -2752,20 +2677,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
2752 } 2677 }
2753 2678
2754 memset(&sig_wr, 0, sizeof(sig_wr)); 2679 memset(&sig_wr, 0, sizeof(sig_wr));
2755 sig_wr.opcode = IB_WR_REG_SIG_MR; 2680 sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
2756 sig_wr.wr_id = ISER_FASTREG_LI_WRID; 2681 sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
2757 sig_wr.sg_list = &rdma_wr->ib_sg[DATA]; 2682 sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA];
2758 sig_wr.num_sge = 1; 2683 sig_wr.wr.num_sge = 1;
2759 sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; 2684 sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
2760 sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; 2685 sig_wr.sig_attrs = &sig_attrs;
2761 sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; 2686 sig_wr.sig_mr = pi_ctx->sig_mr;
2762 if (se_cmd->t_prot_sg) 2687 if (se_cmd->t_prot_sg)
2763 sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT]; 2688 sig_wr.prot = &rdma_wr->ib_sg[PROT];
2764 2689
2765 if (!wr) 2690 if (!wr)
2766 wr = &sig_wr; 2691 wr = &sig_wr.wr;
2767 else 2692 else
2768 wr->next = &sig_wr; 2693 wr->next = &sig_wr.wr;
2769 2694
2770 ret = ib_post_send(isert_conn->qp, wr, &bad_wr); 2695 ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
2771 if (ret) { 2696 if (ret) {
@@ -2859,7 +2784,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2859 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); 2784 struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
2860 struct isert_conn *isert_conn = conn->context; 2785 struct isert_conn *isert_conn = conn->context;
2861 struct fast_reg_descriptor *fr_desc = NULL; 2786 struct fast_reg_descriptor *fr_desc = NULL;
2862 struct ib_send_wr *send_wr; 2787 struct ib_rdma_wr *rdma_wr;
2863 struct ib_sge *ib_sg; 2788 struct ib_sge *ib_sg;
2864 u32 offset; 2789 u32 offset;
2865 int ret = 0; 2790 int ret = 0;
@@ -2900,26 +2825,26 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
2900 2825
2901 memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg)); 2826 memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
2902 wr->ib_sge = &wr->s_ib_sge; 2827 wr->ib_sge = &wr->s_ib_sge;
2903 wr->send_wr_num = 1; 2828 wr->rdma_wr_num = 1;
2904 memset(&wr->s_send_wr, 0, sizeof(*send_wr)); 2829 memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr));
2905 wr->send_wr = &wr->s_send_wr; 2830 wr->rdma_wr = &wr->s_rdma_wr;
2906 wr->isert_cmd = isert_cmd; 2831 wr->isert_cmd = isert_cmd;
2907 2832
2908 send_wr = &isert_cmd->rdma_wr.s_send_wr; 2833 rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr;
2909 send_wr->sg_list = &wr->s_ib_sge; 2834 rdma_wr->wr.sg_list = &wr->s_ib_sge;
2910 send_wr->num_sge = 1; 2835 rdma_wr->wr.num_sge = 1;
2911 send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc; 2836 rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
2912 if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { 2837 if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
2913 send_wr->opcode = IB_WR_RDMA_WRITE; 2838 rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
2914 send_wr->wr.rdma.remote_addr = isert_cmd->read_va; 2839 rdma_wr->remote_addr = isert_cmd->read_va;
2915 send_wr->wr.rdma.rkey = isert_cmd->read_stag; 2840 rdma_wr->rkey = isert_cmd->read_stag;
2916 send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ? 2841 rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
2917 0 : IB_SEND_SIGNALED; 2842 0 : IB_SEND_SIGNALED;
2918 } else { 2843 } else {
2919 send_wr->opcode = IB_WR_RDMA_READ; 2844 rdma_wr->wr.opcode = IB_WR_RDMA_READ;
2920 send_wr->wr.rdma.remote_addr = isert_cmd->write_va; 2845 rdma_wr->remote_addr = isert_cmd->write_va;
2921 send_wr->wr.rdma.rkey = isert_cmd->write_stag; 2846 rdma_wr->rkey = isert_cmd->write_stag;
2922 send_wr->send_flags = IB_SEND_SIGNALED; 2847 rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
2923 } 2848 }
2924 2849
2925 return 0; 2850 return 0;
@@ -2967,8 +2892,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2967 isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); 2892 isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
2968 isert_init_send_wr(isert_conn, isert_cmd, 2893 isert_init_send_wr(isert_conn, isert_cmd,
2969 &isert_cmd->tx_desc.send_wr); 2894 &isert_cmd->tx_desc.send_wr);
2970 isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; 2895 isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
2971 wr->send_wr_num += 1; 2896 wr->rdma_wr_num += 1;
2972 2897
2973 rc = isert_post_recv(isert_conn, isert_cmd->rx_desc); 2898 rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
2974 if (rc) { 2899 if (rc) {
@@ -2977,7 +2902,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
2977 } 2902 }
2978 } 2903 }
2979 2904
2980 rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); 2905 rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
2981 if (rc) 2906 if (rc)
2982 isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); 2907 isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
2983 2908
@@ -3011,7 +2936,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
3011 return rc; 2936 return rc;
3012 } 2937 }
3013 2938
3014 rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed); 2939 rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
3015 if (rc) 2940 if (rc)
3016 isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); 2941 isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
3017 2942
@@ -3097,7 +3022,7 @@ isert_setup_id(struct isert_np *isert_np)
3097 sa = (struct sockaddr *)&np->np_sockaddr; 3022 sa = (struct sockaddr *)&np->np_sockaddr;
3098 isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); 3023 isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
3099 3024
3100 id = rdma_create_id(isert_cma_handler, isert_np, 3025 id = rdma_create_id(&init_net, isert_cma_handler, isert_np,
3101 RDMA_PS_TCP, IB_QPT_RC); 3026 RDMA_PS_TCP, IB_QPT_RC);
3102 if (IS_ERR(id)) { 3027 if (IS_ERR(id)) {
3103 isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); 3028 isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index c5b99bcecbcf..3d7fbc47c343 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -84,14 +84,12 @@ enum isert_indicator {
84 84
85struct pi_context { 85struct pi_context {
86 struct ib_mr *prot_mr; 86 struct ib_mr *prot_mr;
87 struct ib_fast_reg_page_list *prot_frpl;
88 struct ib_mr *sig_mr; 87 struct ib_mr *sig_mr;
89}; 88};
90 89
91struct fast_reg_descriptor { 90struct fast_reg_descriptor {
92 struct list_head list; 91 struct list_head list;
93 struct ib_mr *data_mr; 92 struct ib_mr *data_mr;
94 struct ib_fast_reg_page_list *data_frpl;
95 u8 ind; 93 u8 ind;
96 struct pi_context *pi_ctx; 94 struct pi_context *pi_ctx;
97}; 95};
@@ -117,9 +115,9 @@ struct isert_rdma_wr {
117 enum iser_ib_op_code iser_ib_op; 115 enum iser_ib_op_code iser_ib_op;
118 struct ib_sge *ib_sge; 116 struct ib_sge *ib_sge;
119 struct ib_sge s_ib_sge; 117 struct ib_sge s_ib_sge;
120 int send_wr_num; 118 int rdma_wr_num;
121 struct ib_send_wr *send_wr; 119 struct ib_rdma_wr *rdma_wr;
122 struct ib_send_wr s_send_wr; 120 struct ib_rdma_wr s_rdma_wr;
123 struct ib_sge ib_sg[3]; 121 struct ib_sge ib_sg[3];
124 struct isert_data_buf data; 122 struct isert_data_buf data;
125 struct isert_data_buf prot; 123 struct isert_data_buf prot;
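The header change above is the other half of the Work Request cleanup: the RDMA-specific fields move out of the old anonymous wr.rdma union into struct ib_rdma_wr, which embeds the generic struct ib_send_wr as its first member. Chaining and posting always go through that embedded member, as the isert_map_rdma hunks earlier show. A hedged sketch of the pattern (the helper and its arguments are illustrative):

#include <rdma/ib_verbs.h>

/* Sketch only: fill one RDMA WRITE work request and chain it to the next one. */
static void example_fill_rdma_wr(struct ib_rdma_wr *cur, struct ib_rdma_wr *next,
				 struct ib_sge *sge, u64 remote_addr, u32 rkey)
{
	cur->wr.opcode	   = IB_WR_RDMA_WRITE;
	cur->wr.sg_list	   = sge;
	cur->wr.num_sge	   = 1;
	cur->wr.send_flags = next ? 0 : IB_SEND_SIGNALED;	/* signal only the last WR */
	cur->wr.next	   = next ? &next->wr : NULL;		/* chain via the embedded ib_send_wr */
	cur->remote_addr   = remote_addr;			/* was wr.rdma.remote_addr */
	cur->rkey	   = rkey;				/* was wr.rdma.rkey */
}

Posting a chain built this way passes the address of the first embedded member, as the converted isert_put_datain does with &wr->rdma_wr->wr.
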
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index b481490ad257..32f79624dd28 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -340,8 +340,6 @@ static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
340 return; 340 return;
341 341
342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 342 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
343 if (d->frpl)
344 ib_free_fast_reg_page_list(d->frpl);
345 if (d->mr) 343 if (d->mr)
346 ib_dereg_mr(d->mr); 344 ib_dereg_mr(d->mr);
347 } 345 }
@@ -362,7 +360,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362 struct srp_fr_pool *pool; 360 struct srp_fr_pool *pool;
363 struct srp_fr_desc *d; 361 struct srp_fr_desc *d;
364 struct ib_mr *mr; 362 struct ib_mr *mr;
365 struct ib_fast_reg_page_list *frpl;
366 int i, ret = -EINVAL; 363 int i, ret = -EINVAL;
367 364
368 if (pool_size <= 0) 365 if (pool_size <= 0)
@@ -385,12 +382,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
385 goto destroy_pool; 382 goto destroy_pool;
386 } 383 }
387 d->mr = mr; 384 d->mr = mr;
388 frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
389 if (IS_ERR(frpl)) {
390 ret = PTR_ERR(frpl);
391 goto destroy_pool;
392 }
393 d->frpl = frpl;
394 list_add_tail(&d->entry, &pool->free_list); 385 list_add_tail(&d->entry, &pool->free_list);
395 } 386 }
396 387
@@ -849,11 +840,12 @@ static void srp_free_req_data(struct srp_target_port *target,
849 840
850 for (i = 0; i < target->req_ring_size; ++i) { 841 for (i = 0; i < target->req_ring_size; ++i) {
851 req = &ch->req_ring[i]; 842 req = &ch->req_ring[i];
852 if (dev->use_fast_reg) 843 if (dev->use_fast_reg) {
853 kfree(req->fr_list); 844 kfree(req->fr_list);
854 else 845 } else {
855 kfree(req->fmr_list); 846 kfree(req->fmr_list);
856 kfree(req->map_page); 847 kfree(req->map_page);
848 }
857 if (req->indirect_dma_addr) { 849 if (req->indirect_dma_addr) {
858 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 850 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
859 target->indirect_size, 851 target->indirect_size,
@@ -887,14 +879,15 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
887 GFP_KERNEL); 879 GFP_KERNEL);
888 if (!mr_list) 880 if (!mr_list)
889 goto out; 881 goto out;
890 if (srp_dev->use_fast_reg) 882 if (srp_dev->use_fast_reg) {
891 req->fr_list = mr_list; 883 req->fr_list = mr_list;
892 else 884 } else {
893 req->fmr_list = mr_list; 885 req->fmr_list = mr_list;
894 req->map_page = kmalloc(srp_dev->max_pages_per_mr * 886 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
895 sizeof(void *), GFP_KERNEL); 887 sizeof(void *), GFP_KERNEL);
896 if (!req->map_page) 888 if (!req->map_page)
897 goto out; 889 goto out;
890 }
898 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 891 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
899 if (!req->indirect_desc) 892 if (!req->indirect_desc)
900 goto out; 893 goto out;
@@ -1286,6 +1279,17 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
1286 if (state->fmr.next >= state->fmr.end) 1279 if (state->fmr.next >= state->fmr.end)
1287 return -ENOMEM; 1280 return -ENOMEM;
1288 1281
1282 WARN_ON_ONCE(!dev->use_fmr);
1283
1284 if (state->npages == 0)
1285 return 0;
1286
1287 if (state->npages == 1 && target->global_mr) {
1288 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1289 target->global_mr->rkey);
1290 goto reset_state;
1291 }
1292
1289 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1293 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1290 state->npages, io_addr); 1294 state->npages, io_addr);
1291 if (IS_ERR(fmr)) 1295 if (IS_ERR(fmr))
@@ -1297,6 +1301,10 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
1297 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1301 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1298 state->dma_len, fmr->fmr->rkey); 1302 state->dma_len, fmr->fmr->rkey);
1299 1303
1304reset_state:
1305 state->npages = 0;
1306 state->dma_len = 0;
1307
1300 return 0; 1308 return 0;
1301} 1309}
1302 1310
@@ -1306,13 +1314,26 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1306 struct srp_target_port *target = ch->target; 1314 struct srp_target_port *target = ch->target;
1307 struct srp_device *dev = target->srp_host->srp_dev; 1315 struct srp_device *dev = target->srp_host->srp_dev;
1308 struct ib_send_wr *bad_wr; 1316 struct ib_send_wr *bad_wr;
1309 struct ib_send_wr wr; 1317 struct ib_reg_wr wr;
1310 struct srp_fr_desc *desc; 1318 struct srp_fr_desc *desc;
1311 u32 rkey; 1319 u32 rkey;
1320 int n, err;
1312 1321
1313 if (state->fr.next >= state->fr.end) 1322 if (state->fr.next >= state->fr.end)
1314 return -ENOMEM; 1323 return -ENOMEM;
1315 1324
1325 WARN_ON_ONCE(!dev->use_fast_reg);
1326
1327 if (state->sg_nents == 0)
1328 return 0;
1329
1330 if (state->sg_nents == 1 && target->global_mr) {
1331 srp_map_desc(state, sg_dma_address(state->sg),
1332 sg_dma_len(state->sg),
1333 target->global_mr->rkey);
1334 return 1;
1335 }
1336
1316 desc = srp_fr_pool_get(ch->fr_pool); 1337 desc = srp_fr_pool_get(ch->fr_pool);
1317 if (!desc) 1338 if (!desc)
1318 return -ENOMEM; 1339 return -ENOMEM;
@@ -1320,56 +1341,33 @@ static int srp_map_finish_fr(struct srp_map_state *state,
1320 rkey = ib_inc_rkey(desc->mr->rkey); 1341 rkey = ib_inc_rkey(desc->mr->rkey);
1321 ib_update_fast_reg_key(desc->mr, rkey); 1342 ib_update_fast_reg_key(desc->mr, rkey);
1322 1343
1323 memcpy(desc->frpl->page_list, state->pages, 1344 n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
1324 sizeof(state->pages[0]) * state->npages); 1345 dev->mr_page_size);
1325 1346 if (unlikely(n < 0))
1326 memset(&wr, 0, sizeof(wr)); 1347 return n;
1327 wr.opcode = IB_WR_FAST_REG_MR; 1348
1328 wr.wr_id = FAST_REG_WR_ID_MASK; 1349 wr.wr.next = NULL;
1329 wr.wr.fast_reg.iova_start = state->base_dma_addr; 1350 wr.wr.opcode = IB_WR_REG_MR;
1330 wr.wr.fast_reg.page_list = desc->frpl; 1351 wr.wr.wr_id = FAST_REG_WR_ID_MASK;
1331 wr.wr.fast_reg.page_list_len = state->npages; 1352 wr.wr.num_sge = 0;
1332 wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); 1353 wr.wr.send_flags = 0;
1333 wr.wr.fast_reg.length = state->dma_len; 1354 wr.mr = desc->mr;
1334 wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | 1355 wr.key = desc->mr->rkey;
1335 IB_ACCESS_REMOTE_READ | 1356 wr.access = (IB_ACCESS_LOCAL_WRITE |
1336 IB_ACCESS_REMOTE_WRITE); 1357 IB_ACCESS_REMOTE_READ |
1337 wr.wr.fast_reg.rkey = desc->mr->lkey; 1358 IB_ACCESS_REMOTE_WRITE);
1338 1359
1339 *state->fr.next++ = desc; 1360 *state->fr.next++ = desc;
1340 state->nmdesc++; 1361 state->nmdesc++;
1341 1362
1342 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1363 srp_map_desc(state, desc->mr->iova,
1343 desc->mr->rkey); 1364 desc->mr->length, desc->mr->rkey);
1344 1365
1345 return ib_post_send(ch->qp, &wr, &bad_wr); 1366 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1346} 1367 if (unlikely(err))
1368 return err;
1347 1369
1348static int srp_finish_mapping(struct srp_map_state *state, 1370 return n;
1349 struct srp_rdma_ch *ch)
1350{
1351 struct srp_target_port *target = ch->target;
1352 struct srp_device *dev = target->srp_host->srp_dev;
1353 int ret = 0;
1354
1355 WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr);
1356
1357 if (state->npages == 0)
1358 return 0;
1359
1360 if (state->npages == 1 && target->global_mr)
1361 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1362 target->global_mr->rkey);
1363 else
1364 ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) :
1365 srp_map_finish_fmr(state, ch);
1366
1367 if (ret == 0) {
1368 state->npages = 0;
1369 state->dma_len = 0;
1370 }
1371
1372 return ret;
1373} 1371}
1374 1372
1375static int srp_map_sg_entry(struct srp_map_state *state, 1373static int srp_map_sg_entry(struct srp_map_state *state,
@@ -1389,7 +1387,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
1389 while (dma_len) { 1387 while (dma_len) {
1390 unsigned offset = dma_addr & ~dev->mr_page_mask; 1388 unsigned offset = dma_addr & ~dev->mr_page_mask;
1391 if (state->npages == dev->max_pages_per_mr || offset != 0) { 1389 if (state->npages == dev->max_pages_per_mr || offset != 0) {
1392 ret = srp_finish_mapping(state, ch); 1390 ret = srp_map_finish_fmr(state, ch);
1393 if (ret) 1391 if (ret)
1394 return ret; 1392 return ret;
1395 } 1393 }
@@ -1411,51 +1409,83 @@ static int srp_map_sg_entry(struct srp_map_state *state,
1411 */ 1409 */
1412 ret = 0; 1410 ret = 0;
1413 if (len != dev->mr_page_size) 1411 if (len != dev->mr_page_size)
1414 ret = srp_finish_mapping(state, ch); 1412 ret = srp_map_finish_fmr(state, ch);
1415 return ret; 1413 return ret;
1416} 1414}
1417 1415
1418static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch, 1416static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1419 struct srp_request *req, struct scatterlist *scat, 1417 struct srp_request *req, struct scatterlist *scat,
1420 int count) 1418 int count)
1421{ 1419{
1422 struct srp_target_port *target = ch->target;
1423 struct srp_device *dev = target->srp_host->srp_dev;
1424 struct scatterlist *sg; 1420 struct scatterlist *sg;
1425 int i, ret; 1421 int i, ret;
1426 1422
1427 state->desc = req->indirect_desc; 1423 state->desc = req->indirect_desc;
1428 state->pages = req->map_page; 1424 state->pages = req->map_page;
1429 if (dev->use_fast_reg) { 1425 state->fmr.next = req->fmr_list;
1430 state->fr.next = req->fr_list; 1426 state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
1431 state->fr.end = req->fr_list + target->cmd_sg_cnt;
1432 } else if (dev->use_fmr) {
1433 state->fmr.next = req->fmr_list;
1434 state->fmr.end = req->fmr_list + target->cmd_sg_cnt;
1435 }
1436 1427
1437 if (dev->use_fast_reg || dev->use_fmr) { 1428 for_each_sg(scat, sg, count, i) {
1438 for_each_sg(scat, sg, count, i) { 1429 ret = srp_map_sg_entry(state, ch, sg, i);
1439 ret = srp_map_sg_entry(state, ch, sg, i);
1440 if (ret)
1441 goto out;
1442 }
1443 ret = srp_finish_mapping(state, ch);
1444 if (ret) 1430 if (ret)
1445 goto out; 1431 return ret;
1446 } else {
1447 for_each_sg(scat, sg, count, i) {
1448 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1449 ib_sg_dma_len(dev->dev, sg),
1450 target->global_mr->rkey);
1451 }
1452 } 1432 }
1453 1433
1434 ret = srp_map_finish_fmr(state, ch);
1435 if (ret)
1436 return ret;
1437
1454 req->nmdesc = state->nmdesc; 1438 req->nmdesc = state->nmdesc;
1455 ret = 0;
1456 1439
1457out: 1440 return 0;
1458 return ret; 1441}
1442
1443static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1444 struct srp_request *req, struct scatterlist *scat,
1445 int count)
1446{
1447 state->desc = req->indirect_desc;
1448 state->fr.next = req->fr_list;
1449 state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
1450 state->sg = scat;
1451 state->sg_nents = scsi_sg_count(req->scmnd);
1452
1453 while (state->sg_nents) {
1454 int i, n;
1455
1456 n = srp_map_finish_fr(state, ch);
1457 if (unlikely(n < 0))
1458 return n;
1459
1460 state->sg_nents -= n;
1461 for (i = 0; i < n; i++)
1462 state->sg = sg_next(state->sg);
1463 }
1464
1465 req->nmdesc = state->nmdesc;
1466
1467 return 0;
1468}
1469
1470static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1471 struct srp_request *req, struct scatterlist *scat,
1472 int count)
1473{
1474 struct srp_target_port *target = ch->target;
1475 struct srp_device *dev = target->srp_host->srp_dev;
1476 struct scatterlist *sg;
1477 int i;
1478
1479 state->desc = req->indirect_desc;
1480 for_each_sg(scat, sg, count, i) {
1481 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1482 ib_sg_dma_len(dev->dev, sg),
1483 target->global_mr->rkey);
1484 }
1485
1486 req->nmdesc = state->nmdesc;
1487
1488 return 0;
1459} 1489}
1460 1490
1461/* 1491/*
@@ -1474,6 +1504,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1474 struct srp_map_state state; 1504 struct srp_map_state state;
1475 struct srp_direct_buf idb_desc; 1505 struct srp_direct_buf idb_desc;
1476 u64 idb_pages[1]; 1506 u64 idb_pages[1];
1507 struct scatterlist idb_sg[1];
1477 int ret; 1508 int ret;
1478 1509
1479 memset(&state, 0, sizeof(state)); 1510 memset(&state, 0, sizeof(state));
@@ -1481,20 +1512,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1481 state.gen.next = next_mr; 1512 state.gen.next = next_mr;
1482 state.gen.end = end_mr; 1513 state.gen.end = end_mr;
1483 state.desc = &idb_desc; 1514 state.desc = &idb_desc;
1484 state.pages = idb_pages;
1485 state.pages[0] = (req->indirect_dma_addr &
1486 dev->mr_page_mask);
1487 state.npages = 1;
1488 state.base_dma_addr = req->indirect_dma_addr; 1515 state.base_dma_addr = req->indirect_dma_addr;
1489 state.dma_len = idb_len; 1516 state.dma_len = idb_len;
1490 ret = srp_finish_mapping(&state, ch); 1517
1491 if (ret < 0) 1518 if (dev->use_fast_reg) {
1492 goto out; 1519 state.sg = idb_sg;
1520 state.sg_nents = 1;
1521 sg_set_buf(idb_sg, req->indirect_desc, idb_len);
1522 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1523 ret = srp_map_finish_fr(&state, ch);
1524 if (ret < 0)
1525 return ret;
1526 } else if (dev->use_fmr) {
1527 state.pages = idb_pages;
1528 state.pages[0] = (req->indirect_dma_addr &
1529 dev->mr_page_mask);
1530 state.npages = 1;
1531 ret = srp_map_finish_fmr(&state, ch);
1532 if (ret < 0)
1533 return ret;
1534 } else {
1535 return -EINVAL;
1536 }
1493 1537
1494 *idb_rkey = idb_desc.key; 1538 *idb_rkey = idb_desc.key;
1495 1539
1496out: 1540 return 0;
1497 return ret;
1498} 1541}
1499 1542
1500static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1543static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
@@ -1563,7 +1606,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1563 target->indirect_size, DMA_TO_DEVICE); 1606 target->indirect_size, DMA_TO_DEVICE);
1564 1607
1565 memset(&state, 0, sizeof(state)); 1608 memset(&state, 0, sizeof(state));
1566 srp_map_sg(&state, ch, req, scat, count); 1609 if (dev->use_fast_reg)
1610 srp_map_sg_fr(&state, ch, req, scat, count);
1611 else if (dev->use_fmr)
1612 srp_map_sg_fmr(&state, ch, req, scat, count);
1613 else
1614 srp_map_sg_dma(&state, ch, req, scat, count);
1567 1615
1568 /* We've mapped the request, now pull as much of the indirect 1616 /* We've mapped the request, now pull as much of the indirect
1569 * descriptor table as we can into the command buffer. If this 1617 * descriptor table as we can into the command buffer. If this
@@ -3213,7 +3261,7 @@ static ssize_t srp_create_target(struct device *dev,
3213 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3261 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3214 INIT_WORK(&target->remove_work, srp_remove_work); 3262 INIT_WORK(&target->remove_work, srp_remove_work);
3215 spin_lock_init(&target->lock); 3263 spin_lock_init(&target->lock);
3216 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid); 3264 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3217 if (ret) 3265 if (ret)
3218 goto out; 3266 goto out;
3219 3267
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 3608f2e4819c..87a2a919dc43 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -242,7 +242,6 @@ struct srp_iu {
242struct srp_fr_desc { 242struct srp_fr_desc {
243 struct list_head entry; 243 struct list_head entry;
244 struct ib_mr *mr; 244 struct ib_mr *mr;
245 struct ib_fast_reg_page_list *frpl;
246}; 245};
247 246
248/** 247/**
@@ -294,11 +293,17 @@ struct srp_map_state {
294 } gen; 293 } gen;
295 }; 294 };
296 struct srp_direct_buf *desc; 295 struct srp_direct_buf *desc;
297 u64 *pages; 296 union {
297 u64 *pages;
298 struct scatterlist *sg;
299 };
298 dma_addr_t base_dma_addr; 300 dma_addr_t base_dma_addr;
299 u32 dma_len; 301 u32 dma_len;
300 u32 total_len; 302 u32 total_len;
301 unsigned int npages; 303 union {
304 unsigned int npages;
305 int sg_nents;
306 };
302 unsigned int nmdesc; 307 unsigned int nmdesc;
303 unsigned int ndesc; 308 unsigned int ndesc;
304}; 309};
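The srp_map_state change above makes the FMR and fast-registration bookkeeping mutually exclusive by overlaying pages/npages with sg/sg_nents. Callers must therefore initialize exactly one view, which is what the new srp_map_sg_fmr() and srp_map_sg_fr() helpers do. A small sketch of that split (the helper name and the page_buf parameter are illustrative):

#include "ib_srp.h"

/* Sketch only: pick either the FR or the FMR view of the unionized mapping state. */
static void example_init_map_state(struct srp_map_state *state,
				   struct srp_device *dev,
				   struct scatterlist *scat, int count,
				   u64 *page_buf)
{
	if (dev->use_fast_reg) {
		state->sg = scat;		/* fast-registration view */
		state->sg_nents = count;
	} else if (dev->use_fmr) {
		state->pages = page_buf;	/* FMR page-array view */
		state->npages = 0;
	}
}
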
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index f6fe0414139b..47c4022fda76 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -546,7 +546,8 @@ static int srpt_refresh_port(struct srpt_port *sport)
546 sport->sm_lid = port_attr.sm_lid; 546 sport->sm_lid = port_attr.sm_lid;
547 sport->lid = port_attr.lid; 547 sport->lid = port_attr.lid;
548 548
549 ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); 549 ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
550 NULL);
550 if (ret) 551 if (ret)
551 goto err_query_port; 552 goto err_query_port;
552 553
@@ -2822,7 +2823,7 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2822static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, 2823static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2823 struct srpt_send_ioctx *ioctx) 2824 struct srpt_send_ioctx *ioctx)
2824{ 2825{
2825 struct ib_send_wr wr; 2826 struct ib_rdma_wr wr;
2826 struct ib_send_wr *bad_wr; 2827 struct ib_send_wr *bad_wr;
2827 struct rdma_iu *riu; 2828 struct rdma_iu *riu;
2828 int i; 2829 int i;
@@ -2850,29 +2851,29 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2850 2851
2851 for (i = 0; i < n_rdma; ++i, ++riu) { 2852 for (i = 0; i < n_rdma; ++i, ++riu) {
2852 if (dir == DMA_FROM_DEVICE) { 2853 if (dir == DMA_FROM_DEVICE) {
2853 wr.opcode = IB_WR_RDMA_WRITE; 2854 wr.wr.opcode = IB_WR_RDMA_WRITE;
2854 wr.wr_id = encode_wr_id(i == n_rdma - 1 ? 2855 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2855 SRPT_RDMA_WRITE_LAST : 2856 SRPT_RDMA_WRITE_LAST :
2856 SRPT_RDMA_MID, 2857 SRPT_RDMA_MID,
2857 ioctx->ioctx.index); 2858 ioctx->ioctx.index);
2858 } else { 2859 } else {
2859 wr.opcode = IB_WR_RDMA_READ; 2860 wr.wr.opcode = IB_WR_RDMA_READ;
2860 wr.wr_id = encode_wr_id(i == n_rdma - 1 ? 2861 wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2861 SRPT_RDMA_READ_LAST : 2862 SRPT_RDMA_READ_LAST :
2862 SRPT_RDMA_MID, 2863 SRPT_RDMA_MID,
2863 ioctx->ioctx.index); 2864 ioctx->ioctx.index);
2864 } 2865 }
2865 wr.next = NULL; 2866 wr.wr.next = NULL;
2866 wr.wr.rdma.remote_addr = riu->raddr; 2867 wr.remote_addr = riu->raddr;
2867 wr.wr.rdma.rkey = riu->rkey; 2868 wr.rkey = riu->rkey;
2868 wr.num_sge = riu->sge_cnt; 2869 wr.wr.num_sge = riu->sge_cnt;
2869 wr.sg_list = riu->sge; 2870 wr.wr.sg_list = riu->sge;
2870 2871
2871 /* only get completion event for the last rdma write */ 2872 /* only get completion event for the last rdma write */
2872 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) 2873 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
2873 wr.send_flags = IB_SEND_SIGNALED; 2874 wr.wr.send_flags = IB_SEND_SIGNALED;
2874 2875
2875 ret = ib_post_send(ch->qp, &wr, &bad_wr); 2876 ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
2876 if (ret) 2877 if (ret)
2877 break; 2878 break;
2878 } 2879 }
@@ -2881,11 +2882,11 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2881 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", 2882 pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
2882 __func__, __LINE__, ret, i, n_rdma); 2883 __func__, __LINE__, ret, i, n_rdma);
2883 if (ret && i > 0) { 2884 if (ret && i > 0) {
2884 wr.num_sge = 0; 2885 wr.wr.num_sge = 0;
2885 wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); 2886 wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
2886 wr.send_flags = IB_SEND_SIGNALED; 2887 wr.wr.send_flags = IB_SEND_SIGNALED;
2887 while (ch->state == CH_LIVE && 2888 while (ch->state == CH_LIVE &&
2888 ib_post_send(ch->qp, &wr, &bad_wr) != 0) { 2889 ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
2889 pr_info("Trying to abort failed RDMA transfer [%d]\n", 2890 pr_info("Trying to abort failed RDMA transfer [%d]\n",
2890 ioctx->ioctx.index); 2891 ioctx->ioctx.index);
2891 msleep(1000); 2892 msleep(1000);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 414fe7c487d5..55a47de544ea 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -49,6 +49,7 @@
49#include <linux/etherdevice.h> 49#include <linux/etherdevice.h>
50#include <linux/net_tstamp.h> 50#include <linux/net_tstamp.h>
51#include <asm/io.h> 51#include <asm/io.h>
52#include "t4_chip_type.h"
52#include "cxgb4_uld.h" 53#include "cxgb4_uld.h"
53 54
54#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) 55#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
@@ -291,31 +292,6 @@ struct pci_params {
291 unsigned char width; 292 unsigned char width;
292}; 293};
293 294
294#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
295#define CHELSIO_CHIP_FPGA 0x100
296#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
297#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
298
299#define CHELSIO_T4 0x4
300#define CHELSIO_T5 0x5
301#define CHELSIO_T6 0x6
302
303enum chip_type {
304 T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
305 T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
306 T4_FIRST_REV = T4_A1,
307 T4_LAST_REV = T4_A2,
308
309 T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
310 T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
311 T5_FIRST_REV = T5_A0,
312 T5_LAST_REV = T5_A1,
313
314 T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
315 T6_FIRST_REV = T6_A0,
316 T6_LAST_REV = T6_A0,
317};
318
319struct devlog_params { 295struct devlog_params {
320 u32 memtype; /* which memory (EDC0, EDC1, MC) */ 296 u32 memtype; /* which memory (EDC0, EDC1, MC) */
321 u32 start; /* start of log in firmware memory */ 297 u32 start; /* start of log in firmware memory */
@@ -909,21 +885,6 @@ static inline int is_offload(const struct adapter *adap)
909 return adap->params.offload; 885 return adap->params.offload;
910} 886}
911 887
912static inline int is_t6(enum chip_type chip)
913{
914 return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6;
915}
916
917static inline int is_t5(enum chip_type chip)
918{
919 return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5;
920}
921
922static inline int is_t4(enum chip_type chip)
923{
924 return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4;
925}
926
927static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) 888static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
928{ 889{
929 return readl(adap->regs + reg_addr); 890 return readl(adap->regs + reg_addr);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 2cf81857a297..0d147610a06f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1941,6 +1941,28 @@ unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
1941EXPORT_SYMBOL(cxgb4_best_aligned_mtu); 1941EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
1942 1942
1943/** 1943/**
1944 * cxgb4_tp_smt_idx - Get the Source Mac Table index for this VI
1945 * @chip: chip type
1946 * @viid: VI id of the given port
1947 *
1948 * Return the SMT index for this VI.
1949 */
1950unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid)
1951{
1952 /* In T4/T5, SMT contains 256 SMAC entries organized in
1953 * 128 rows of 2 entries each.
1954 * In T6, SMT contains 256 SMAC entries in 256 rows.
1955 * TODO: The below code needs to be updated when we add support
1956 * for 256 VFs.
1957 */
1958 if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
1959 return ((viid & 0x7f) << 1);
1960 else
1961 return (viid & 0x7f);
1962}
1963EXPORT_SYMBOL(cxgb4_tp_smt_idx);
1964
1965/**
1944 * cxgb4_port_chan - get the HW channel of a port 1966 * cxgb4_port_chan - get the HW channel of a port
1945 * @dev: the net device for the port 1967 * @dev: the net device for the port
1946 * 1968 *
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index c3a8be5541e7..cf711d5f15be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -40,6 +40,7 @@
40#include <linux/skbuff.h> 40#include <linux/skbuff.h>
41#include <linux/inetdevice.h> 41#include <linux/inetdevice.h>
42#include <linux/atomic.h> 42#include <linux/atomic.h>
43#include "cxgb4.h"
43 44
44/* CPL message priority levels */ 45/* CPL message priority levels */
45enum { 46enum {
@@ -290,6 +291,7 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
290unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); 291unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
291unsigned int cxgb4_port_chan(const struct net_device *dev); 292unsigned int cxgb4_port_chan(const struct net_device *dev);
292unsigned int cxgb4_port_viid(const struct net_device *dev); 293unsigned int cxgb4_port_viid(const struct net_device *dev);
294unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
293unsigned int cxgb4_port_idx(const struct net_device *dev); 295unsigned int cxgb4_port_idx(const struct net_device *dev);
294unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu, 296unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
295 unsigned int *idx); 297 unsigned int *idx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
new file mode 100644
index 000000000000..54b718111e3f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
@@ -0,0 +1,85 @@
1/*
2 * This file is part of the Chelsio T4 Ethernet driver for Linux.
3 *
4 * Copyright (c) 2003-2015 Chelsio Communications, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34#ifndef __T4_CHIP_TYPE_H__
35#define __T4_CHIP_TYPE_H__
36
37#define CHELSIO_T4 0x4
38#define CHELSIO_T5 0x5
39#define CHELSIO_T6 0x6
40
41/* We code the Chelsio T4 Family "Chip Code" as a tuple:
42 *
43 * (Chip Version, Chip Revision)
44 *
45 * where:
46 *
47 * Chip Version: is T4, T5, etc.
48 * Chip Revision: is the FAB "spin" of the Chip Version.
49 */
50#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
51#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
52#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
53
54enum chip_type {
55 T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
56 T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
57 T4_FIRST_REV = T4_A1,
58 T4_LAST_REV = T4_A2,
59
60 T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
61 T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
62 T5_FIRST_REV = T5_A0,
63 T5_LAST_REV = T5_A1,
64
65 T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
66 T6_FIRST_REV = T6_A0,
67 T6_LAST_REV = T6_A0,
68};
69
70static inline int is_t4(enum chip_type chip)
71{
72 return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4);
73}
74
75static inline int is_t5(enum chip_type chip)
76{
77 return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5);
78}
79
80static inline int is_t6(enum chip_type chip)
81{
82 return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6);
83}
84
85#endif /* __T4_CHIP_TYPE_H__ */
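The new t4_chip_type.h simply relocates the chip-code encoding so other Chelsio code can branch on adapter generation without pulling in all of cxgb4.h. A small usage sketch, tying it to the SMT layout note in cxgb4_tp_smt_idx() above (the helper name and return convention are illustrative):

#include "t4_chip_type.h"

/* Sketch only: T4/T5 pack two SMAC entries per SMT row, T6 one per row. */
static unsigned int example_smt_entries_per_row(enum chip_type chip)
{
	if (is_t4(chip) || is_t5(chip))
		return 2;
	if (is_t6(chip))
		return 1;
	return 0;	/* unknown chip generation */
}
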
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index b99144afd4ec..a072d341e205 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -417,6 +417,21 @@ struct cpl_t5_act_open_req {
417 __be64 params; 417 __be64 params;
418}; 418};
419 419
420struct cpl_t6_act_open_req {
421 WR_HDR;
422 union opcode_tid ot;
423 __be16 local_port;
424 __be16 peer_port;
425 __be32 local_ip;
426 __be32 peer_ip;
427 __be64 opt0;
428 __be32 rsvd;
429 __be32 opt2;
430 __be64 params;
431 __be32 rsvd2;
432 __be32 opt3;
433};
434
420struct cpl_act_open_req6 { 435struct cpl_act_open_req6 {
421 WR_HDR; 436 WR_HDR;
422 union opcode_tid ot; 437 union opcode_tid ot;
@@ -446,6 +461,23 @@ struct cpl_t5_act_open_req6 {
446 __be64 params; 461 __be64 params;
447}; 462};
448 463
464struct cpl_t6_act_open_req6 {
465 WR_HDR;
466 union opcode_tid ot;
467 __be16 local_port;
468 __be16 peer_port;
469 __be64 local_ip_hi;
470 __be64 local_ip_lo;
471 __be64 peer_ip_hi;
472 __be64 peer_ip_lo;
473 __be64 opt0;
474 __be32 rsvd;
475 __be32 opt2;
476 __be64 params;
477 __be32 rsvd2;
478 __be32 opt3;
479};
480
449struct cpl_act_open_rpl { 481struct cpl_act_open_rpl {
450 union opcode_tid ot; 482 union opcode_tid ot;
451 __be32 atid_status; 483 __be32 atid_status;
@@ -504,6 +536,19 @@ struct cpl_pass_establish {
504#define TCPOPT_MSS_M 0xF 536#define TCPOPT_MSS_M 0xF
505#define TCPOPT_MSS_G(x) (((x) >> TCPOPT_MSS_S) & TCPOPT_MSS_M) 537#define TCPOPT_MSS_G(x) (((x) >> TCPOPT_MSS_S) & TCPOPT_MSS_M)
506 538
539#define T6_TCP_HDR_LEN_S 8
540#define T6_TCP_HDR_LEN_V(x) ((x) << T6_TCP_HDR_LEN_S)
541#define T6_TCP_HDR_LEN_G(x) (((x) >> T6_TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
542
543#define T6_IP_HDR_LEN_S 14
544#define T6_IP_HDR_LEN_V(x) ((x) << T6_IP_HDR_LEN_S)
545#define T6_IP_HDR_LEN_G(x) (((x) >> T6_IP_HDR_LEN_S) & IP_HDR_LEN_M)
546
547#define T6_ETH_HDR_LEN_S 24
548#define T6_ETH_HDR_LEN_M 0xFF
549#define T6_ETH_HDR_LEN_V(x) ((x) << T6_ETH_HDR_LEN_S)
550#define T6_ETH_HDR_LEN_G(x) (((x) >> T6_ETH_HDR_LEN_S) & T6_ETH_HDR_LEN_M)
551
507struct cpl_act_establish { 552struct cpl_act_establish {
508 union opcode_tid ot; 553 union opcode_tid ot;
509 __be32 rsvd; 554 __be32 rsvd;
@@ -833,6 +878,9 @@ struct cpl_rx_pkt {
833 __be16 err_vec; 878 __be16 err_vec;
834}; 879};
835 880
881#define RX_T6_ETHHDR_LEN_M 0xFF
882#define RX_T6_ETHHDR_LEN_G(x) (((x) >> RX_ETHHDR_LEN_S) & RX_T6_ETHHDR_LEN_M)
883
836#define RXF_PSH_S 20 884#define RXF_PSH_S 20
837#define RXF_PSH_V(x) ((x) << RXF_PSH_S) 885#define RXF_PSH_V(x) ((x) << RXF_PSH_S)
838#define RXF_PSH_F RXF_PSH_V(1U) 886#define RXF_PSH_F RXF_PSH_V(1U)
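The RX_T6_ETHHDR_LEN_G getter added above keeps the existing shift but applies the wider T6 mask. A hedged sketch of how a consumer might dispatch on chip type (the pre-existing RX_ETHHDR_LEN_G getter and the helper name are assumptions outside this hunk):

#include "t4_chip_type.h"
#include "t4_msg.h"

/* Sketch only: extract the ethernet header length from an RX CPL length word. */
static unsigned int example_rx_ethhdr_len(enum chip_type chip, u32 len_word)
{
	return is_t6(chip) ? RX_T6_ETHHDR_LEN_G(len_word)	/* 8-bit field on T6 */
			   : RX_ETHHDR_LEN_G(len_word);		/* narrower field on T4/T5 */
}
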
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index a946e4bf71d2..005f910ec955 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -123,6 +123,28 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
123 */ 123 */
124 if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback) 124 if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)
125 priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK; 125 priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
126
127 mutex_lock(&priv->mdev->state_lock);
128 if (priv->mdev->dev->caps.flags2 &
129 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB &&
130 priv->rss_map.indir_qp.qpn) {
131 int i;
132 int err = 0;
133 int loopback = !!(features & NETIF_F_LOOPBACK);
134
135 for (i = 0; i < priv->rx_ring_num; i++) {
136 int ret;
137
138 ret = mlx4_en_change_mcast_lb(priv,
139 &priv->rss_map.qps[i],
140 loopback);
141 if (!err)
142 err = ret;
143 }
144 if (err)
145 mlx4_warn(priv->mdev, "failed to change mcast loopback\n");
146 }
147 mutex_unlock(&priv->mdev->state_lock);
126} 148}
127 149
128static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) 150static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index e482fa1bb741..12aab5a659d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -69,6 +69,15 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
69 context->pri_path.counter_index = priv->counter_index; 69 context->pri_path.counter_index = priv->counter_index;
70 context->cqn_send = cpu_to_be32(cqn); 70 context->cqn_send = cpu_to_be32(cqn);
71 context->cqn_recv = cpu_to_be32(cqn); 71 context->cqn_recv = cpu_to_be32(cqn);
72 if (!rss &&
73 (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
74 context->pri_path.counter_index !=
75 MLX4_SINK_COUNTER_INDEX(mdev->dev)) {
76 /* disable multicast loopback to qp with same counter */
77 if (!(dev->features & NETIF_F_LOOPBACK))
78 context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
79 context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
80 }
72 context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2); 81 context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
73 if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) 82 if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
74 context->param3 |= cpu_to_be32(1 << 30); 83 context->param3 |= cpu_to_be32(1 << 30);
@@ -80,6 +89,22 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
80 } 89 }
81} 90}
82 91
92int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
93 int loopback)
94{
95 int ret;
96 struct mlx4_update_qp_params qp_params;
97
98 memset(&qp_params, 0, sizeof(qp_params));
99 if (!loopback)
100 qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB;
101
102 ret = mlx4_update_qp(priv->mdev->dev, qp->qpn,
103 MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
104 &qp_params);
105
106 return ret;
107}
83 108
84int mlx4_en_map_buffer(struct mlx4_buf *buf) 109int mlx4_en_map_buffer(struct mlx4_buf *buf)
85{ 110{
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f13a4d7bbf95..90db94e83fde 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
155 [27] = "Port beacon support", 155 [27] = "Port beacon support",
156 [28] = "RX-ALL support", 156 [28] = "RX-ALL support",
157 [29] = "802.1ad offload support", 157 [29] = "802.1ad offload support",
158 [31] = "Modifying loopback source checks using UPDATE_QP support",
159 [32] = "Loopback source checks support",
158 }; 160 };
159 int i; 161 int i;
160 162
@@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
964 MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); 966 MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
965 if (field32 & (1 << 16)) 967 if (field32 & (1 << 16))
966 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; 968 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
969 if (field32 & (1 << 18))
970 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB;
971 if (field32 & (1 << 19))
972 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
967 if (field32 & (1 << 26)) 973 if (field32 & (1 << 26))
968 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; 974 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
969 if (field32 & (1 << 20)) 975 if (field32 & (1 << 20))
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index defcf8c395bf..c41f15102ae0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -798,7 +798,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
798void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); 798void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
799int mlx4_en_map_buffer(struct mlx4_buf *buf); 799int mlx4_en_map_buffer(struct mlx4_buf *buf);
800void mlx4_en_unmap_buffer(struct mlx4_buf *buf); 800void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
801 801int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
802 int loopback);
802void mlx4_en_calc_rx_buf(struct net_device *dev); 803void mlx4_en_calc_rx_buf(struct net_device *dev);
803int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv); 804int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
804void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv); 805void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 3311f35d08e0..168823dde79f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
436 cmd->qp_context.pri_path.grh_mylmc = params->smac_index; 436 cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
437 } 437 }
438 438
439 if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) {
440 if (!(dev->caps.flags2
441 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
442 mlx4_warn(dev,
443 "Trying to set src check LB, but it isn't supported\n");
444 err = -ENOTSUPP;
445 goto out;
446 }
447 pri_addr_path_mask |=
448 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB;
449 if (params->flags &
450 MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) {
451 cmd->qp_context.pri_path.fl |=
452 MLX4_FL_ETH_SRC_CHECK_MC_LB;
453 }
454 }
455
439 if (attr & MLX4_UPDATE_QP_VSD) { 456 if (attr & MLX4_UPDATE_QP_VSD) {
440 qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD; 457 qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
441 if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE) 458 if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
@@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
458 err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0, 475 err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
459 MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, 476 MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
460 MLX4_CMD_NATIVE); 477 MLX4_CMD_NATIVE);
461 478out:
462 mlx4_free_cmd_mailbox(dev, mailbox); 479 mlx4_free_cmd_mailbox(dev, mailbox);
463 return err; 480 return err;
464} 481}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index ac4b99ab1f85..9813d34f3e5b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
770 } 770 }
771 } 771 }
772 772
773 /* preserve IF_COUNTER flag */
774 qpc->pri_path.vlan_control &=
775 MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
773 if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE && 776 if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
774 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) { 777 dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
775 qpc->pri_path.vlan_control = 778 qpc->pri_path.vlan_control |=
776 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | 779 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
777 MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | 780 MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
778 MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED | 781 MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
@@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
780 MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | 783 MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
781 MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; 784 MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
782 } else if (0 != vp_oper->state.default_vlan) { 785 } else if (0 != vp_oper->state.default_vlan) {
783 qpc->pri_path.vlan_control = 786 qpc->pri_path.vlan_control |=
784 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | 787 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
785 MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | 788 MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
786 MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; 789 MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
787 } else { /* priority tagged */ 790 } else { /* priority tagged */
788 qpc->pri_path.vlan_control = 791 qpc->pri_path.vlan_control |=
789 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | 792 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
790 MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; 793 MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
791 } 794 }
@@ -3764,9 +3767,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
3764 update_gid(dev, inbox, (u8)slave); 3767 update_gid(dev, inbox, (u8)slave);
3765 adjust_proxy_tun_qkey(dev, vhcr, qpc); 3768 adjust_proxy_tun_qkey(dev, vhcr, qpc);
3766 orig_sched_queue = qpc->pri_path.sched_queue; 3769 orig_sched_queue = qpc->pri_path.sched_queue;
3767 err = update_vport_qp_param(dev, inbox, slave, qpn);
3768 if (err)
3769 return err;
3770 3770
3771 err = get_res(dev, slave, qpn, RES_QP, &qp); 3771 err = get_res(dev, slave, qpn, RES_QP, &qp);
3772 if (err) 3772 if (err)
@@ -3776,6 +3776,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
3776 goto out; 3776 goto out;
3777 } 3777 }
3778 3778
3779 err = update_vport_qp_param(dev, inbox, slave, qpn);
3780 if (err)
3781 goto out;
3782
3779 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); 3783 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
3780out: 3784out:
3781 /* if no error, save sched queue value passed in by VF. This is 3785 /* if no error, save sched queue value passed in by VF. This is
@@ -4210,7 +4214,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave,
4210 4214
4211} 4215}
4212 4216
4213#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) 4217#define MLX4_UPD_QP_PATH_MASK_SUPPORTED ( \
4218 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX |\
4219 1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)
4214int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, 4220int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
4215 struct mlx4_vhcr *vhcr, 4221 struct mlx4_vhcr *vhcr,
4216 struct mlx4_cmd_mailbox *inbox, 4222 struct mlx4_cmd_mailbox *inbox,
@@ -4233,6 +4239,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
4233 (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) 4239 (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
4234 return -EPERM; 4240 return -EPERM;
4235 4241
4242 if ((pri_addr_path_mask &
4243 (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
4244 !(dev->caps.flags2 &
4245 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
4246 mlx4_warn(dev,
4247 "Src check LB for slave %d isn't supported\n",
4248 slave);
4249 return -ENOTSUPP;
4250 }
4251
4236 /* Just change the smac for the QP */ 4252 /* Just change the smac for the QP */
4237 err = get_res(dev, slave, qpn, RES_QP, &rqp); 4253 err = get_res(dev, slave, qpn, RES_QP, &rqp);
4238 if (err) { 4254 if (err) {
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
index 5f78b42b427a..263db37de7c8 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
@@ -123,7 +123,9 @@ extern kib_tunables_t kiblnd_tunables;
123 IBLND_CREDIT_HIGHWATER_V1 : \ 123 IBLND_CREDIT_HIGHWATER_V1 : \
124 *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ 124 *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
125 125
126#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt) 126#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \
127 cb, dev, \
128 ps, qpt)
127 129
128static inline int 130static inline int
129kiblnd_concurrent_sends_v1(void) 131kiblnd_concurrent_sends_v1(void)
@@ -504,7 +506,7 @@ typedef struct kib_tx /* transmit message */
504 __u64 tx_msgaddr; /* message buffer (I/O addr) */ 506 __u64 tx_msgaddr; /* message buffer (I/O addr) */
505 DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */ 507 DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */
506 int tx_nwrq; /* # send work items */ 508 int tx_nwrq; /* # send work items */
507 struct ib_send_wr *tx_wrq; /* send work items... */ 509 struct ib_rdma_wr *tx_wrq; /* send work items... */
508 struct ib_sge *tx_sge; /* ...and their memory */ 510 struct ib_sge *tx_sge; /* ...and their memory */
509 kib_rdma_desc_t *tx_rd; /* rdma descriptor */ 511 kib_rdma_desc_t *tx_rd; /* rdma descriptor */
510 int tx_nfrags; /* # entries in... */ 512 int tx_nfrags; /* # entries in... */
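The hunk above tracks two of the API changes in this series: rdma_create_id() now takes a network namespace as its first argument (the Lustre wrapper simply pins everything to init_net), and the send work-request array switches from the generic struct ib_send_wr to the typed struct ib_rdma_wr. A minimal, hedged sketch of a caller using the namespace-aware rdma_create_id(); the handler and context names are illustrative, not symbols from this patch:

	#include <net/net_namespace.h>
	#include <rdma/rdma_cm.h>

	/* Sketch only: create a CM id bound to the initial network namespace,
	 * which is what kiblnd_rdma_create_id() expands to after this change. */
	static struct rdma_cm_id *example_create_id(rdma_cm_event_handler handler,
						    void *context)
	{
		return rdma_create_id(&init_net, handler, context,
				      RDMA_PS_TCP, IB_QPT_RC);
	}

Callers that serve multiple namespaces would pass the namespace of the socket or net device they are bound to instead of &init_net.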
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index 8989e36091fb..260750354a41 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -838,7 +838,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
838 /* close_conn will launch failover */ 838 /* close_conn will launch failover */
839 rc = -ENETDOWN; 839 rc = -ENETDOWN;
840 } else { 840 } else {
841 rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq); 841 rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq);
842 } 842 }
843 843
844 conn->ibc_last_send = jiffies; 844 conn->ibc_last_send = jiffies;
@@ -1012,7 +1012,7 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
1012{ 1012{
1013 kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev; 1013 kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
1014 struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; 1014 struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
1015 struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; 1015 struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
1016 int nob = offsetof(kib_msg_t, ibm_u) + body_nob; 1016 int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
1017 struct ib_mr *mr; 1017 struct ib_mr *mr;
1018 1018
@@ -1031,12 +1031,12 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
1031 1031
1032 memset(wrq, 0, sizeof(*wrq)); 1032 memset(wrq, 0, sizeof(*wrq));
1033 1033
1034 wrq->next = NULL; 1034 wrq->wr.next = NULL;
1035 wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); 1035 wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
1036 wrq->sg_list = sge; 1036 wrq->wr.sg_list = sge;
1037 wrq->num_sge = 1; 1037 wrq->wr.num_sge = 1;
1038 wrq->opcode = IB_WR_SEND; 1038 wrq->wr.opcode = IB_WR_SEND;
1039 wrq->send_flags = IB_SEND_SIGNALED; 1039 wrq->wr.send_flags = IB_SEND_SIGNALED;
1040 1040
1041 tx->tx_nwrq++; 1041 tx->tx_nwrq++;
1042} 1042}
@@ -1048,7 +1048,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
1048 kib_msg_t *ibmsg = tx->tx_msg; 1048 kib_msg_t *ibmsg = tx->tx_msg;
1049 kib_rdma_desc_t *srcrd = tx->tx_rd; 1049 kib_rdma_desc_t *srcrd = tx->tx_rd;
1050 struct ib_sge *sge = &tx->tx_sge[0]; 1050 struct ib_sge *sge = &tx->tx_sge[0];
1051 struct ib_send_wr *wrq = &tx->tx_wrq[0]; 1051 struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next;
1052 int rc = resid; 1052 int rc = resid;
1053 int srcidx; 1053 int srcidx;
1054 int dstidx; 1054 int dstidx;
@@ -1094,16 +1094,17 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
1094 sge->length = wrknob; 1094 sge->length = wrknob;
1095 1095
1096 wrq = &tx->tx_wrq[tx->tx_nwrq]; 1096 wrq = &tx->tx_wrq[tx->tx_nwrq];
1097 next = wrq + 1;
1097 1098
1098 wrq->next = wrq + 1; 1099 wrq->wr.next = &next->wr;
1099 wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); 1100 wrq->wr.wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
1100 wrq->sg_list = sge; 1101 wrq->wr.sg_list = sge;
1101 wrq->num_sge = 1; 1102 wrq->wr.num_sge = 1;
1102 wrq->opcode = IB_WR_RDMA_WRITE; 1103 wrq->wr.opcode = IB_WR_RDMA_WRITE;
1103 wrq->send_flags = 0; 1104 wrq->wr.send_flags = 0;
1104 1105
1105 wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx); 1106 wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
1106 wrq->wr.rdma.rkey = kiblnd_rd_frag_key(dstrd, dstidx); 1107 wrq->rkey = kiblnd_rd_frag_key(dstrd, dstidx);
1107 1108
1108 srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob); 1109 srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
1109 dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob); 1110 dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
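The conversion above shows the general pattern behind the work-request split: RDMA WRITE requests now live in struct ib_rdma_wr, which embeds the generic request as .wr, so chains are built by linking the embedded members and ib_post_send() receives &wrq->wr. A self-contained sketch of the same pattern (build_write_wr is an illustrative name, not part of the patch):

	#include <linux/string.h>
	#include <rdma/ib_verbs.h>

	/* Illustration: fill one RDMA WRITE work request using the split
	 * layout and chain it to the next request in the array. */
	static void build_write_wr(struct ib_rdma_wr *wrq, struct ib_rdma_wr *next,
				   struct ib_sge *sge, u64 remote_addr, u32 rkey,
				   u64 wr_id)
	{
		memset(wrq, 0, sizeof(*wrq));

		wrq->wr.next       = next ? &next->wr : NULL;
		wrq->wr.wr_id      = wr_id;
		wrq->wr.sg_list    = sge;
		wrq->wr.num_sge    = 1;
		wrq->wr.opcode     = IB_WR_RDMA_WRITE;
		wrq->wr.send_flags = 0;

		/* fields that used to sit in the old wr.wr.rdma union */
		wrq->remote_addr = remote_addr;
		wrq->rkey        = rkey;
	}

Posting then goes through the embedded member, e.g. ib_post_send(qp, &tx->tx_wrq->wr, &bad_wrq), exactly as the kiblnd_post_tx_locked() hunk does.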
diff --git a/drivers/staging/rdma/amso1100/c2_qp.c b/drivers/staging/rdma/amso1100/c2_qp.c
index e0a7aff0eb2a..ca364dbe369c 100644
--- a/drivers/staging/rdma/amso1100/c2_qp.c
+++ b/drivers/staging/rdma/amso1100/c2_qp.c
@@ -860,9 +860,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
860 flags |= SQ_READ_FENCE; 860 flags |= SQ_READ_FENCE;
861 } 861 }
862 wr.sqwr.rdma_write.remote_stag = 862 wr.sqwr.rdma_write.remote_stag =
863 cpu_to_be32(ib_wr->wr.rdma.rkey); 863 cpu_to_be32(rdma_wr(ib_wr)->rkey);
864 wr.sqwr.rdma_write.remote_to = 864 wr.sqwr.rdma_write.remote_to =
865 cpu_to_be64(ib_wr->wr.rdma.remote_addr); 865 cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
866 err = move_sgl((struct c2_data_addr *) 866 err = move_sgl((struct c2_data_addr *)
867 & (wr.sqwr.rdma_write.data), 867 & (wr.sqwr.rdma_write.data),
868 ib_wr->sg_list, 868 ib_wr->sg_list,
@@ -889,9 +889,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
889 wr.sqwr.rdma_read.local_to = 889 wr.sqwr.rdma_read.local_to =
890 cpu_to_be64(ib_wr->sg_list->addr); 890 cpu_to_be64(ib_wr->sg_list->addr);
891 wr.sqwr.rdma_read.remote_stag = 891 wr.sqwr.rdma_read.remote_stag =
892 cpu_to_be32(ib_wr->wr.rdma.rkey); 892 cpu_to_be32(rdma_wr(ib_wr)->rkey);
893 wr.sqwr.rdma_read.remote_to = 893 wr.sqwr.rdma_read.remote_to =
894 cpu_to_be64(ib_wr->wr.rdma.remote_addr); 894 cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
895 wr.sqwr.rdma_read.length = 895 wr.sqwr.rdma_read.length =
896 cpu_to_be32(ib_wr->sg_list->length); 896 cpu_to_be32(ib_wr->sg_list->length);
897 break; 897 break;
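rdma_wr() used above is one of the downcast helpers added alongside the split: each typed work request embeds struct ib_send_wr, and the helper is just a container_of(). Roughly, the definitions in <rdma/ib_verbs.h> look like the following at this point in the tree (exact qualifiers may differ):

	struct ib_rdma_wr {
		struct ib_send_wr	wr;
		u64			remote_addr;
		u32			rkey;
	};

	static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_rdma_wr, wr);
	}

ud_wr() and atomic_wr(), seen in later hunks, follow the same container_of() pattern for struct ib_ud_wr and struct ib_atomic_wr.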
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
index 47f94984353d..10e2074384f5 100644
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ b/drivers/staging/rdma/ehca/ehca_reqs.c
@@ -110,19 +110,19 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
110/* need ib_mad struct */ 110/* need ib_mad struct */
111#include <rdma/ib_mad.h> 111#include <rdma/ib_mad.h>
112 112
113static void trace_send_wr_ud(const struct ib_send_wr *send_wr) 113static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
114{ 114{
115 int idx; 115 int idx;
116 int j; 116 int j;
117 while (send_wr) { 117 while (ud_wr) {
118 struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; 118 struct ib_mad_hdr *mad_hdr = ud_wr->mad_hdr;
119 struct ib_sge *sge = send_wr->sg_list; 119 struct ib_sge *sge = ud_wr->wr.sg_list;
120 ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " 120 ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
121 "send_flags=%x opcode=%x", idx, send_wr->wr_id, 121 "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
122 send_wr->num_sge, send_wr->send_flags, 122 ud_wr->wr.num_sge, ud_wr->wr.send_flags,
123 send_wr->opcode); 123 ud_wr->wr.opcode);
124 if (mad_hdr) { 124 if (mad_hdr) {
125 ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " 125 ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
126 "mgmt_class=%x class_version=%x method=%x " 126 "mgmt_class=%x class_version=%x method=%x "
127 "status=%x class_specific=%x tid=%lx " 127 "status=%x class_specific=%x tid=%lx "
128 "attr_id=%x resv=%x attr_mod=%x", 128 "attr_id=%x resv=%x attr_mod=%x",
@@ -134,33 +134,33 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
134 mad_hdr->resv, 134 mad_hdr->resv,
135 mad_hdr->attr_mod); 135 mad_hdr->attr_mod);
136 } 136 }
137 for (j = 0; j < send_wr->num_sge; j++) { 137 for (j = 0; j < ud_wr->wr.num_sge; j++) {
138 u8 *data = __va(sge->addr); 138 u8 *data = __va(sge->addr);
139 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " 139 ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
140 "lkey=%x", 140 "lkey=%x",
141 idx, j, data, sge->length, sge->lkey); 141 idx, j, data, sge->length, sge->lkey);
142 /* assume length is n*16 */ 142 /* assume length is n*16 */
143 ehca_dmp(data, sge->length, "send_wr#%x sge#%x", 143 ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
144 idx, j); 144 idx, j);
145 sge++; 145 sge++;
146 } /* eof for j */ 146 } /* eof for j */
147 idx++; 147 idx++;
148 send_wr = send_wr->next; 148 ud_wr = ud_wr(ud_wr->wr.next);
149 } /* eof while send_wr */ 149 } /* eof while ud_wr */
150} 150}
151 151
152#endif /* DEBUG_GSI_SEND_WR */ 152#endif /* DEBUG_GSI_SEND_WR */
153 153
154static inline int ehca_write_swqe(struct ehca_qp *qp, 154static inline int ehca_write_swqe(struct ehca_qp *qp,
155 struct ehca_wqe *wqe_p, 155 struct ehca_wqe *wqe_p,
156 const struct ib_send_wr *send_wr, 156 struct ib_send_wr *send_wr,
157 u32 sq_map_idx, 157 u32 sq_map_idx,
158 int hidden) 158 int hidden)
159{ 159{
160 u32 idx; 160 u32 idx;
161 u64 dma_length; 161 u64 dma_length;
162 struct ehca_av *my_av; 162 struct ehca_av *my_av;
163 u32 remote_qkey = send_wr->wr.ud.remote_qkey; 163 u32 remote_qkey;
164 struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; 164 struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
165 165
166 if (unlikely((send_wr->num_sge < 0) || 166 if (unlikely((send_wr->num_sge < 0) ||
@@ -223,20 +223,21 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
223 /* no break is intentional here */ 223 /* no break is intentional here */
224 case IB_QPT_UD: 224 case IB_QPT_UD:
225 /* IB 1.2 spec C10-15 compliance */ 225 /* IB 1.2 spec C10-15 compliance */
226 if (send_wr->wr.ud.remote_qkey & 0x80000000) 226 remote_qkey = ud_wr(send_wr)->remote_qkey;
227 if (remote_qkey & 0x80000000)
227 remote_qkey = qp->qkey; 228 remote_qkey = qp->qkey;
228 229
229 wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; 230 wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
230 wqe_p->local_ee_context_qkey = remote_qkey; 231 wqe_p->local_ee_context_qkey = remote_qkey;
231 if (unlikely(!send_wr->wr.ud.ah)) { 232 if (unlikely(!ud_wr(send_wr)->ah)) {
232 ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); 233 ehca_gen_err("ud_wr(send_wr)->ah is NULL. qp=%p", qp);
233 return -EINVAL; 234 return -EINVAL;
234 } 235 }
235 if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { 236 if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
236 ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); 237 ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
237 return -EINVAL; 238 return -EINVAL;
238 } 239 }
239 my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); 240 my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
240 wqe_p->u.ud_av.ud_av = my_av->av; 241 wqe_p->u.ud_av.ud_av = my_av->av;
241 242
242 /* 243 /*
@@ -255,9 +256,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
255 qp->qp_type == IB_QPT_GSI) 256 qp->qp_type == IB_QPT_GSI)
256 wqe_p->u.ud_av.ud_av.pmtu = 1; 257 wqe_p->u.ud_av.ud_av.pmtu = 1;
257 if (qp->qp_type == IB_QPT_GSI) { 258 if (qp->qp_type == IB_QPT_GSI) {
258 wqe_p->pkeyi = send_wr->wr.ud.pkey_index; 259 wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
259#ifdef DEBUG_GSI_SEND_WR 260#ifdef DEBUG_GSI_SEND_WR
260 trace_send_wr_ud(send_wr); 261 trace_ud_wr(ud_wr(send_wr));
261#endif /* DEBUG_GSI_SEND_WR */ 262#endif /* DEBUG_GSI_SEND_WR */
262 } 263 }
263 break; 264 break;
@@ -269,8 +270,8 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
269 case IB_QPT_RC: 270 case IB_QPT_RC:
270 /* TODO: atomic not implemented */ 271 /* TODO: atomic not implemented */
271 wqe_p->u.nud.remote_virtual_address = 272 wqe_p->u.nud.remote_virtual_address =
272 send_wr->wr.rdma.remote_addr; 273 rdma_wr(send_wr)->remote_addr;
273 wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; 274 wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
274 275
275 /* 276 /*
276 * omitted checking of IB_SEND_INLINE 277 * omitted checking of IB_SEND_INLINE
diff --git a/drivers/staging/rdma/hfi1/keys.c b/drivers/staging/rdma/hfi1/keys.c
index f6eff177ace1..cb4e6087dfdb 100644
--- a/drivers/staging/rdma/hfi1/keys.c
+++ b/drivers/staging/rdma/hfi1/keys.c
@@ -354,58 +354,3 @@ bail:
354 rcu_read_unlock(); 354 rcu_read_unlock();
355 return 0; 355 return 0;
356} 356}
357
358/*
359 * Initialize the memory region specified by the work request.
360 */
361int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
362{
363 struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
364 struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
365 struct hfi1_mregion *mr;
366 u32 rkey = wr->wr.fast_reg.rkey;
367 unsigned i, n, m;
368 int ret = -EINVAL;
369 unsigned long flags;
370 u64 *page_list;
371 size_t ps;
372
373 spin_lock_irqsave(&rkt->lock, flags);
374 if (pd->user || rkey == 0)
375 goto bail;
376
377 mr = rcu_dereference_protected(
378 rkt->table[(rkey >> (32 - hfi1_lkey_table_size))],
379 lockdep_is_held(&rkt->lock));
380 if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
381 goto bail;
382
383 if (wr->wr.fast_reg.page_list_len > mr->max_segs)
384 goto bail;
385
386 ps = 1UL << wr->wr.fast_reg.page_shift;
387 if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
388 goto bail;
389
390 mr->user_base = wr->wr.fast_reg.iova_start;
391 mr->iova = wr->wr.fast_reg.iova_start;
392 mr->lkey = rkey;
393 mr->length = wr->wr.fast_reg.length;
394 mr->access_flags = wr->wr.fast_reg.access_flags;
395 page_list = wr->wr.fast_reg.page_list->page_list;
396 m = 0;
397 n = 0;
398 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
399 mr->map[m]->segs[n].vaddr = (void *) page_list[i];
400 mr->map[m]->segs[n].length = ps;
401 if (++n == HFI1_SEGSZ) {
402 m++;
403 n = 0;
404 }
405 }
406
407 ret = 0;
408bail:
409 spin_unlock_irqrestore(&rkt->lock, flags);
410 return ret;
411}
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
index 0208fc200c1a..568f185a022d 100644
--- a/drivers/staging/rdma/hfi1/mr.c
+++ b/drivers/staging/rdma/hfi1/mr.c
@@ -344,9 +344,10 @@ out:
344 344
345/* 345/*
346 * Allocate a memory region usable with the 346 * Allocate a memory region usable with the
347 * IB_WR_FAST_REG_MR send work request. 347 * IB_WR_REG_MR send work request.
348 * 348 *
349 * Return the memory region on success, otherwise return an errno. 349 * Return the memory region on success, otherwise return an errno.
350 * FIXME: IB_WR_REG_MR is not supported
350 */ 351 */
351struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd, 352struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
352 enum ib_mr_type mr_type, 353 enum ib_mr_type mr_type,
@@ -364,36 +365,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
364 return &mr->ibmr; 365 return &mr->ibmr;
365} 366}
366 367
367struct ib_fast_reg_page_list *
368hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
369{
370 unsigned size = page_list_len * sizeof(u64);
371 struct ib_fast_reg_page_list *pl;
372
373 if (size > PAGE_SIZE)
374 return ERR_PTR(-EINVAL);
375
376 pl = kzalloc(sizeof(*pl), GFP_KERNEL);
377 if (!pl)
378 return ERR_PTR(-ENOMEM);
379
380 pl->page_list = kzalloc(size, GFP_KERNEL);
381 if (!pl->page_list)
382 goto err_free;
383
384 return pl;
385
386err_free:
387 kfree(pl);
388 return ERR_PTR(-ENOMEM);
389}
390
391void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
392{
393 kfree(pl->page_list);
394 kfree(pl);
395}
396
397/** 368/**
398 * hfi1_alloc_fmr - allocate a fast memory region 369 * hfi1_alloc_fmr - allocate a fast memory region
399 * @pd: the protection domain for this memory region 370 * @pd: the protection domain for this memory region
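With hfi1_alloc_fast_reg_page_list()/hfi1_free_fast_reg_page_list() removed, consumers are expected to register memory through the replacement flow: ib_alloc_mr(), ib_map_mr_sg() and an IB_WR_REG_MR work request. A hedged caller-side sketch against the 4.4-era signatures (error unwinding trimmed; reg_one_mr is an illustrative name, and ib_map_mr_sg()'s page_size argument is assumed from the core API, not taken from this hunk):

	#include <linux/err.h>
	#include <linux/scatterlist.h>
	#include <rdma/ib_verbs.h>

	/* Sketch of the registration flow that replaces the old
	 * fast_reg page-list API removed above. */
	static int reg_one_mr(struct ib_qp *qp, struct ib_pd *pd,
			      struct scatterlist *sg, int sg_nents)
	{
		struct ib_reg_wr reg_wr = { };
		struct ib_send_wr *bad_wr;
		struct ib_mr *mr;
		int n;

		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
		if (IS_ERR(mr))
			return PTR_ERR(mr);

		/* build the page list inside the MR from the scatterlist */
		n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
		if (n < sg_nents)
			return n < 0 ? n : -EINVAL;

		reg_wr.wr.opcode = IB_WR_REG_MR;
		reg_wr.mr        = mr;
		reg_wr.key       = mr->rkey;
		reg_wr.access    = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;

		return ib_post_send(qp, &reg_wr.wr, &bad_wr);
	}

The FIXME added to hfi1_alloc_mr()'s comment records that hfi1 itself does not handle IB_WR_REG_MR yet; the sketch shows only the consumer side of the API.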
diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c
index df1fa56eaf85..f8c36166962f 100644
--- a/drivers/staging/rdma/hfi1/qp.c
+++ b/drivers/staging/rdma/hfi1/qp.c
@@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
422 if (qp->ibqp.qp_type == IB_QPT_UD || 422 if (qp->ibqp.qp_type == IB_QPT_UD ||
423 qp->ibqp.qp_type == IB_QPT_SMI || 423 qp->ibqp.qp_type == IB_QPT_SMI ||
424 qp->ibqp.qp_type == IB_QPT_GSI) 424 qp->ibqp.qp_type == IB_QPT_GSI)
425 atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); 425 atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
426 if (++qp->s_last >= qp->s_size) 426 if (++qp->s_last >= qp->s_size)
427 qp->s_last = 0; 427 qp->s_last = 0;
428 } 428 }
diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c
index 0b19206ff33e..5fc93bb312f1 100644
--- a/drivers/staging/rdma/hfi1/rc.c
+++ b/drivers/staging/rdma/hfi1/rc.c
@@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
404 goto bail; 404 goto bail;
405 } 405 }
406 ohdr->u.rc.reth.vaddr = 406 ohdr->u.rc.reth.vaddr =
407 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 407 cpu_to_be64(wqe->rdma_wr.remote_addr);
408 ohdr->u.rc.reth.rkey = 408 ohdr->u.rc.reth.rkey =
409 cpu_to_be32(wqe->wr.wr.rdma.rkey); 409 cpu_to_be32(wqe->rdma_wr.rkey);
410 ohdr->u.rc.reth.length = cpu_to_be32(len); 410 ohdr->u.rc.reth.length = cpu_to_be32(len);
411 hwords += sizeof(struct ib_reth) / sizeof(u32); 411 hwords += sizeof(struct ib_reth) / sizeof(u32);
412 wqe->lpsn = wqe->psn; 412 wqe->lpsn = wqe->psn;
@@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
455 wqe->lpsn = qp->s_next_psn++; 455 wqe->lpsn = qp->s_next_psn++;
456 } 456 }
457 ohdr->u.rc.reth.vaddr = 457 ohdr->u.rc.reth.vaddr =
458 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 458 cpu_to_be64(wqe->rdma_wr.remote_addr);
459 ohdr->u.rc.reth.rkey = 459 ohdr->u.rc.reth.rkey =
460 cpu_to_be32(wqe->wr.wr.rdma.rkey); 460 cpu_to_be32(wqe->rdma_wr.rkey);
461 ohdr->u.rc.reth.length = cpu_to_be32(len); 461 ohdr->u.rc.reth.length = cpu_to_be32(len);
462 qp->s_state = OP(RDMA_READ_REQUEST); 462 qp->s_state = OP(RDMA_READ_REQUEST);
463 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 463 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
488 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 488 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
489 qp->s_state = OP(COMPARE_SWAP); 489 qp->s_state = OP(COMPARE_SWAP);
490 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 490 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
491 wqe->wr.wr.atomic.swap); 491 wqe->atomic_wr.swap);
492 ohdr->u.atomic_eth.compare_data = cpu_to_be64( 492 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
493 wqe->wr.wr.atomic.compare_add); 493 wqe->atomic_wr.compare_add);
494 } else { 494 } else {
495 qp->s_state = OP(FETCH_ADD); 495 qp->s_state = OP(FETCH_ADD);
496 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 496 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
497 wqe->wr.wr.atomic.compare_add); 497 wqe->atomic_wr.compare_add);
498 ohdr->u.atomic_eth.compare_data = 0; 498 ohdr->u.atomic_eth.compare_data = 0;
499 } 499 }
500 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( 500 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
501 wqe->wr.wr.atomic.remote_addr >> 32); 501 wqe->atomic_wr.remote_addr >> 32);
502 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( 502 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
503 wqe->wr.wr.atomic.remote_addr); 503 wqe->atomic_wr.remote_addr);
504 ohdr->u.atomic_eth.rkey = cpu_to_be32( 504 ohdr->u.atomic_eth.rkey = cpu_to_be32(
505 wqe->wr.wr.atomic.rkey); 505 wqe->atomic_wr.rkey);
506 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); 506 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
507 ss = NULL; 507 ss = NULL;
508 len = 0; 508 len = 0;
@@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
629 */ 629 */
630 len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; 630 len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
631 ohdr->u.rc.reth.vaddr = 631 ohdr->u.rc.reth.vaddr =
632 cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); 632 cpu_to_be64(wqe->rdma_wr.remote_addr + len);
633 ohdr->u.rc.reth.rkey = 633 ohdr->u.rc.reth.rkey =
634 cpu_to_be32(wqe->wr.wr.rdma.rkey); 634 cpu_to_be32(wqe->rdma_wr.rkey);
635 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); 635 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
636 qp->s_state = OP(RDMA_READ_REQUEST); 636 qp->s_state = OP(RDMA_READ_REQUEST);
637 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 637 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c
index 8614b070545c..49bc9fd7a51a 100644
--- a/drivers/staging/rdma/hfi1/ruc.c
+++ b/drivers/staging/rdma/hfi1/ruc.c
@@ -481,8 +481,8 @@ again:
481 if (wqe->length == 0) 481 if (wqe->length == 0)
482 break; 482 break;
483 if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length, 483 if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
484 wqe->wr.wr.rdma.remote_addr, 484 wqe->rdma_wr.remote_addr,
485 wqe->wr.wr.rdma.rkey, 485 wqe->rdma_wr.rkey,
486 IB_ACCESS_REMOTE_WRITE))) 486 IB_ACCESS_REMOTE_WRITE)))
487 goto acc_err; 487 goto acc_err;
488 qp->r_sge.sg_list = NULL; 488 qp->r_sge.sg_list = NULL;
@@ -494,8 +494,8 @@ again:
494 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 494 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
495 goto inv_err; 495 goto inv_err;
496 if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 496 if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
497 wqe->wr.wr.rdma.remote_addr, 497 wqe->rdma_wr.remote_addr,
498 wqe->wr.wr.rdma.rkey, 498 wqe->rdma_wr.rkey,
499 IB_ACCESS_REMOTE_READ))) 499 IB_ACCESS_REMOTE_READ)))
500 goto acc_err; 500 goto acc_err;
501 release = 0; 501 release = 0;
@@ -512,18 +512,18 @@ again:
512 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 512 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
513 goto inv_err; 513 goto inv_err;
514 if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 514 if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
515 wqe->wr.wr.atomic.remote_addr, 515 wqe->atomic_wr.remote_addr,
516 wqe->wr.wr.atomic.rkey, 516 wqe->atomic_wr.rkey,
517 IB_ACCESS_REMOTE_ATOMIC))) 517 IB_ACCESS_REMOTE_ATOMIC)))
518 goto acc_err; 518 goto acc_err;
519 /* Perform atomic OP and save result. */ 519 /* Perform atomic OP and save result. */
520 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 520 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
521 sdata = wqe->wr.wr.atomic.compare_add; 521 sdata = wqe->atomic_wr.compare_add;
522 *(u64 *) sqp->s_sge.sge.vaddr = 522 *(u64 *) sqp->s_sge.sge.vaddr =
523 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 523 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
524 (u64) atomic64_add_return(sdata, maddr) - sdata : 524 (u64) atomic64_add_return(sdata, maddr) - sdata :
525 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 525 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
526 sdata, wqe->wr.wr.atomic.swap); 526 sdata, wqe->atomic_wr.swap);
527 hfi1_put_mr(qp->r_sge.sge.mr); 527 hfi1_put_mr(qp->r_sge.sge.mr);
528 qp->r_sge.num_sge = 0; 528 qp->r_sge.num_sge = 0;
529 goto send_comp; 529 goto send_comp;
@@ -912,7 +912,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
912 if (qp->ibqp.qp_type == IB_QPT_UD || 912 if (qp->ibqp.qp_type == IB_QPT_UD ||
913 qp->ibqp.qp_type == IB_QPT_SMI || 913 qp->ibqp.qp_type == IB_QPT_SMI ||
914 qp->ibqp.qp_type == IB_QPT_GSI) 914 qp->ibqp.qp_type == IB_QPT_GSI)
915 atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); 915 atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
916 916
917 /* See ch. 11.2.4.1 and 10.7.3.1 */ 917 /* See ch. 11.2.4.1 and 10.7.3.1 */
918 if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) || 918 if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c
index b536f397737c..6095039c4485 100644
--- a/drivers/staging/rdma/hfi1/uc.c
+++ b/drivers/staging/rdma/hfi1/uc.c
@@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
147 case IB_WR_RDMA_WRITE: 147 case IB_WR_RDMA_WRITE:
148 case IB_WR_RDMA_WRITE_WITH_IMM: 148 case IB_WR_RDMA_WRITE_WITH_IMM:
149 ohdr->u.rc.reth.vaddr = 149 ohdr->u.rc.reth.vaddr =
150 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 150 cpu_to_be64(wqe->rdma_wr.remote_addr);
151 ohdr->u.rc.reth.rkey = 151 ohdr->u.rc.reth.rkey =
152 cpu_to_be32(wqe->wr.wr.rdma.rkey); 152 cpu_to_be32(wqe->rdma_wr.rkey);
153 ohdr->u.rc.reth.length = cpu_to_be32(len); 153 ohdr->u.rc.reth.length = cpu_to_be32(len);
154 hwords += sizeof(struct ib_reth) / 4; 154 hwords += sizeof(struct ib_reth) / 4;
155 if (len > pmtu) { 155 if (len > pmtu) {
diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c
index d40d1a1e10aa..5a9c784bec04 100644
--- a/drivers/staging/rdma/hfi1/ud.c
+++ b/drivers/staging/rdma/hfi1/ud.c
@@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
80 80
81 rcu_read_lock(); 81 rcu_read_lock();
82 82
83 qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); 83 qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
84 if (!qp) { 84 if (!qp) {
85 ibp->n_pkt_drops++; 85 ibp->n_pkt_drops++;
86 rcu_read_unlock(); 86 rcu_read_unlock();
@@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
98 goto drop; 98 goto drop;
99 } 99 }
100 100
101 ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; 101 ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
102 ppd = ppd_from_ibp(ibp); 102 ppd = ppd_from_ibp(ibp);
103 103
104 if (qp->ibqp.qp_num > 1) { 104 if (qp->ibqp.qp_num > 1) {
@@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
128 if (qp->ibqp.qp_num) { 128 if (qp->ibqp.qp_num) {
129 u32 qkey; 129 u32 qkey;
130 130
131 qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? 131 qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
132 sqp->qkey : swqe->wr.wr.ud.remote_qkey; 132 sqp->qkey : swqe->ud_wr.remote_qkey;
133 if (unlikely(qkey != qp->qkey)) { 133 if (unlikely(qkey != qp->qkey)) {
134 u16 lid; 134 u16 lid;
135 135
@@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
234 if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) { 234 if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
235 if (sqp->ibqp.qp_type == IB_QPT_GSI || 235 if (sqp->ibqp.qp_type == IB_QPT_GSI ||
236 sqp->ibqp.qp_type == IB_QPT_SMI) 236 sqp->ibqp.qp_type == IB_QPT_SMI)
237 wc.pkey_index = swqe->wr.wr.ud.pkey_index; 237 wc.pkey_index = swqe->ud_wr.pkey_index;
238 else 238 else
239 wc.pkey_index = sqp->s_pkey_index; 239 wc.pkey_index = sqp->s_pkey_index;
240 } else { 240 } else {
@@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
309 /* Construct the header. */ 309 /* Construct the header. */
310 ibp = to_iport(qp->ibqp.device, qp->port_num); 310 ibp = to_iport(qp->ibqp.device, qp->port_num);
311 ppd = ppd_from_ibp(ibp); 311 ppd = ppd_from_ibp(ibp);
312 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; 312 ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
313 if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE || 313 if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
314 ah_attr->dlid == HFI1_PERMISSIVE_LID) { 314 ah_attr->dlid == HFI1_PERMISSIVE_LID) {
315 lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); 315 lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
@@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
401 bth0 |= IB_BTH_SOLICITED; 401 bth0 |= IB_BTH_SOLICITED;
402 bth0 |= extra_bytes << 20; 402 bth0 |= extra_bytes << 20;
403 if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) 403 if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
404 bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index); 404 bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
405 else 405 else
406 bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); 406 bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
407 ohdr->bth[0] = cpu_to_be32(bth0); 407 ohdr->bth[0] = cpu_to_be32(bth0);
408 ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn); 408 ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
409 ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); 409 ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
410 /* 410 /*
411 * Qkeys with the high order bit set mean use the 411 * Qkeys with the high order bit set mean use the
412 * qkey from the QP context instead of the WR (see 10.2.5). 412 * qkey from the QP context instead of the WR (see 10.2.5).
413 */ 413 */
414 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? 414 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
415 qp->qkey : wqe->wr.wr.ud.remote_qkey); 415 qp->qkey : wqe->ud_wr.remote_qkey);
416 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 416 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
417 /* disarm any ahg */ 417 /* disarm any ahg */
418 qp->s_hdr->ahgcount = 0; 418 qp->s_hdr->ahgcount = 0;
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index a13a2b135365..9beb0aa876f0 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -383,9 +383,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
383 * undefined operations. 383 * undefined operations.
384 * Make sure buffer is large enough to hold the result for atomics. 384 * Make sure buffer is large enough to hold the result for atomics.
385 */ 385 */
386 if (wr->opcode == IB_WR_FAST_REG_MR) { 386 if (qp->ibqp.qp_type == IB_QPT_UC) {
387 return -EINVAL;
388 } else if (qp->ibqp.qp_type == IB_QPT_UC) {
389 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) 387 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
390 return -EINVAL; 388 return -EINVAL;
391 } else if (qp->ibqp.qp_type != IB_QPT_RC) { 389 } else if (qp->ibqp.qp_type != IB_QPT_RC) {
@@ -394,7 +392,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
394 wr->opcode != IB_WR_SEND_WITH_IMM) 392 wr->opcode != IB_WR_SEND_WITH_IMM)
395 return -EINVAL; 393 return -EINVAL;
396 /* Check UD destination address PD */ 394 /* Check UD destination address PD */
397 if (qp->ibqp.pd != wr->wr.ud.ah->pd) 395 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
398 return -EINVAL; 396 return -EINVAL;
399 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) 397 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
400 return -EINVAL; 398 return -EINVAL;
@@ -415,7 +413,21 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
415 rkt = &to_idev(qp->ibqp.device)->lk_table; 413 rkt = &to_idev(qp->ibqp.device)->lk_table;
416 pd = to_ipd(qp->ibqp.pd); 414 pd = to_ipd(qp->ibqp.pd);
417 wqe = get_swqe_ptr(qp, qp->s_head); 415 wqe = get_swqe_ptr(qp, qp->s_head);
418 wqe->wr = *wr; 416
417
418 if (qp->ibqp.qp_type != IB_QPT_UC &&
419 qp->ibqp.qp_type != IB_QPT_RC)
420 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
421 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
422 wr->opcode == IB_WR_RDMA_WRITE ||
423 wr->opcode == IB_WR_RDMA_READ)
424 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
425 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
426 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
427 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
428 else
429 memcpy(&wqe->wr, wr, sizeof(wqe->wr));
430
419 wqe->length = 0; 431 wqe->length = 0;
420 j = 0; 432 j = 0;
421 if (wr->num_sge) { 433 if (wr->num_sge) {
@@ -441,7 +453,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
441 if (wqe->length > 0x80000000U) 453 if (wqe->length > 0x80000000U)
442 goto bail_inval_free; 454 goto bail_inval_free;
443 } else { 455 } else {
444 struct hfi1_ah *ah = to_iah(wr->wr.ud.ah); 456 struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
445 457
446 atomic_inc(&ah->refcount); 458 atomic_inc(&ah->refcount);
447 } 459 }
@@ -2055,8 +2067,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
2055 ibdev->reg_user_mr = hfi1_reg_user_mr; 2067 ibdev->reg_user_mr = hfi1_reg_user_mr;
2056 ibdev->dereg_mr = hfi1_dereg_mr; 2068 ibdev->dereg_mr = hfi1_dereg_mr;
2057 ibdev->alloc_mr = hfi1_alloc_mr; 2069 ibdev->alloc_mr = hfi1_alloc_mr;
2058 ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list;
2059 ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list;
2060 ibdev->alloc_fmr = hfi1_alloc_fmr; 2070 ibdev->alloc_fmr = hfi1_alloc_fmr;
2061 ibdev->map_phys_fmr = hfi1_map_phys_fmr; 2071 ibdev->map_phys_fmr = hfi1_map_phys_fmr;
2062 ibdev->unmap_fmr = hfi1_unmap_fmr; 2072 ibdev->unmap_fmr = hfi1_unmap_fmr;
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index e4a8a0d4ccf8..041ad07ee699 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -348,7 +348,12 @@ struct hfi1_mr {
348 * in qp->s_max_sge. 348 * in qp->s_max_sge.
349 */ 349 */
350struct hfi1_swqe { 350struct hfi1_swqe {
351 struct ib_send_wr wr; /* don't use wr.sg_list */ 351 union {
352 struct ib_send_wr wr; /* don't use wr.sg_list */
353 struct ib_rdma_wr rdma_wr;
354 struct ib_atomic_wr atomic_wr;
355 struct ib_ud_wr ud_wr;
356 };
352 u32 psn; /* first packet sequence number */ 357 u32 psn; /* first packet sequence number */
353 u32 lpsn; /* last packet sequence number */ 358 u32 lpsn; /* last packet sequence number */
354 u32 ssn; /* send sequence number */ 359 u32 ssn; /* send sequence number */
@@ -1021,13 +1026,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
1021 enum ib_mr_type mr_type, 1026 enum ib_mr_type mr_type,
1022 u32 max_entries); 1027 u32 max_entries);
1023 1028
1024struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list(
1025 struct ib_device *ibdev, int page_list_len);
1026
1027void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
1028
1029int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr);
1030
1031struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags, 1029struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
1032 struct ib_fmr_attr *fmr_attr); 1030 struct ib_fmr_attr *fmr_attr);
1033 1031
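The anonymous union above is the trick the soft drivers in this series rely on: every typed request begins with an embedded struct ib_send_wr, so generic fields remain reachable as wqe->wr.opcode while rdma_wr, atomic_wr and ud_wr overlay the type-specific fields that used to hide in wr.wr.*. A tiny self-contained illustration of the aliasing (the example_ names are not from the patch):

	#include <rdma/ib_verbs.h>

	/* Minimal stand-in for the swqe union above: the generic and typed
	 * views alias because each typed WR starts with ib_send_wr. */
	struct example_swqe {
		union {
			struct ib_send_wr   wr;
			struct ib_rdma_wr   rdma_wr;
			struct ib_atomic_wr atomic_wr;
			struct ib_ud_wr     ud_wr;
		};
	};

	static bool example_is_rdma_write(const struct example_swqe *wqe)
	{
		return wqe->wr.opcode == IB_WR_RDMA_WRITE &&  /* common prefix */
		       wqe->rdma_wr.rkey != 0;                /* typed overlay */
	}

This is why post_one_send() can copy only the member that matches the opcode while the rest of the driver keeps reading wqe->wr for the common fields.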
diff --git a/drivers/staging/rdma/ipath/ipath_rc.c b/drivers/staging/rdma/ipath/ipath_rc.c
index 79b3dbc97179..d4aa53574e57 100644
--- a/drivers/staging/rdma/ipath/ipath_rc.c
+++ b/drivers/staging/rdma/ipath/ipath_rc.c
@@ -350,9 +350,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
350 goto bail; 350 goto bail;
351 } 351 }
352 ohdr->u.rc.reth.vaddr = 352 ohdr->u.rc.reth.vaddr =
353 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 353 cpu_to_be64(wqe->rdma_wr.remote_addr);
354 ohdr->u.rc.reth.rkey = 354 ohdr->u.rc.reth.rkey =
355 cpu_to_be32(wqe->wr.wr.rdma.rkey); 355 cpu_to_be32(wqe->rdma_wr.rkey);
356 ohdr->u.rc.reth.length = cpu_to_be32(len); 356 ohdr->u.rc.reth.length = cpu_to_be32(len);
357 hwords += sizeof(struct ib_reth) / sizeof(u32); 357 hwords += sizeof(struct ib_reth) / sizeof(u32);
358 wqe->lpsn = wqe->psn; 358 wqe->lpsn = wqe->psn;
@@ -401,9 +401,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
401 wqe->lpsn = qp->s_next_psn++; 401 wqe->lpsn = qp->s_next_psn++;
402 } 402 }
403 ohdr->u.rc.reth.vaddr = 403 ohdr->u.rc.reth.vaddr =
404 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 404 cpu_to_be64(wqe->rdma_wr.remote_addr);
405 ohdr->u.rc.reth.rkey = 405 ohdr->u.rc.reth.rkey =
406 cpu_to_be32(wqe->wr.wr.rdma.rkey); 406 cpu_to_be32(wqe->rdma_wr.rkey);
407 ohdr->u.rc.reth.length = cpu_to_be32(len); 407 ohdr->u.rc.reth.length = cpu_to_be32(len);
408 qp->s_state = OP(RDMA_READ_REQUEST); 408 qp->s_state = OP(RDMA_READ_REQUEST);
409 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 409 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -433,21 +433,21 @@ int ipath_make_rc_req(struct ipath_qp *qp)
433 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 433 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
434 qp->s_state = OP(COMPARE_SWAP); 434 qp->s_state = OP(COMPARE_SWAP);
435 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 435 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
436 wqe->wr.wr.atomic.swap); 436 wqe->atomic_wr.swap);
437 ohdr->u.atomic_eth.compare_data = cpu_to_be64( 437 ohdr->u.atomic_eth.compare_data = cpu_to_be64(
438 wqe->wr.wr.atomic.compare_add); 438 wqe->atomic_wr.compare_add);
439 } else { 439 } else {
440 qp->s_state = OP(FETCH_ADD); 440 qp->s_state = OP(FETCH_ADD);
441 ohdr->u.atomic_eth.swap_data = cpu_to_be64( 441 ohdr->u.atomic_eth.swap_data = cpu_to_be64(
442 wqe->wr.wr.atomic.compare_add); 442 wqe->atomic_wr.compare_add);
443 ohdr->u.atomic_eth.compare_data = 0; 443 ohdr->u.atomic_eth.compare_data = 0;
444 } 444 }
445 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( 445 ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
446 wqe->wr.wr.atomic.remote_addr >> 32); 446 wqe->atomic_wr.remote_addr >> 32);
447 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( 447 ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
448 wqe->wr.wr.atomic.remote_addr); 448 wqe->atomic_wr.remote_addr);
449 ohdr->u.atomic_eth.rkey = cpu_to_be32( 449 ohdr->u.atomic_eth.rkey = cpu_to_be32(
450 wqe->wr.wr.atomic.rkey); 450 wqe->atomic_wr.rkey);
451 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); 451 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
452 ss = NULL; 452 ss = NULL;
453 len = 0; 453 len = 0;
@@ -567,9 +567,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
567 ipath_init_restart(qp, wqe); 567 ipath_init_restart(qp, wqe);
568 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu; 568 len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
569 ohdr->u.rc.reth.vaddr = 569 ohdr->u.rc.reth.vaddr =
570 cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); 570 cpu_to_be64(wqe->rdma_wr.remote_addr + len);
571 ohdr->u.rc.reth.rkey = 571 ohdr->u.rc.reth.rkey =
572 cpu_to_be32(wqe->wr.wr.rdma.rkey); 572 cpu_to_be32(wqe->rdma_wr.rkey);
573 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); 573 ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
574 qp->s_state = OP(RDMA_READ_REQUEST); 574 qp->s_state = OP(RDMA_READ_REQUEST);
575 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 575 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/staging/rdma/ipath/ipath_ruc.c b/drivers/staging/rdma/ipath/ipath_ruc.c
index 2296832f94da..e541a01f1f61 100644
--- a/drivers/staging/rdma/ipath/ipath_ruc.c
+++ b/drivers/staging/rdma/ipath/ipath_ruc.c
@@ -352,8 +352,8 @@ again:
352 if (wqe->length == 0) 352 if (wqe->length == 0)
353 break; 353 break;
354 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, 354 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
355 wqe->wr.wr.rdma.remote_addr, 355 wqe->rdma_wr.remote_addr,
356 wqe->wr.wr.rdma.rkey, 356 wqe->rdma_wr.rkey,
357 IB_ACCESS_REMOTE_WRITE))) 357 IB_ACCESS_REMOTE_WRITE)))
358 goto acc_err; 358 goto acc_err;
359 break; 359 break;
@@ -362,8 +362,8 @@ again:
362 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 362 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
363 goto inv_err; 363 goto inv_err;
364 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 364 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
365 wqe->wr.wr.rdma.remote_addr, 365 wqe->rdma_wr.remote_addr,
366 wqe->wr.wr.rdma.rkey, 366 wqe->rdma_wr.rkey,
367 IB_ACCESS_REMOTE_READ))) 367 IB_ACCESS_REMOTE_READ)))
368 goto acc_err; 368 goto acc_err;
369 qp->r_sge.sge = wqe->sg_list[0]; 369 qp->r_sge.sge = wqe->sg_list[0];
@@ -376,18 +376,18 @@ again:
376 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 376 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
377 goto inv_err; 377 goto inv_err;
378 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 378 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
379 wqe->wr.wr.atomic.remote_addr, 379 wqe->atomic_wr.remote_addr,
380 wqe->wr.wr.atomic.rkey, 380 wqe->atomic_wr.rkey,
381 IB_ACCESS_REMOTE_ATOMIC))) 381 IB_ACCESS_REMOTE_ATOMIC)))
382 goto acc_err; 382 goto acc_err;
383 /* Perform atomic OP and save result. */ 383 /* Perform atomic OP and save result. */
384 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 384 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
385 sdata = wqe->wr.wr.atomic.compare_add; 385 sdata = wqe->atomic_wr.compare_add;
386 *(u64 *) sqp->s_sge.sge.vaddr = 386 *(u64 *) sqp->s_sge.sge.vaddr =
387 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 387 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
388 (u64) atomic64_add_return(sdata, maddr) - sdata : 388 (u64) atomic64_add_return(sdata, maddr) - sdata :
389 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 389 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
390 sdata, wqe->wr.wr.atomic.swap); 390 sdata, wqe->atomic_wr.swap);
391 goto send_comp; 391 goto send_comp;
392 392
393 default: 393 default:
diff --git a/drivers/staging/rdma/ipath/ipath_uc.c b/drivers/staging/rdma/ipath/ipath_uc.c
index 22e60998f1a7..0246b30280b9 100644
--- a/drivers/staging/rdma/ipath/ipath_uc.c
+++ b/drivers/staging/rdma/ipath/ipath_uc.c
@@ -126,9 +126,9 @@ int ipath_make_uc_req(struct ipath_qp *qp)
126 case IB_WR_RDMA_WRITE: 126 case IB_WR_RDMA_WRITE:
127 case IB_WR_RDMA_WRITE_WITH_IMM: 127 case IB_WR_RDMA_WRITE_WITH_IMM:
128 ohdr->u.rc.reth.vaddr = 128 ohdr->u.rc.reth.vaddr =
129 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 129 cpu_to_be64(wqe->rdma_wr.remote_addr);
130 ohdr->u.rc.reth.rkey = 130 ohdr->u.rc.reth.rkey =
131 cpu_to_be32(wqe->wr.wr.rdma.rkey); 131 cpu_to_be32(wqe->rdma_wr.rkey);
132 ohdr->u.rc.reth.length = cpu_to_be32(len); 132 ohdr->u.rc.reth.length = cpu_to_be32(len);
133 hwords += sizeof(struct ib_reth) / 4; 133 hwords += sizeof(struct ib_reth) / 4;
134 if (len > pmtu) { 134 if (len > pmtu) {
diff --git a/drivers/staging/rdma/ipath/ipath_ud.c b/drivers/staging/rdma/ipath/ipath_ud.c
index 33fcfe206bc9..385d9410a51e 100644
--- a/drivers/staging/rdma/ipath/ipath_ud.c
+++ b/drivers/staging/rdma/ipath/ipath_ud.c
@@ -64,7 +64,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
64 u32 rlen; 64 u32 rlen;
65 u32 length; 65 u32 length;
66 66
67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); 67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
68 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 68 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
69 dev->n_pkt_drops++; 69 dev->n_pkt_drops++;
70 goto done; 70 goto done;
@@ -76,8 +76,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
76 * qkey from the QP context instead of the WR (see 10.2.5). 76 * qkey from the QP context instead of the WR (see 10.2.5).
77 */ 77 */
78 if (unlikely(qp->ibqp.qp_num && 78 if (unlikely(qp->ibqp.qp_num &&
79 ((int) swqe->wr.wr.ud.remote_qkey < 0 ? 79 ((int) swqe->ud_wr.remote_qkey < 0 ?
80 sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) { 80 sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
81 /* XXX OK to lose a count once in a while. */ 81 /* XXX OK to lose a count once in a while. */
82 dev->qkey_violations++; 82 dev->qkey_violations++;
83 dev->n_pkt_drops++; 83 dev->n_pkt_drops++;
@@ -174,7 +174,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
174 } else 174 } else
175 spin_unlock_irqrestore(&rq->lock, flags); 175 spin_unlock_irqrestore(&rq->lock, flags);
176 176
177 ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; 177 ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
178 if (ah_attr->ah_flags & IB_AH_GRH) { 178 if (ah_attr->ah_flags & IB_AH_GRH) {
179 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh)); 179 ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
180 wc.wc_flags |= IB_WC_GRH; 180 wc.wc_flags |= IB_WC_GRH;
@@ -224,7 +224,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
224 wc.port_num = 1; 224 wc.port_num = 1;
225 /* Signal completion event if the solicited bit is set. */ 225 /* Signal completion event if the solicited bit is set. */
226 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 226 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
227 swqe->wr.send_flags & IB_SEND_SOLICITED); 227 swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
228drop: 228drop:
229 if (atomic_dec_and_test(&qp->refcount)) 229 if (atomic_dec_and_test(&qp->refcount))
230 wake_up(&qp->wait); 230 wake_up(&qp->wait);
@@ -279,7 +279,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
279 next_cur = 0; 279 next_cur = 0;
280 280
281 /* Construct the header. */ 281 /* Construct the header. */
282 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; 282 ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
283 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) { 283 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
284 if (ah_attr->dlid != IPATH_PERMISSIVE_LID) 284 if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
285 dev->n_multicast_xmit++; 285 dev->n_multicast_xmit++;
@@ -321,7 +321,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
321 qp->s_wqe = wqe; 321 qp->s_wqe = wqe;
322 qp->s_sge.sge = wqe->sg_list[0]; 322 qp->s_sge.sge = wqe->sg_list[0];
323 qp->s_sge.sg_list = wqe->sg_list + 1; 323 qp->s_sge.sg_list = wqe->sg_list + 1;
324 qp->s_sge.num_sge = wqe->wr.num_sge; 324 qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
325 325
326 if (ah_attr->ah_flags & IB_AH_GRH) { 326 if (ah_attr->ah_flags & IB_AH_GRH) {
327 /* Header size in 32-bit words. */ 327 /* Header size in 32-bit words. */
@@ -339,9 +339,9 @@ int ipath_make_ud_req(struct ipath_qp *qp)
339 lrh0 = IPATH_LRH_BTH; 339 lrh0 = IPATH_LRH_BTH;
340 ohdr = &qp->s_hdr.u.oth; 340 ohdr = &qp->s_hdr.u.oth;
341 } 341 }
342 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 342 if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
343 qp->s_hdrwords++; 343 qp->s_hdrwords++;
344 ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; 344 ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
345 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; 345 bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
346 } else 346 } else
347 bth0 = IB_OPCODE_UD_SEND_ONLY << 24; 347 bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
@@ -359,7 +359,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
359 qp->s_hdr.lrh[3] = cpu_to_be16(lid); 359 qp->s_hdr.lrh[3] = cpu_to_be16(lid);
360 } else 360 } else
361 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE; 361 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
362 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 362 if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
363 bth0 |= 1 << 23; 363 bth0 |= 1 << 23;
364 bth0 |= extra_bytes << 20; 364 bth0 |= extra_bytes << 20;
365 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY : 365 bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -371,14 +371,14 @@ int ipath_make_ud_req(struct ipath_qp *qp)
371 ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && 371 ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
372 ah_attr->dlid != IPATH_PERMISSIVE_LID ? 372 ah_attr->dlid != IPATH_PERMISSIVE_LID ?
373 cpu_to_be32(IPATH_MULTICAST_QPN) : 373 cpu_to_be32(IPATH_MULTICAST_QPN) :
374 cpu_to_be32(wqe->wr.wr.ud.remote_qpn); 374 cpu_to_be32(wqe->ud_wr.remote_qpn);
375 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); 375 ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
376 /* 376 /*
377 * Qkeys with the high order bit set mean use the 377 * Qkeys with the high order bit set mean use the
378 * qkey from the QP context instead of the WR (see 10.2.5). 378 * qkey from the QP context instead of the WR (see 10.2.5).
379 */ 379 */
380 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? 380 ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
381 qp->qkey : wqe->wr.wr.ud.remote_qkey); 381 qp->qkey : wqe->ud_wr.remote_qkey);
382 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 382 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
383 383
384done: 384done:
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index a2fb41bba117..1778dee13f99 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -374,7 +374,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
374 wr->opcode != IB_WR_SEND_WITH_IMM) 374 wr->opcode != IB_WR_SEND_WITH_IMM)
375 goto bail_inval; 375 goto bail_inval;
376 /* Check UD destination address PD */ 376 /* Check UD destination address PD */
377 if (qp->ibqp.pd != wr->wr.ud.ah->pd) 377 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
378 goto bail_inval; 378 goto bail_inval;
379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) 379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380 goto bail_inval; 380 goto bail_inval;
@@ -395,7 +395,20 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
395 } 395 }
396 396
397 wqe = get_swqe_ptr(qp, qp->s_head); 397 wqe = get_swqe_ptr(qp, qp->s_head);
398 wqe->wr = *wr; 398
399 if (qp->ibqp.qp_type != IB_QPT_UC &&
400 qp->ibqp.qp_type != IB_QPT_RC)
401 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
402 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
403 wr->opcode == IB_WR_RDMA_WRITE ||
404 wr->opcode == IB_WR_RDMA_READ)
405 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
406 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
407 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
408 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
409 else
410 memcpy(&wqe->wr, wr, sizeof(wqe->wr));
411
399 wqe->length = 0; 412 wqe->length = 0;
400 if (wr->num_sge) { 413 if (wr->num_sge) {
401 acc = wr->opcode >= IB_WR_RDMA_READ ? 414 acc = wr->opcode >= IB_WR_RDMA_READ ?
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index ec167e545e15..0a90a56870ab 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -277,7 +277,13 @@ struct ipath_mr {
277 * in qp->s_max_sge. 277 * in qp->s_max_sge.
278 */ 278 */
279struct ipath_swqe { 279struct ipath_swqe {
280 struct ib_send_wr wr; /* don't use wr.sg_list */ 280 union {
281 struct ib_send_wr wr; /* don't use wr.sg_list */
282 struct ib_ud_wr ud_wr;
283 struct ib_rdma_wr rdma_wr;
284 struct ib_atomic_wr atomic_wr;
285 };
286
281 u32 psn; /* first packet sequence number */ 287 u32 psn; /* first packet sequence number */
282 u32 lpsn; /* last packet sequence number */ 288 u32 lpsn; /* last packet sequence number */
283 u32 ssn; /* send sequence number */ 289 u32 ssn; /* send sequence number */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5a8677bafe04..7501626ab529 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -214,6 +214,8 @@ enum {
214 MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, 214 MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28,
215 MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, 215 MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29,
216 MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, 216 MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30,
217 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
218 MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
217}; 219};
218 220
219enum { 221enum {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index de45a51b3f04..fe052e234906 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -135,7 +135,10 @@ struct mlx4_rss_context {
135 135
136struct mlx4_qp_path { 136struct mlx4_qp_path {
137 u8 fl; 137 u8 fl;
138 u8 vlan_control; 138 union {
139 u8 vlan_control;
140 u8 control;
141 };
139 u8 disable_pkey_check; 142 u8 disable_pkey_check;
140 u8 pkey_index; 143 u8 pkey_index;
141 u8 counter_index; 144 u8 counter_index;
@@ -156,9 +159,16 @@ struct mlx4_qp_path {
156}; 159};
157 160
158enum { /* fl */ 161enum { /* fl */
159 MLX4_FL_CV = 1 << 6, 162 MLX4_FL_CV = 1 << 6,
160 MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 163 MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2,
164 MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1,
165 MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0,
161}; 166};
167
168enum { /* control */
169 MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7,
170};
171
162enum { /* vlan_control */ 172enum { /* vlan_control */
163 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, 173 MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6,
164 MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ 174 MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */
@@ -254,6 +264,8 @@ enum {
254 MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, 264 MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32,
255 MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, 265 MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32,
256 MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, 266 MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32,
267 MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32,
268 MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32,
257}; 269};
258 270
259enum { /* param3 */ 271enum { /* param3 */
@@ -436,11 +448,13 @@ enum mlx4_update_qp_attr {
436 MLX4_UPDATE_QP_VSD = 1 << 1, 448 MLX4_UPDATE_QP_VSD = 1 << 1,
437 MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, 449 MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2,
438 MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, 450 MLX4_UPDATE_QP_QOS_VPORT = 1 << 3,
439 MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 451 MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4,
452 MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1
440}; 453};
441 454
442enum mlx4_update_qp_params_flags { 455enum mlx4_update_qp_params_flags {
443 MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, 456 MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0,
457 MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1,
444}; 458};
445 459
446struct mlx4_update_qp_params { 460struct mlx4_update_qp_params {
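The new MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute and the reshuffled params flags are what a caller toggles to have the firmware drop multicast frames looped back to their sending function. A hedged sketch of such a caller, assuming the existing mlx4_update_qp(dev, qpn, attr, params) helper and the caps.flags2 guard used in the resource-tracker hunk earlier (example_block_mc_loopback is an illustrative name):

	#include <linux/errno.h>
	#include <linux/mlx4/device.h>
	#include <linux/mlx4/qp.h>

	/* Sketch: request FW-level source check on multicast loopback for a QP,
	 * guarded by the capability bit introduced in this series. */
	static int example_block_mc_loopback(struct mlx4_dev *dev, u32 qpn)
	{
		struct mlx4_update_qp_params params = { };

		if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB))
			return -EOPNOTSUPP;

		params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB;

		return mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
				      &params);
	}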
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 7ccc961f33e9..1e4438ea2380 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge {
105}; 105};
106struct svc_rdma_fastreg_mr { 106struct svc_rdma_fastreg_mr {
107 struct ib_mr *mr; 107 struct ib_mr *mr;
108 void *kva; 108 struct scatterlist *sg;
109 struct ib_fast_reg_page_list *page_list; 109 int sg_nents;
110 int page_list_len;
111 unsigned long access_flags; 110 unsigned long access_flags;
112 unsigned long map_len;
113 enum dma_data_direction direction; 111 enum dma_data_direction direction;
114 struct list_head frmr_list; 112 struct list_head frmr_list;
115}; 113};
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index fde33ac6b58a..11528591d0d7 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -47,6 +47,7 @@
47#include <rdma/ib_verbs.h> 47#include <rdma/ib_verbs.h>
48#include <rdma/ib_pack.h> 48#include <rdma/ib_pack.h>
49#include <net/ipv6.h> 49#include <net/ipv6.h>
50#include <net/net_namespace.h>
50 51
51struct rdma_addr_client { 52struct rdma_addr_client {
52 atomic_t refcount; 53 atomic_t refcount;
@@ -64,6 +65,16 @@ void rdma_addr_register_client(struct rdma_addr_client *client);
64 */ 65 */
65void rdma_addr_unregister_client(struct rdma_addr_client *client); 66void rdma_addr_unregister_client(struct rdma_addr_client *client);
66 67
68/**
69 * struct rdma_dev_addr - Contains resolved RDMA hardware addresses
70 * @src_dev_addr: Source MAC address.
71 * @dst_dev_addr: Destination MAC address.
72 * @broadcast: Broadcast address of the device.
73 * @dev_type: The interface hardware type of the device.
74 * @bound_dev_if: An optional device interface index.
75 * @transport: The transport type used.
76 * @net: Network namespace containing the bound_dev_if net_dev.
77 */
67struct rdma_dev_addr { 78struct rdma_dev_addr {
68 unsigned char src_dev_addr[MAX_ADDR_LEN]; 79 unsigned char src_dev_addr[MAX_ADDR_LEN];
69 unsigned char dst_dev_addr[MAX_ADDR_LEN]; 80 unsigned char dst_dev_addr[MAX_ADDR_LEN];
@@ -71,11 +82,14 @@ struct rdma_dev_addr {
71 unsigned short dev_type; 82 unsigned short dev_type;
72 int bound_dev_if; 83 int bound_dev_if;
73 enum rdma_transport_type transport; 84 enum rdma_transport_type transport;
85 struct net *net;
74}; 86};
75 87
76/** 88/**
77 * rdma_translate_ip - Translate a local IP address to an RDMA hardware 89 * rdma_translate_ip - Translate a local IP address to an RDMA hardware
78 * address. 90 * address.
91 *
92 * The dev_addr->net field must be initialized.
79 */ 93 */
80int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, 94int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
81 u16 *vlan_id); 95 u16 *vlan_id);
@@ -90,7 +104,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
90 * @dst_addr: The destination address to resolve. 104 * @dst_addr: The destination address to resolve.
91 * @addr: A reference to a data location that will receive the resolved 105 * @addr: A reference to a data location that will receive the resolved
92 * addresses. The data location must remain valid until the callback has 106 * addresses. The data location must remain valid until the callback has
93 * been invoked. 107 * been invoked. The net field of the addr struct must be valid.
94 * @timeout_ms: Amount of time to wait for the address resolution to complete. 108 * @timeout_ms: Amount of time to wait for the address resolution to complete.
95 * @callback: Call invoked once address resolution has completed, timed out, 109 * @callback: Call invoked once address resolution has completed, timed out,
96 * or been canceled. A status of 0 indicates success. 110 * or been canceled. A status of 0 indicates success.
@@ -112,7 +126,7 @@ int rdma_addr_size(struct sockaddr *addr);
112 126
113int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); 127int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
114int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, 128int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
115 u8 *smac, u16 *vlan_id); 129 u8 *smac, u16 *vlan_id, int if_index);
116 130
117static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) 131static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
118{ 132{
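
Illustrative only, not part of the patch: a minimal sketch of how a caller of the updated ib_addr.h API might set the new net field before resolving an address. Here `sin` stands for an already-filled struct sockaddr_in and error unwinding is trimmed; the use of &init_net is an assumption, callers may hold a different namespace.

	struct rdma_dev_addr dev_addr = { };
	u16 vlan_id;
	int ret;

	/* The namespace must now be initialized before translation (assumed: init_net). */
	dev_addr.net = &init_net;

	ret = rdma_translate_ip((struct sockaddr *)&sin, &dev_addr, &vlan_id);
	if (ret)
		pr_warn("rdma_translate_ip failed: %d\n", ret);
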
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index bd92130f4ac5..269a27cf0a46 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -43,6 +43,8 @@
43 * @port_num: The port number of the device to query. 43 * @port_num: The port number of the device to query.
44 * @index: The index into the cached GID table to query. 44 * @index: The index into the cached GID table to query.
45 * @gid: The GID value found at the specified index. 45 * @gid: The GID value found at the specified index.
46 * @attr: The GID attribute found at the specified index (only in RoCE).
47 * NULL means ignore (output parameter).
46 * 48 *
47 * ib_get_cached_gid() fetches the specified GID table entry stored in 49 * ib_get_cached_gid() fetches the specified GID table entry stored in
48 * the local software cache. 50 * the local software cache.
@@ -50,13 +52,15 @@
50int ib_get_cached_gid(struct ib_device *device, 52int ib_get_cached_gid(struct ib_device *device,
51 u8 port_num, 53 u8 port_num,
52 int index, 54 int index,
53 union ib_gid *gid); 55 union ib_gid *gid,
56 struct ib_gid_attr *attr);
54 57
55/** 58/**
56 * ib_find_cached_gid - Returns the port number and GID table index where 59 * ib_find_cached_gid - Returns the port number and GID table index where
57 * a specified GID value occurs. 60 * a specified GID value occurs.
58 * @device: The device to query. 61 * @device: The device to query.
59 * @gid: The GID value to search for. 62 * @gid: The GID value to search for.
63 * @ndev: In RoCE, the net device of the device. NULL means ignore.
60 * @port_num: The port number of the device where the GID value was found. 64 * @port_num: The port number of the device where the GID value was found.
61 * @index: The index into the cached GID table where the GID was found. This 65 * @index: The index into the cached GID table where the GID was found. This
62 * parameter may be NULL. 66 * parameter may be NULL.
@@ -64,12 +68,40 @@ int ib_get_cached_gid(struct ib_device *device,
64 * ib_find_cached_gid() searches for the specified GID value in 68 * ib_find_cached_gid() searches for the specified GID value in
65 * the local software cache. 69 * the local software cache.
66 */ 70 */
67int ib_find_cached_gid(struct ib_device *device, 71int ib_find_cached_gid(struct ib_device *device,
68 const union ib_gid *gid, 72 const union ib_gid *gid,
69 u8 *port_num, 73 struct net_device *ndev,
70 u16 *index); 74 u8 *port_num,
75 u16 *index);
71 76
72/** 77/**
78 * ib_find_cached_gid_by_port - Returns the GID table index where a specified
79 * GID value occurs
80 * @device: The device to query.
81 * @gid: The GID value to search for.
 82 * @port_num: The port number of the device where the GID value should be
83 * searched.
 84 * @ndev: In RoCE, the net device of the device. NULL means ignore.
85 * @index: The index into the cached GID table where the GID was found. This
86 * parameter may be NULL.
87 *
 88 * ib_find_cached_gid_by_port() searches for the specified GID value in
89 * the local software cache.
90 */
91int ib_find_cached_gid_by_port(struct ib_device *device,
92 const union ib_gid *gid,
93 u8 port_num,
94 struct net_device *ndev,
95 u16 *index);
96
97int ib_find_gid_by_filter(struct ib_device *device,
98 const union ib_gid *gid,
99 u8 port_num,
100 bool (*filter)(const union ib_gid *gid,
101 const struct ib_gid_attr *,
102 void *),
103 void *context, u16 *index);
104/**
73 * ib_get_cached_pkey - Returns a cached PKey table entry 105 * ib_get_cached_pkey - Returns a cached PKey table entry
74 * @device: The device to query. 106 * @device: The device to query.
75 * @port_num: The port number of the device to query. 107 * @port_num: The port number of the device to query.
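
Illustrative only: a sketch of the extended cache lookups, assuming `device` is an ib_device, `ndev` the associated net_device, and port 1 is being queried.

	union ib_gid gid;
	struct ib_gid_attr attr;
	u8 port;
	u16 index;

	/* Fetch entry 0 of port 1 together with its attributes (attr may be NULL to ignore). */
	if (ib_get_cached_gid(device, 1, 0, &gid, &attr))
		return -ENOENT;

	/* Look the same GID up again, restricting RoCE matches to ndev. */
	if (ib_find_cached_gid(device, &gid, ndev, &port, &index))
		return -ENOENT;
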
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index 709a5331e6b9..e99d8f9a4551 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -76,7 +76,7 @@ enum {
76 IB_OPCODE_UC = 0x20, 76 IB_OPCODE_UC = 0x20,
77 IB_OPCODE_RD = 0x40, 77 IB_OPCODE_RD = 0x40,
78 IB_OPCODE_UD = 0x60, 78 IB_OPCODE_UD = 0x60,
79 /* per IBTA 3.1 Table 38, A10.3.2 */ 79 /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
80 IB_OPCODE_CNP = 0x80, 80 IB_OPCODE_CNP = 0x80,
81 81
82 /* operations -- just used to define real constants */ 82 /* operations -- just used to define real constants */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 7e071a6abb34..301969552d0a 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -39,6 +39,7 @@
39#include <linux/compiler.h> 39#include <linux/compiler.h>
40 40
41#include <linux/atomic.h> 41#include <linux/atomic.h>
42#include <linux/netdevice.h>
42 43
43#include <rdma/ib_verbs.h> 44#include <rdma/ib_verbs.h>
44#include <rdma/ib_mad.h> 45#include <rdma/ib_mad.h>
@@ -154,11 +155,18 @@ struct ib_sa_path_rec {
154 u8 packet_life_time_selector; 155 u8 packet_life_time_selector;
155 u8 packet_life_time; 156 u8 packet_life_time;
156 u8 preference; 157 u8 preference;
157 u8 smac[ETH_ALEN];
158 u8 dmac[ETH_ALEN]; 158 u8 dmac[ETH_ALEN];
159 u16 vlan_id; 159 /* ignored in IB */
160 int ifindex;
161 /* ignored in IB */
162 struct net *net;
160}; 163};
161 164
165static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
166{
167 return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL;
168}
169
162#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) 170#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
163#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) 171#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1)
164#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) 172#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2)
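
Illustrative only: ib_get_ndev_from_path() takes a reference via dev_get_by_index(), so a caller is expected to drop it when done; `rec` is an assumed struct ib_sa_path_rec pointer.

	struct net_device *ndev = ib_get_ndev_from_path(rec);

	if (ndev) {
		/* ... use ndev ... */
		dev_put(ndev);	/* balance the dev_get_by_index() reference */
	}
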
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7845fae6f2df..9a68a19532ba 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -137,6 +137,8 @@ enum ib_device_cap_flags {
137 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), 137 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
138 IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), 138 IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
139 IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), 139 IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
140 IB_DEVICE_RC_IP_CSUM = (1<<25),
141 IB_DEVICE_RAW_IP_CSUM = (1<<26),
140 IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), 142 IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
141 IB_DEVICE_SIGNATURE_HANDOVER = (1<<30), 143 IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
142 IB_DEVICE_ON_DEMAND_PAGING = (1<<31), 144 IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
@@ -474,7 +476,7 @@ enum ib_event_type {
474 IB_EVENT_GID_CHANGE, 476 IB_EVENT_GID_CHANGE,
475}; 477};
476 478
477__attribute_const__ const char *ib_event_msg(enum ib_event_type event); 479const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
478 480
479struct ib_event { 481struct ib_event {
480 struct ib_device *device; 482 struct ib_device *device;
@@ -697,7 +699,6 @@ struct ib_ah_attr {
697 u8 ah_flags; 699 u8 ah_flags;
698 u8 port_num; 700 u8 port_num;
699 u8 dmac[ETH_ALEN]; 701 u8 dmac[ETH_ALEN];
700 u16 vlan_id;
701}; 702};
702 703
703enum ib_wc_status { 704enum ib_wc_status {
@@ -725,7 +726,7 @@ enum ib_wc_status {
725 IB_WC_GENERAL_ERR 726 IB_WC_GENERAL_ERR
726}; 727};
727 728
728__attribute_const__ const char *ib_wc_status_msg(enum ib_wc_status status); 729const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
729 730
730enum ib_wc_opcode { 731enum ib_wc_opcode {
731 IB_WC_SEND, 732 IB_WC_SEND,
@@ -736,7 +737,7 @@ enum ib_wc_opcode {
736 IB_WC_BIND_MW, 737 IB_WC_BIND_MW,
737 IB_WC_LSO, 738 IB_WC_LSO,
738 IB_WC_LOCAL_INV, 739 IB_WC_LOCAL_INV,
739 IB_WC_FAST_REG_MR, 740 IB_WC_REG_MR,
740 IB_WC_MASKED_COMP_SWAP, 741 IB_WC_MASKED_COMP_SWAP,
741 IB_WC_MASKED_FETCH_ADD, 742 IB_WC_MASKED_FETCH_ADD,
742/* 743/*
@@ -873,7 +874,6 @@ enum ib_qp_create_flags {
873 IB_QP_CREATE_RESERVED_END = 1 << 31, 874 IB_QP_CREATE_RESERVED_END = 1 << 31,
874}; 875};
875 876
876
877/* 877/*
878 * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler 878 * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
879 * callback to destroy the passed in QP. 879 * callback to destroy the passed in QP.
@@ -957,10 +957,10 @@ enum ib_qp_attr_mask {
957 IB_QP_PATH_MIG_STATE = (1<<18), 957 IB_QP_PATH_MIG_STATE = (1<<18),
958 IB_QP_CAP = (1<<19), 958 IB_QP_CAP = (1<<19),
959 IB_QP_DEST_QPN = (1<<20), 959 IB_QP_DEST_QPN = (1<<20),
960 IB_QP_SMAC = (1<<21), 960 IB_QP_RESERVED1 = (1<<21),
961 IB_QP_ALT_SMAC = (1<<22), 961 IB_QP_RESERVED2 = (1<<22),
962 IB_QP_VID = (1<<23), 962 IB_QP_RESERVED3 = (1<<23),
963 IB_QP_ALT_VID = (1<<24), 963 IB_QP_RESERVED4 = (1<<24),
964}; 964};
965 965
966enum ib_qp_state { 966enum ib_qp_state {
@@ -1010,10 +1010,6 @@ struct ib_qp_attr {
1010 u8 rnr_retry; 1010 u8 rnr_retry;
1011 u8 alt_port_num; 1011 u8 alt_port_num;
1012 u8 alt_timeout; 1012 u8 alt_timeout;
1013 u8 smac[ETH_ALEN];
1014 u8 alt_smac[ETH_ALEN];
1015 u16 vlan_id;
1016 u16 alt_vlan_id;
1017}; 1013};
1018 1014
1019enum ib_wr_opcode { 1015enum ib_wr_opcode {
@@ -1028,7 +1024,7 @@ enum ib_wr_opcode {
1028 IB_WR_SEND_WITH_INV, 1024 IB_WR_SEND_WITH_INV,
1029 IB_WR_RDMA_READ_WITH_INV, 1025 IB_WR_RDMA_READ_WITH_INV,
1030 IB_WR_LOCAL_INV, 1026 IB_WR_LOCAL_INV,
1031 IB_WR_FAST_REG_MR, 1027 IB_WR_REG_MR,
1032 IB_WR_MASKED_ATOMIC_CMP_AND_SWP, 1028 IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
1033 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, 1029 IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
1034 IB_WR_BIND_MW, 1030 IB_WR_BIND_MW,
@@ -1066,12 +1062,6 @@ struct ib_sge {
1066 u32 lkey; 1062 u32 lkey;
1067}; 1063};
1068 1064
1069struct ib_fast_reg_page_list {
1070 struct ib_device *device;
1071 u64 *page_list;
1072 unsigned int max_page_list_len;
1073};
1074
1075/** 1065/**
1076 * struct ib_mw_bind_info - Parameters for a memory window bind operation. 1066 * struct ib_mw_bind_info - Parameters for a memory window bind operation.
1077 * @mr: A memory region to bind the memory window to. 1067 * @mr: A memory region to bind the memory window to.
@@ -1100,54 +1090,89 @@ struct ib_send_wr {
1100 __be32 imm_data; 1090 __be32 imm_data;
1101 u32 invalidate_rkey; 1091 u32 invalidate_rkey;
1102 } ex; 1092 } ex;
1103 union {
1104 struct {
1105 u64 remote_addr;
1106 u32 rkey;
1107 } rdma;
1108 struct {
1109 u64 remote_addr;
1110 u64 compare_add;
1111 u64 swap;
1112 u64 compare_add_mask;
1113 u64 swap_mask;
1114 u32 rkey;
1115 } atomic;
1116 struct {
1117 struct ib_ah *ah;
1118 void *header;
1119 int hlen;
1120 int mss;
1121 u32 remote_qpn;
1122 u32 remote_qkey;
1123 u16 pkey_index; /* valid for GSI only */
1124 u8 port_num; /* valid for DR SMPs on switch only */
1125 } ud;
1126 struct {
1127 u64 iova_start;
1128 struct ib_fast_reg_page_list *page_list;
1129 unsigned int page_shift;
1130 unsigned int page_list_len;
1131 u32 length;
1132 int access_flags;
1133 u32 rkey;
1134 } fast_reg;
1135 struct {
1136 struct ib_mw *mw;
1137 /* The new rkey for the memory window. */
1138 u32 rkey;
1139 struct ib_mw_bind_info bind_info;
1140 } bind_mw;
1141 struct {
1142 struct ib_sig_attrs *sig_attrs;
1143 struct ib_mr *sig_mr;
1144 int access_flags;
1145 struct ib_sge *prot;
1146 } sig_handover;
1147 } wr;
1148 u32 xrc_remote_srq_num; /* XRC TGT QPs only */
1149}; 1093};
1150 1094
1095struct ib_rdma_wr {
1096 struct ib_send_wr wr;
1097 u64 remote_addr;
1098 u32 rkey;
1099};
1100
1101static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
1102{
1103 return container_of(wr, struct ib_rdma_wr, wr);
1104}
1105
1106struct ib_atomic_wr {
1107 struct ib_send_wr wr;
1108 u64 remote_addr;
1109 u64 compare_add;
1110 u64 swap;
1111 u64 compare_add_mask;
1112 u64 swap_mask;
1113 u32 rkey;
1114};
1115
1116static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
1117{
1118 return container_of(wr, struct ib_atomic_wr, wr);
1119}
1120
1121struct ib_ud_wr {
1122 struct ib_send_wr wr;
1123 struct ib_ah *ah;
1124 void *header;
1125 int hlen;
1126 int mss;
1127 u32 remote_qpn;
1128 u32 remote_qkey;
1129 u16 pkey_index; /* valid for GSI only */
1130 u8 port_num; /* valid for DR SMPs on switch only */
1131};
1132
1133static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
1134{
1135 return container_of(wr, struct ib_ud_wr, wr);
1136}
1137
1138struct ib_reg_wr {
1139 struct ib_send_wr wr;
1140 struct ib_mr *mr;
1141 u32 key;
1142 int access;
1143};
1144
1145static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
1146{
1147 return container_of(wr, struct ib_reg_wr, wr);
1148}
1149
1150struct ib_bind_mw_wr {
1151 struct ib_send_wr wr;
1152 struct ib_mw *mw;
1153 /* The new rkey for the memory window. */
1154 u32 rkey;
1155 struct ib_mw_bind_info bind_info;
1156};
1157
1158static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
1159{
1160 return container_of(wr, struct ib_bind_mw_wr, wr);
1161}
1162
1163struct ib_sig_handover_wr {
1164 struct ib_send_wr wr;
1165 struct ib_sig_attrs *sig_attrs;
1166 struct ib_mr *sig_mr;
1167 int access_flags;
1168 struct ib_sge *prot;
1169};
1170
1171static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
1172{
1173 return container_of(wr, struct ib_sig_handover_wr, wr);
1174}
1175
1151struct ib_recv_wr { 1176struct ib_recv_wr {
1152 struct ib_recv_wr *next; 1177 struct ib_recv_wr *next;
1153 u64 wr_id; 1178 u64 wr_id;
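
Illustrative only: with the per-opcode union gone from ib_send_wr, an RDMA write is now built on the containing ib_rdma_wr and posted through its embedded wr member. Here qp, sge, remote_addr and rkey are assumed to be set up elsewhere; providers recover the extended WR with the rdma_wr()/atomic_wr()/ud_wr()/reg_wr() helpers.

	struct ib_rdma_wr wr = { };
	struct ib_send_wr *bad_wr;
	int ret;

	wr.wr.opcode     = IB_WR_RDMA_WRITE;
	wr.wr.sg_list    = &sge;
	wr.wr.num_sge    = 1;
	wr.wr.send_flags = IB_SEND_SIGNALED;
	wr.remote_addr   = remote_addr;
	wr.rkey          = rkey;

	ret = ib_post_send(qp, &wr.wr, &bad_wr);

	/* On the provider side the posted ib_send_wr is downcast back, e.g.: */
	/*	struct ib_rdma_wr *rwr = rdma_wr(posted_wr);                    */
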
@@ -1334,6 +1359,9 @@ struct ib_mr {
1334 struct ib_uobject *uobject; 1359 struct ib_uobject *uobject;
1335 u32 lkey; 1360 u32 lkey;
1336 u32 rkey; 1361 u32 rkey;
1362 u64 iova;
1363 u32 length;
1364 unsigned int page_size;
1337 atomic_t usecnt; /* count number of MWs */ 1365 atomic_t usecnt; /* count number of MWs */
1338}; 1366};
1339 1367
@@ -1718,9 +1746,9 @@ struct ib_device {
1718 struct ib_mr * (*alloc_mr)(struct ib_pd *pd, 1746 struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
1719 enum ib_mr_type mr_type, 1747 enum ib_mr_type mr_type,
1720 u32 max_num_sg); 1748 u32 max_num_sg);
1721 struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, 1749 int (*map_mr_sg)(struct ib_mr *mr,
1722 int page_list_len); 1750 struct scatterlist *sg,
1723 void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); 1751 int sg_nents);
1724 int (*rereg_phys_mr)(struct ib_mr *mr, 1752 int (*rereg_phys_mr)(struct ib_mr *mr,
1725 int mr_rereg_mask, 1753 int mr_rereg_mask,
1726 struct ib_pd *pd, 1754 struct ib_pd *pd,
@@ -2176,7 +2204,8 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
2176} 2204}
2177 2205
2178int ib_query_gid(struct ib_device *device, 2206int ib_query_gid(struct ib_device *device,
2179 u8 port_num, int index, union ib_gid *gid); 2207 u8 port_num, int index, union ib_gid *gid,
2208 struct ib_gid_attr *attr);
2180 2209
2181int ib_query_pkey(struct ib_device *device, 2210int ib_query_pkey(struct ib_device *device,
2182 u8 port_num, u16 index, u16 *pkey); 2211 u8 port_num, u16 index, u16 *pkey);
@@ -2190,7 +2219,7 @@ int ib_modify_port(struct ib_device *device,
2190 struct ib_port_modify *port_modify); 2219 struct ib_port_modify *port_modify);
2191 2220
2192int ib_find_gid(struct ib_device *device, union ib_gid *gid, 2221int ib_find_gid(struct ib_device *device, union ib_gid *gid,
2193 u8 *port_num, u16 *index); 2222 struct net_device *ndev, u8 *port_num, u16 *index);
2194 2223
2195int ib_find_pkey(struct ib_device *device, 2224int ib_find_pkey(struct ib_device *device,
2196 u8 port_num, u16 pkey, u16 *index); 2225 u8 port_num, u16 pkey, u16 *index);
@@ -2829,33 +2858,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
2829 u32 max_num_sg); 2858 u32 max_num_sg);
2830 2859
2831/** 2860/**
2832 * ib_alloc_fast_reg_page_list - Allocates a page list array
2833 * @device - ib device pointer.
2834 * @page_list_len - size of the page list array to be allocated.
2835 *
2836 * This allocates and returns a struct ib_fast_reg_page_list * and a
2837 * page_list array that is at least page_list_len in size. The actual
2838 * size is returned in max_page_list_len. The caller is responsible
2839 * for initializing the contents of the page_list array before posting
2840 * a send work request with the IB_WC_FAST_REG_MR opcode.
2841 *
2842 * The page_list array entries must be translated using one of the
2843 * ib_dma_*() functions just like the addresses passed to
2844 * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct
2845 * ib_fast_reg_page_list must not be modified by the caller until the
2846 * IB_WC_FAST_REG_MR work request completes.
2847 */
2848struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
2849 struct ib_device *device, int page_list_len);
2850
2851/**
2852 * ib_free_fast_reg_page_list - Deallocates a previously allocated
2853 * page list array.
2854 * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
2855 */
2856void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
2857
2858/**
2859 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR 2861 * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
2860 * R_Key and L_Key. 2862 * R_Key and L_Key.
2861 * @mr - struct ib_mr pointer to be updated. 2863 * @mr - struct ib_mr pointer to be updated.
@@ -3023,4 +3025,28 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
3023 u16 pkey, const union ib_gid *gid, 3025 u16 pkey, const union ib_gid *gid,
3024 const struct sockaddr *addr); 3026 const struct sockaddr *addr);
3025 3027
3028int ib_map_mr_sg(struct ib_mr *mr,
3029 struct scatterlist *sg,
3030 int sg_nents,
3031 unsigned int page_size);
3032
3033static inline int
3034ib_map_mr_sg_zbva(struct ib_mr *mr,
3035 struct scatterlist *sg,
3036 int sg_nents,
3037 unsigned int page_size)
3038{
3039 int n;
3040
3041 n = ib_map_mr_sg(mr, sg, sg_nents, page_size);
3042 mr->iova = 0;
3043
3044 return n;
3045}
3046
3047int ib_sg_to_pages(struct ib_mr *mr,
3048 struct scatterlist *sgl,
3049 int sg_nents,
3050 int (*set_page)(struct ib_mr *, u64));
3051
3026#endif /* IB_VERBS_H */ 3052#endif /* IB_VERBS_H */
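
Illustrative only: the replacement flow for the removed fast-registration calls, roughly as the converted ULPs in this pull use it. A pd, a qp, a DMA-mapped scatterlist sg with nents entries, an assumed rolling u8 counter `key`, and error unwinding are all taken as given.

	struct ib_mr *mr;
	struct ib_reg_wr reg_wr = { };
	struct ib_send_wr *bad_wr;
	int n;

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Lay the scatterlist out in the MR; mr->iova/length/page_size get filled in. */
	n = ib_map_mr_sg(mr, sg, nents, PAGE_SIZE);
	if (n != nents)
		return n < 0 ? n : -EINVAL;

	ib_update_fast_reg_key(mr, key++);	/* key: assumed rolling 8-bit counter */

	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr        = mr;
	reg_wr.key       = mr->rkey;
	reg_wr.access    = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	return ib_post_send(qp, &reg_wr.wr, &bad_wr);
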
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c92522c192d2..afe44fde72a5 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -62,7 +62,7 @@ enum rdma_cm_event_type {
62 RDMA_CM_EVENT_TIMEWAIT_EXIT 62 RDMA_CM_EVENT_TIMEWAIT_EXIT
63}; 63};
64 64
65__attribute_const__ const char *rdma_event_msg(enum rdma_cm_event_type event); 65const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
66 66
67enum rdma_port_space { 67enum rdma_port_space {
68 RDMA_PS_SDP = 0x0001, 68 RDMA_PS_SDP = 0x0001,
@@ -160,13 +160,17 @@ struct rdma_cm_id {
160/** 160/**
161 * rdma_create_id - Create an RDMA identifier. 161 * rdma_create_id - Create an RDMA identifier.
162 * 162 *
163 * @net: The network namespace in which to create the new id.
163 * @event_handler: User callback invoked to report events associated with the 164 * @event_handler: User callback invoked to report events associated with the
164 * returned rdma_id. 165 * returned rdma_id.
165 * @context: User specified context associated with the id. 166 * @context: User specified context associated with the id.
166 * @ps: RDMA port space. 167 * @ps: RDMA port space.
167 * @qp_type: type of queue pair associated with the id. 168 * @qp_type: type of queue pair associated with the id.
169 *
170 * The id holds a reference on the network namespace until it is destroyed.
168 */ 171 */
169struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, 172struct rdma_cm_id *rdma_create_id(struct net *net,
173 rdma_cm_event_handler event_handler,
170 void *context, enum rdma_port_space ps, 174 void *context, enum rdma_port_space ps,
171 enum ib_qp_type qp_type); 175 enum ib_qp_type qp_type);
172 176
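
Illustrative only: the updated rdma_create_id() call, matching the conversions below; my_event_handler and my_context are assumed caller-defined, and the id pins the supplied namespace until rdma_destroy_id().

	struct rdma_cm_id *id;

	id = rdma_create_id(&init_net, my_event_handler, my_context,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);
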
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 978841eeaff1..8126c143a519 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -92,6 +92,7 @@ enum {
92enum { 92enum {
93 IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, 93 IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
94 IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, 94 IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
95 IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
95 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 96 IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
96 IB_USER_VERBS_EX_CMD_DESTROY_FLOW, 97 IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
97}; 98};
@@ -516,6 +517,25 @@ struct ib_uverbs_create_qp {
516 __u64 driver_data[0]; 517 __u64 driver_data[0];
517}; 518};
518 519
520struct ib_uverbs_ex_create_qp {
521 __u64 user_handle;
522 __u32 pd_handle;
523 __u32 send_cq_handle;
524 __u32 recv_cq_handle;
525 __u32 srq_handle;
526 __u32 max_send_wr;
527 __u32 max_recv_wr;
528 __u32 max_send_sge;
529 __u32 max_recv_sge;
530 __u32 max_inline_data;
531 __u8 sq_sig_all;
532 __u8 qp_type;
533 __u8 is_srq;
534 __u8 reserved;
535 __u32 comp_mask;
536 __u32 create_flags;
537};
538
519struct ib_uverbs_open_qp { 539struct ib_uverbs_open_qp {
520 __u64 response; 540 __u64 response;
521 __u64 user_handle; 541 __u64 user_handle;
@@ -538,6 +558,12 @@ struct ib_uverbs_create_qp_resp {
538 __u32 reserved; 558 __u32 reserved;
539}; 559};
540 560
561struct ib_uverbs_ex_create_qp_resp {
562 struct ib_uverbs_create_qp_resp base;
563 __u32 comp_mask;
564 __u32 response_length;
565};
566
541/* 567/*
542 * This struct needs to remain a multiple of 8 bytes to keep the 568 * This struct needs to remain a multiple of 8 bytes to keep the
543 * alignment of the modify QP parameters. 569 * alignment of the modify QP parameters.
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index ba1210253f5e..52b4a2f993f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
655 return -ENOMEM; 655 return -ENOMEM;
656 656
657 /* Create the RDMA CM ID */ 657 /* Create the RDMA CM ID */
658 rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, 658 rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
659 IB_QPT_RC); 659 RDMA_PS_TCP, IB_QPT_RC);
660 if (IS_ERR(rdma->cm_id)) 660 if (IS_ERR(rdma->cm_id))
661 goto error; 661 goto error;
662 662
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a833ab7898fe..f222885ac0c7 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -336,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
336 /* Create a CMA ID and try to bind it. This catches both 336 /* Create a CMA ID and try to bind it. This catches both
337 * IB and iWARP capable NICs. 337 * IB and iWARP capable NICs.
338 */ 338 */
339 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 339 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
340 if (IS_ERR(cm_id)) 340 if (IS_ERR(cm_id))
341 return PTR_ERR(cm_id); 341 return PTR_ERR(cm_id);
342 342
diff --git a/net/rds/ib.h b/net/rds/ib.h
index f17d09567890..b3fdebb57460 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -75,7 +75,11 @@ struct rds_ib_connect_private {
75 75
76struct rds_ib_send_work { 76struct rds_ib_send_work {
77 void *s_op; 77 void *s_op;
78 struct ib_send_wr s_wr; 78 union {
79 struct ib_send_wr s_wr;
80 struct ib_rdma_wr s_rdma_wr;
81 struct ib_atomic_wr s_atomic_wr;
82 };
79 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 83 struct ib_sge s_sge[RDS_IB_MAX_SGE];
80 unsigned long s_queued; 84 unsigned long s_queued;
81}; 85};
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 2b2370e7f356..da5a7fb98c77 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
668 668
669 /* XXX I wonder what affect the port space has */ 669 /* XXX I wonder what affect the port space has */
670 /* delegate cm event handler to rdma_transport */ 670 /* delegate cm event handler to rdma_transport */
671 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 671 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
672 RDMA_PS_TCP, IB_QPT_RC); 672 RDMA_PS_TCP, IB_QPT_RC);
673 if (IS_ERR(ic->i_cm_id)) { 673 if (IS_ERR(ic->i_cm_id)) {
674 ret = PTR_ERR(ic->i_cm_id); 674 ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 670882c752e9..eac30bf486d7 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
777 send->s_queued = jiffies; 777 send->s_queued = jiffies;
778 778
779 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { 779 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
780 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; 780 send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
781 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; 781 send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
782 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; 782 send->s_atomic_wr.swap = op->op_m_cswp.swap;
783 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; 783 send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
784 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; 784 send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
785 } else { /* FADD */ 785 } else { /* FADD */
786 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; 786 send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
787 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; 787 send->s_atomic_wr.compare_add = op->op_m_fadd.add;
788 send->s_wr.wr.atomic.swap = 0; 788 send->s_atomic_wr.swap = 0;
789 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; 789 send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
790 send->s_wr.wr.atomic.swap_mask = 0; 790 send->s_atomic_wr.swap_mask = 0;
791 } 791 }
792 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); 792 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
793 send->s_wr.num_sge = 1; 793 send->s_atomic_wr.wr.num_sge = 1;
794 send->s_wr.next = NULL; 794 send->s_atomic_wr.wr.next = NULL;
795 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; 795 send->s_atomic_wr.remote_addr = op->op_remote_addr;
796 send->s_wr.wr.atomic.rkey = op->op_rkey; 796 send->s_atomic_wr.rkey = op->op_rkey;
797 send->s_op = op; 797 send->s_op = op;
798 rds_message_addref(container_of(send->s_op, struct rds_message, atomic)); 798 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
799 799
@@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
818 if (nr_sig) 818 if (nr_sig)
819 atomic_add(nr_sig, &ic->i_signaled_sends); 819 atomic_add(nr_sig, &ic->i_signaled_sends);
820 820
821 failed_wr = &send->s_wr; 821 failed_wr = &send->s_atomic_wr.wr;
822 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); 822 ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
823 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, 823 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
824 send, &send->s_wr, ret, failed_wr); 824 send, &send->s_atomic_wr, ret, failed_wr);
825 BUG_ON(failed_wr != &send->s_wr); 825 BUG_ON(failed_wr != &send->s_atomic_wr.wr);
826 if (ret) { 826 if (ret) {
827 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " 827 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
828 "returned %d\n", &conn->c_faddr, ret); 828 "returned %d\n", &conn->c_faddr, ret);
@@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
831 goto out; 831 goto out;
832 } 832 }
833 833
834 if (unlikely(failed_wr != &send->s_wr)) { 834 if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
835 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); 835 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
836 BUG_ON(failed_wr != &send->s_wr); 836 BUG_ON(failed_wr != &send->s_atomic_wr.wr);
837 } 837 }
838 838
839out: 839out:
@@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
904 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); 904 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
905 905
906 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 906 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
907 send->s_wr.wr.rdma.remote_addr = remote_addr; 907 send->s_rdma_wr.remote_addr = remote_addr;
908 send->s_wr.wr.rdma.rkey = op->op_rkey; 908 send->s_rdma_wr.rkey = op->op_rkey;
909 909
910 if (num_sge > max_sge) { 910 if (num_sge > max_sge) {
911 send->s_wr.num_sge = max_sge; 911 send->s_rdma_wr.wr.num_sge = max_sge;
912 num_sge -= max_sge; 912 num_sge -= max_sge;
913 } else { 913 } else {
914 send->s_wr.num_sge = num_sge; 914 send->s_rdma_wr.wr.num_sge = num_sge;
915 } 915 }
916 916
917 send->s_wr.next = NULL; 917 send->s_rdma_wr.wr.next = NULL;
918 918
919 if (prev) 919 if (prev)
920 prev->s_wr.next = &send->s_wr; 920 prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
921 921
922 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { 922 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
923 scat != &op->op_sg[op->op_count]; j++) {
923 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 924 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
924 send->s_sge[j].addr = 925 send->s_sge[j].addr =
925 ib_sg_dma_address(ic->i_cm_id->device, scat); 926 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
934 } 935 }
935 936
936 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 937 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
937 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 938 &send->s_rdma_wr.wr,
939 send->s_rdma_wr.wr.num_sge,
940 send->s_rdma_wr.wr.next);
938 941
939 prev = send; 942 prev = send;
940 if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) 943 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
955 if (nr_sig) 958 if (nr_sig)
956 atomic_add(nr_sig, &ic->i_signaled_sends); 959 atomic_add(nr_sig, &ic->i_signaled_sends);
957 960
958 failed_wr = &first->s_wr; 961 failed_wr = &first->s_rdma_wr.wr;
959 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 962 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
960 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 963 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
961 first, &first->s_wr, ret, failed_wr); 964 first, &first->s_rdma_wr.wr, ret, failed_wr);
962 BUG_ON(failed_wr != &first->s_wr); 965 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
963 if (ret) { 966 if (ret) {
964 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 967 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
965 "returned %d\n", &conn->c_faddr, ret); 968 "returned %d\n", &conn->c_faddr, ret);
@@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
968 goto out; 971 goto out;
969 } 972 }
970 973
971 if (unlikely(failed_wr != &first->s_wr)) { 974 if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
972 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); 975 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
973 BUG_ON(failed_wr != &first->s_wr); 976 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
974 } 977 }
975 978
976 979
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 3df0295c6659..576f1825fc55 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
223 /* Create a CMA ID and try to bind it. This catches both 223 /* Create a CMA ID and try to bind it. This catches both
224 * IB and iWARP capable NICs. 224 * IB and iWARP capable NICs.
225 */ 225 */
226 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); 226 cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
227 if (IS_ERR(cm_id)) 227 if (IS_ERR(cm_id))
228 return PTR_ERR(cm_id); 228 return PTR_ERR(cm_id);
229 229
diff --git a/net/rds/iw.h b/net/rds/iw.h
index cbe6674e31ee..5af01d1758b3 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
74 struct rm_rdma_op *s_op; 74 struct rm_rdma_op *s_op;
75 struct rds_iw_mapping *s_mapping; 75 struct rds_iw_mapping *s_mapping;
76 struct ib_mr *s_mr; 76 struct ib_mr *s_mr;
77 struct ib_fast_reg_page_list *s_page_list;
78 unsigned char s_remap_count; 77 unsigned char s_remap_count;
79 78
80 struct ib_send_wr s_wr; 79 union {
80 struct ib_send_wr s_send_wr;
81 struct ib_rdma_wr s_rdma_wr;
82 struct ib_reg_wr s_reg_wr;
83 };
81 struct ib_sge s_sge[RDS_IW_MAX_SGE]; 84 struct ib_sge s_sge[RDS_IW_MAX_SGE];
82 unsigned long s_queued; 85 unsigned long s_queued;
83}; 86};
@@ -195,7 +198,7 @@ struct rds_iw_device {
195 198
196/* Magic WR_ID for ACKs */ 199/* Magic WR_ID for ACKs */
197#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) 200#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
198#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) 201#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
199#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) 202#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
200 203
201struct rds_iw_statistics { 204struct rds_iw_statistics {
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a6553a6fb2bc..aea4c911bc76 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
524 524
525 /* XXX I wonder what affect the port space has */ 525 /* XXX I wonder what affect the port space has */
526 /* delegate cm event handler to rdma_transport */ 526 /* delegate cm event handler to rdma_transport */
527 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, 527 ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
528 RDMA_PS_TCP, IB_QPT_RC); 528 RDMA_PS_TCP, IB_QPT_RC);
529 if (IS_ERR(ic->i_cm_id)) { 529 if (IS_ERR(ic->i_cm_id)) {
530 ret = PTR_ERR(ic->i_cm_id); 530 ret = PTR_ERR(ic->i_cm_id);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index d3d4454ffc84..b09a40c1adce 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -47,7 +47,6 @@ struct rds_iw_mr {
47 struct rdma_cm_id *cm_id; 47 struct rdma_cm_id *cm_id;
48 48
49 struct ib_mr *mr; 49 struct ib_mr *mr;
50 struct ib_fast_reg_page_list *page_list;
51 50
52 struct rds_iw_mapping mapping; 51 struct rds_iw_mapping mapping;
53 unsigned char remap_count; 52 unsigned char remap_count;
@@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
77 76
78static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); 77static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
79static void rds_iw_mr_pool_flush_worker(struct work_struct *work); 78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
80static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 79static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
81static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, 80static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
82 struct rds_iw_mr *ibmr, 81 struct rds_iw_mr *ibmr,
83 struct scatterlist *sg, unsigned int nents); 82 struct scatterlist *sg, unsigned int nents);
84static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); 83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
258 sg->bytes = 0; 257 sg->bytes = 0;
259} 258}
260 259
261static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, 260static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
262 struct rds_iw_scatterlist *sg) 261 struct rds_iw_scatterlist *sg)
263{ 262{
264 struct ib_device *dev = rds_iwdev->dev; 263 struct ib_device *dev = rds_iwdev->dev;
265 u64 *dma_pages = NULL; 264 int i, ret;
266 int i, j, ret;
267 265
268 WARN_ON(sg->dma_len); 266 WARN_ON(sg->dma_len);
269 267
270 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); 268 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
271 if (unlikely(!sg->dma_len)) { 269 if (unlikely(!sg->dma_len)) {
272 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); 270 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
273 return ERR_PTR(-EBUSY); 271 return -EBUSY;
274 } 272 }
275 273
276 sg->bytes = 0; 274 sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
303 if (sg->dma_npages > fastreg_message_size) 301 if (sg->dma_npages > fastreg_message_size)
304 goto out_unmap; 302 goto out_unmap;
305 303
306 dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
307 if (!dma_pages) {
308 ret = -ENOMEM;
309 goto out_unmap;
310 }
311 304
312 for (i = j = 0; i < sg->dma_len; ++i) {
313 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
314 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
315 u64 end_addr;
316 305
317 end_addr = dma_addr + dma_len; 306 return 0;
318 dma_addr &= ~PAGE_MASK;
319 for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
320 dma_pages[j++] = dma_addr;
321 BUG_ON(j > sg->dma_npages);
322 }
323
324 return dma_pages;
325 307
326out_unmap: 308out_unmap:
327 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); 309 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
328 sg->dma_len = 0; 310 sg->dma_len = 0;
329 kfree(dma_pages); 311 return ret;
330 return ERR_PTR(ret);
331} 312}
332 313
333 314
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
440 INIT_LIST_HEAD(&ibmr->mapping.m_list); 421 INIT_LIST_HEAD(&ibmr->mapping.m_list);
441 ibmr->mapping.m_mr = ibmr; 422 ibmr->mapping.m_mr = ibmr;
442 423
443 err = rds_iw_init_fastreg(pool, ibmr); 424 err = rds_iw_init_reg(pool, ibmr);
444 if (err) 425 if (err)
445 goto out_no_cigar; 426 goto out_no_cigar;
446 427
@@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
620 ibmr->cm_id = cm_id; 601 ibmr->cm_id = cm_id;
621 ibmr->device = rds_iwdev; 602 ibmr->device = rds_iwdev;
622 603
623 ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); 604 ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
624 if (ret == 0) 605 if (ret == 0)
625 *key_ret = ibmr->mr->rkey; 606 *key_ret = ibmr->mr->rkey;
626 else 607 else
@@ -636,7 +617,7 @@ out:
636} 617}
637 618
638/* 619/*
639 * iWARP fastreg handling 620 * iWARP reg handling
640 * 621 *
641 * The life cycle of a fastreg registration is a bit different from 622 * The life cycle of a fastreg registration is a bit different from
642 * FMRs. 623 * FMRs.
@@ -648,7 +629,7 @@ out:
648 * This creates a bit of a problem for us, as we do not have the destination 629 * This creates a bit of a problem for us, as we do not have the destination
649 * IP in GET_MR, so the connection must be setup prior to the GET_MR call for 630 * IP in GET_MR, so the connection must be setup prior to the GET_MR call for
650 * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit 631 * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
651 * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request 632 * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
652 * before queuing the SEND. When completions for these arrive, they are 633 * before queuing the SEND. When completions for these arrive, they are
653 * dispatched to the MR has a bit set showing that RDMa can be performed. 634 * dispatched to the MR has a bit set showing that RDMa can be performed.
654 * 635 *
@@ -657,11 +638,10 @@ out:
657 * The expectation there is that this invalidation step includes ALL 638 * The expectation there is that this invalidation step includes ALL
658 * PREVIOUSLY FREED MRs. 639 * PREVIOUSLY FREED MRs.
659 */ 640 */
660static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, 641static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
661 struct rds_iw_mr *ibmr) 642 struct rds_iw_mr *ibmr)
662{ 643{
663 struct rds_iw_device *rds_iwdev = pool->device; 644 struct rds_iw_device *rds_iwdev = pool->device;
664 struct ib_fast_reg_page_list *page_list = NULL;
665 struct ib_mr *mr; 645 struct ib_mr *mr;
666 int err; 646 int err;
667 647
@@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
674 return err; 654 return err;
675 } 655 }
676 656
677 /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
678 * is not filled in.
679 */
680 page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
681 if (IS_ERR(page_list)) {
682 err = PTR_ERR(page_list);
683
684 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
685 ib_dereg_mr(mr);
686 return err;
687 }
688
689 ibmr->page_list = page_list;
690 ibmr->mr = mr; 657 ibmr->mr = mr;
691 return 0; 658 return 0;
692} 659}
693 660
694static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) 661static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
695{ 662{
696 struct rds_iw_mr *ibmr = mapping->m_mr; 663 struct rds_iw_mr *ibmr = mapping->m_mr;
697 struct ib_send_wr f_wr, *failed_wr; 664 struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
698 int ret; 665 struct ib_reg_wr reg_wr;
666 struct ib_send_wr *failed_wr;
667 int ret, n;
668
669 n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
670 if (unlikely(n != m_sg->len))
671 return n < 0 ? n : -EINVAL;
672
673 reg_wr.wr.next = NULL;
674 reg_wr.wr.opcode = IB_WR_REG_MR;
675 reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
676 reg_wr.wr.num_sge = 0;
677 reg_wr.mr = ibmr->mr;
678 reg_wr.key = mapping->m_rkey;
679 reg_wr.access = IB_ACCESS_LOCAL_WRITE |
680 IB_ACCESS_REMOTE_READ |
681 IB_ACCESS_REMOTE_WRITE;
699 682
700 /* 683 /*
701 * Perform a WR for the fast_reg_mr. Each individual page 684 * Perform a WR for the reg_mr. Each individual page
702 * in the sg list is added to the fast reg page list and placed 685 * in the sg list is added to the fast reg page list and placed
703 * inside the fast_reg_mr WR. The key used is a rolling 8bit 686 * inside the reg_mr WR. The key used is a rolling 8bit
704 * counter, which should guarantee uniqueness. 687 * counter, which should guarantee uniqueness.
705 */ 688 */
706 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); 689 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
707 mapping->m_rkey = ibmr->mr->rkey; 690 mapping->m_rkey = ibmr->mr->rkey;
708 691
709 memset(&f_wr, 0, sizeof(f_wr)); 692 failed_wr = &reg_wr.wr;
710 f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; 693 ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
711 f_wr.opcode = IB_WR_FAST_REG_MR; 694 BUG_ON(failed_wr != &reg_wr.wr);
712 f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
713 f_wr.wr.fast_reg.rkey = mapping->m_rkey;
714 f_wr.wr.fast_reg.page_list = ibmr->page_list;
715 f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
716 f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
717 f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
718 IB_ACCESS_REMOTE_READ |
719 IB_ACCESS_REMOTE_WRITE;
720 f_wr.wr.fast_reg.iova_start = 0;
721 f_wr.send_flags = IB_SEND_SIGNALED;
722
723 failed_wr = &f_wr;
724 ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
725 BUG_ON(failed_wr != &f_wr);
726 if (ret) 695 if (ret)
727 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", 696 printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
728 __func__, __LINE__, ret); 697 __func__, __LINE__, ret);
@@ -754,21 +723,20 @@ out:
754 return ret; 723 return ret;
755} 724}
756 725
757static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, 726static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
758 struct rds_iw_mr *ibmr, 727 struct rds_iw_mr *ibmr,
759 struct scatterlist *sg, 728 struct scatterlist *sg,
760 unsigned int sg_len) 729 unsigned int sg_len)
761{ 730{
762 struct rds_iw_device *rds_iwdev = pool->device; 731 struct rds_iw_device *rds_iwdev = pool->device;
763 struct rds_iw_mapping *mapping = &ibmr->mapping; 732 struct rds_iw_mapping *mapping = &ibmr->mapping;
764 u64 *dma_pages; 733 u64 *dma_pages;
765 int i, ret = 0; 734 int ret = 0;
766 735
767 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); 736 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
768 737
769 dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); 738 ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
770 if (IS_ERR(dma_pages)) { 739 if (ret) {
771 ret = PTR_ERR(dma_pages);
772 dma_pages = NULL; 740 dma_pages = NULL;
773 goto out; 741 goto out;
774 } 742 }
@@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
778 goto out; 746 goto out;
779 } 747 }
780 748
781 for (i = 0; i < mapping->m_sg.dma_npages; ++i) 749 ret = rds_iw_rdma_reg_mr(mapping);
782 ibmr->page_list->page_list[i] = dma_pages[i];
783
784 ret = rds_iw_rdma_build_fastreg(mapping);
785 if (ret) 750 if (ret)
786 goto out; 751 goto out;
787 752
@@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
867static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, 832static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
868 struct rds_iw_mr *ibmr) 833 struct rds_iw_mr *ibmr)
869{ 834{
870 if (ibmr->page_list)
871 ib_free_fast_reg_page_list(ibmr->page_list);
872 if (ibmr->mr) 835 if (ibmr->mr)
873 ib_dereg_mr(ibmr->mr); 836 ib_dereg_mr(ibmr->mr);
874} 837}
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 86152ec3b887..e20bd503f4bd 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
137 send->s_op = NULL; 137 send->s_op = NULL;
138 send->s_mapping = NULL; 138 send->s_mapping = NULL;
139 139
140 send->s_wr.next = NULL; 140 send->s_send_wr.next = NULL;
141 send->s_wr.wr_id = i; 141 send->s_send_wr.wr_id = i;
142 send->s_wr.sg_list = send->s_sge; 142 send->s_send_wr.sg_list = send->s_sge;
143 send->s_wr.num_sge = 1; 143 send->s_send_wr.num_sge = 1;
144 send->s_wr.opcode = IB_WR_SEND; 144 send->s_send_wr.opcode = IB_WR_SEND;
145 send->s_wr.send_flags = 0; 145 send->s_send_wr.send_flags = 0;
146 send->s_wr.ex.imm_data = 0; 146 send->s_send_wr.ex.imm_data = 0;
147 147
148 sge = rds_iw_data_sge(ic, send->s_sge); 148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0; 149 sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); 159 printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
160 break; 160 break;
161 } 161 }
162
163 send->s_page_list = ib_alloc_fast_reg_page_list(
164 ic->i_cm_id->device, fastreg_message_size);
165 if (IS_ERR(send->s_page_list)) {
166 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
167 break;
168 }
169 } 162 }
170} 163}
171 164
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
177 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 170 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
178 BUG_ON(!send->s_mr); 171 BUG_ON(!send->s_mr);
179 ib_dereg_mr(send->s_mr); 172 ib_dereg_mr(send->s_mr);
180 BUG_ON(!send->s_page_list); 173 if (send->s_send_wr.opcode == 0xdead)
181 ib_free_fast_reg_page_list(send->s_page_list);
182 if (send->s_wr.opcode == 0xdead)
183 continue; 174 continue;
184 if (send->s_rm) 175 if (send->s_rm)
185 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); 176 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
227 continue; 218 continue;
228 } 219 }
229 220
230 if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { 221 if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
231 ic->i_fastreg_posted = 1; 222 ic->i_fastreg_posted = 1;
232 continue; 223 continue;
233 } 224 }
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
247 send = &ic->i_sends[oldest]; 238 send = &ic->i_sends[oldest];
248 239
249 /* In the error case, wc.opcode sometimes contains garbage */ 240 /* In the error case, wc.opcode sometimes contains garbage */
250 switch (send->s_wr.opcode) { 241 switch (send->s_send_wr.opcode) {
251 case IB_WR_SEND: 242 case IB_WR_SEND:
252 if (send->s_rm) 243 if (send->s_rm)
253 rds_iw_send_unmap_rm(ic, send, wc.status); 244 rds_iw_send_unmap_rm(ic, send, wc.status);
254 break; 245 break;
255 case IB_WR_FAST_REG_MR: 246 case IB_WR_REG_MR:
256 case IB_WR_RDMA_WRITE: 247 case IB_WR_RDMA_WRITE:
257 case IB_WR_RDMA_READ: 248 case IB_WR_RDMA_READ:
258 case IB_WR_RDMA_READ_WITH_INV: 249 case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 default: 253 default:
263 printk_ratelimited(KERN_NOTICE 254 printk_ratelimited(KERN_NOTICE
264 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", 255 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
265 __func__, send->s_wr.opcode); 256 __func__, send->s_send_wr.opcode);
266 break; 257 break;
267 } 258 }
268 259
269 send->s_wr.opcode = 0xdead; 260 send->s_send_wr.opcode = 0xdead;
270 send->s_wr.num_sge = 1; 261 send->s_send_wr.num_sge = 1;
271 if (time_after(jiffies, send->s_queued + HZ/2)) 262 if (time_after(jiffies, send->s_queued + HZ/2))
272 rds_iw_stats_inc(s_iw_tx_stalled); 263 rds_iw_stats_inc(s_iw_tx_stalled);
273 264
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
455 446
456 WARN_ON(pos != send - ic->i_sends); 447 WARN_ON(pos != send - ic->i_sends);
457 448
458 send->s_wr.send_flags = send_flags; 449 send->s_send_wr.send_flags = send_flags;
459 send->s_wr.opcode = IB_WR_SEND; 450 send->s_send_wr.opcode = IB_WR_SEND;
460 send->s_wr.num_sge = 2; 451 send->s_send_wr.num_sge = 2;
461 send->s_wr.next = NULL; 452 send->s_send_wr.next = NULL;
462 send->s_queued = jiffies; 453 send->s_queued = jiffies;
463 send->s_op = NULL; 454 send->s_op = NULL;
464 455
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
472 } else { 463 } else {
473 /* We're sending a packet with no payload. There is only 464 /* We're sending a packet with no payload. There is only
474 * one SGE */ 465 * one SGE */
475 send->s_wr.num_sge = 1; 466 send->s_send_wr.num_sge = 1;
476 sge = &send->s_sge[0]; 467 sge = &send->s_sge[0];
477 } 468 }
478 469
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
672 */ 663 */
673 if (ic->i_unsignaled_wrs-- == 0) { 664 if (ic->i_unsignaled_wrs-- == 0) {
674 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 665 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
675 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 666 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
676 } 667 }
677 668
678 ic->i_unsignaled_bytes -= len; 669 ic->i_unsignaled_bytes -= len;
679 if (ic->i_unsignaled_bytes <= 0) { 670 if (ic->i_unsignaled_bytes <= 0) {
680 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; 671 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
681 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 672 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
682 } 673 }
683 674
684 /* 675 /*
685 * Always signal the last one if we're stopping due to flow control. 676 * Always signal the last one if we're stopping due to flow control.
686 */ 677 */
687 if (flow_controlled && i == (work_alloc-1)) 678 if (flow_controlled && i == (work_alloc-1))
688 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 679 send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
689 680
690 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 681 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
691 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 682 &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
692 683
693 sent += len; 684 sent += len;
694 rm->data.op_dmaoff += len; 685 rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
722 } 713 }
723 714
724 if (prev) 715 if (prev)
725 prev->s_wr.next = &send->s_wr; 716 prev->s_send_wr.next = &send->s_send_wr;
726 prev = send; 717 prev = send;
727 718
728 pos = (pos + 1) % ic->i_send_ring.w_nr; 719 pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
736 /* if we finished the message then send completion owns it */ 727 /* if we finished the message then send completion owns it */
737 if (scat == &rm->data.op_sg[rm->data.op_count]) { 728 if (scat == &rm->data.op_sg[rm->data.op_count]) {
738 prev->s_rm = ic->i_rm; 729 prev->s_rm = ic->i_rm;
739 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 730 prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
740 ic->i_rm = NULL; 731 ic->i_rm = NULL;
741 } 732 }
742 733
@@ -748,11 +739,11 @@ add_header:
748 rds_iw_send_add_credits(conn, credit_alloc - i); 739 rds_iw_send_add_credits(conn, credit_alloc - i);
749 740
750 /* XXX need to worry about failed_wr and partial sends. */ 741 /* XXX need to worry about failed_wr and partial sends. */
751 failed_wr = &first->s_wr; 742 failed_wr = &first->s_send_wr;
752 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 743 ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
753 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 744 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
754 first, &first->s_wr, ret, failed_wr); 745 first, &first->s_send_wr, ret, failed_wr);
755 BUG_ON(failed_wr != &first->s_wr); 746 BUG_ON(failed_wr != &first->s_send_wr);
756 if (ret) { 747 if (ret) {
757 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " 748 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
758 "returned %d\n", &conn->c_faddr, ret); 749 "returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
770 return ret; 761 return ret;
771} 762}
772 763
773static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) 764static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
765 struct scatterlist *sg,
766 int sg_nents)
774{ 767{
775 BUG_ON(nent > send->s_page_list->max_page_list_len); 768 int n;
776 /* 769
777 * Perform a WR for the fast_reg_mr. Each individual page 770 n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
778 * in the sg list is added to the fast reg page list and placed 771 if (unlikely(n != sg_nents))
779 * inside the fast_reg_mr WR. 772 return n < 0 ? n : -EINVAL;
780 */ 773
781 send->s_wr.opcode = IB_WR_FAST_REG_MR; 774 send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
782 send->s_wr.wr.fast_reg.length = len; 775 send->s_reg_wr.wr.wr_id = 0;
783 send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; 776 send->s_reg_wr.wr.num_sge = 0;
784 send->s_wr.wr.fast_reg.page_list = send->s_page_list; 777 send->s_reg_wr.mr = send->s_mr;
785 send->s_wr.wr.fast_reg.page_list_len = nent; 778 send->s_reg_wr.key = send->s_mr->rkey;
786 send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 779 send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
787 send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
788 send->s_wr.wr.fast_reg.iova_start = sg_addr;
789 780
790 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); 781 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
782
783 return 0;
791} 784}
792 785
793int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) 786int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
808 int sent; 801 int sent;
809 int ret; 802 int ret;
810 int num_sge; 803 int num_sge;
804 int sg_nents;
811 805
812 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); 806 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
813 807
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
861 scat = &op->op_sg[0]; 855 scat = &op->op_sg[0];
862 sent = 0; 856 sent = 0;
863 num_sge = op->op_count; 857 num_sge = op->op_count;
858 sg_nents = 0;
864 859
865 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { 860 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
866 send->s_wr.send_flags = 0; 861 send->s_rdma_wr.wr.send_flags = 0;
867 send->s_queued = jiffies; 862 send->s_queued = jiffies;
868 863
869 /* 864 /*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
872 */ 867 */
873 if (ic->i_unsignaled_wrs-- == 0) { 868 if (ic->i_unsignaled_wrs-- == 0) {
874 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; 869 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
875 send->s_wr.send_flags = IB_SEND_SIGNALED; 870 send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
876 } 871 }
877 872
878 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used 873 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
880 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. 875 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
881 */ 876 */
882 if (op->op_write) 877 if (op->op_write)
883 send->s_wr.opcode = IB_WR_RDMA_WRITE; 878 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
884 else 879 else
885 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 880 send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
886 881
887 send->s_wr.wr.rdma.remote_addr = remote_addr; 882 send->s_rdma_wr.remote_addr = remote_addr;
888 send->s_wr.wr.rdma.rkey = op->op_rkey; 883 send->s_rdma_wr.rkey = op->op_rkey;
889 send->s_op = op; 884 send->s_op = op;
890 885
891 if (num_sge > rds_iwdev->max_sge) { 886 if (num_sge > rds_iwdev->max_sge) {
892 send->s_wr.num_sge = rds_iwdev->max_sge; 887 send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
893 num_sge -= rds_iwdev->max_sge; 888 num_sge -= rds_iwdev->max_sge;
894 } else 889 } else
895 send->s_wr.num_sge = num_sge; 890 send->s_rdma_wr.wr.num_sge = num_sge;
896 891
897 send->s_wr.next = NULL; 892 send->s_rdma_wr.wr.next = NULL;
898 893
899 if (prev) 894 if (prev)
900 prev->s_wr.next = &send->s_wr; 895 prev->s_send_wr.next = &send->s_rdma_wr.wr;
901 896
902 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { 897 for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
898 scat != &op->op_sg[op->op_count]; j++) {
903 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
904 900
905 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) 901 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
906 send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); 902 sg_nents++;
907 else { 903 else {
908 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); 904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
909 send->s_sge[j].length = len; 905 send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
917 scat++; 913 scat++;
918 } 914 }
919 915
920 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { 916 if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
921 send->s_wr.num_sge = 1; 917 send->s_rdma_wr.wr.num_sge = 1;
922 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; 918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
923 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; 919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
924 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; 920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
925 } 921 }
926 922
927 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
928 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 924 &send->s_rdma_wr,
925 send->s_rdma_wr.wr.num_sge,
926 send->s_rdma_wr.wr.next);
929 927
930 prev = send; 928 prev = send;
931 if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) 929 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
934 932
935 /* if we finished the message then send completion owns it */ 933 /* if we finished the message then send completion owns it */
936 if (scat == &op->op_sg[op->op_count]) 934 if (scat == &op->op_sg[op->op_count])
937 first->s_wr.send_flags = IB_SEND_SIGNALED; 935 first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
938 936
939 if (i < work_alloc) { 937 if (i < work_alloc) {
940 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); 938 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
948 * fastreg_mr (or possibly a dma_mr) 946 * fastreg_mr (or possibly a dma_mr)
949 */ 947 */
950 if (!op->op_write) { 948 if (!op->op_write) {
951 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], 949 ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
952 op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); 950 &op->op_sg[0], sg_nents);
951 if (ret) {
952 printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
953 goto out;
954 }
953 work_alloc++; 955 work_alloc++;
954 } 956 }
955 957
956 failed_wr = &first->s_wr; 958 failed_wr = &first->s_rdma_wr.wr;
957 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 959 ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
958 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 960 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
959 first, &first->s_wr, ret, failed_wr); 961 first, &first->s_rdma_wr, ret, failed_wr);
960 BUG_ON(failed_wr != &first->s_wr); 962 BUG_ON(failed_wr != &first->s_rdma_wr.wr);
961 if (ret) { 963 if (ret) {
962 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " 964 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
963 "returned %d\n", &conn->c_faddr, ret); 965 "returned %d\n", &conn->c_faddr, ret);
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index b9b40af5345b..9c1fed81bf0f 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
142 struct rdma_cm_id *cm_id; 142 struct rdma_cm_id *cm_id;
143 int ret; 143 int ret;
144 144
145 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, 145 cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
146 IB_QPT_RC); 146 RDMA_PS_TCP, IB_QPT_RC);
147 if (IS_ERR(cm_id)) { 147 if (IS_ERR(cm_id)) {
148 ret = PTR_ERR(cm_id); 148 ret = PTR_ERR(cm_id);
149 printk(KERN_ERR "RDS/RDMA: failed to setup listener, " 149 printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
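rdma_create_id() now takes the network namespace as its first argument; callers with no namespace of their own, like this listener, pass &init_net. A hedged sketch of the new calling convention (listener helper name and error handling are illustrative):

#include <linux/err.h>
#include <net/net_namespace.h>
#include <rdma/rdma_cm.h>

/* Sketch: create a namespace-aware RDMA CM id for a listener. */
static struct rdma_cm_id *create_listener(rdma_cm_event_handler handler,
					  void *context)
{
	struct rdma_cm_id *cm_id;

	cm_id = rdma_create_id(&init_net, handler, context,
			       RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id))
		printk(KERN_ERR "rdma_create_id failed: %ld\n",
		       PTR_ERR(cm_id));
	return cm_id;
}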
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5318951b3b53..a1434447b0d6 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); 151 f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
152 if (IS_ERR(f->fr_mr)) 152 if (IS_ERR(f->fr_mr))
153 goto out_mr_err; 153 goto out_mr_err;
154 f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); 154
155 if (IS_ERR(f->fr_pgl)) 155 f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
156 if (!f->sg)
156 goto out_list_err; 157 goto out_list_err;
158
159 sg_init_table(f->sg, depth);
160
157 return 0; 161 return 0;
158 162
159out_mr_err: 163out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
163 return rc; 167 return rc;
164 168
165out_list_err: 169out_list_err:
166 rc = PTR_ERR(f->fr_pgl); 170 rc = -ENOMEM;
167 dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", 171 dprintk("RPC: %s: sg allocation failure\n",
168 __func__, rc); 172 __func__);
169 ib_dereg_mr(f->fr_mr); 173 ib_dereg_mr(f->fr_mr);
170 return rc; 174 return rc;
171} 175}
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
179 if (rc) 183 if (rc)
180 dprintk("RPC: %s: ib_dereg_mr status %i\n", 184 dprintk("RPC: %s: ib_dereg_mr status %i\n",
181 __func__, rc); 185 __func__, rc);
182 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); 186 kfree(r->r.frmr.sg);
183} 187}
184 188
185static int 189static int
@@ -312,13 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
312 struct rpcrdma_mw *mw; 316 struct rpcrdma_mw *mw;
313 struct rpcrdma_frmr *frmr; 317 struct rpcrdma_frmr *frmr;
314 struct ib_mr *mr; 318 struct ib_mr *mr;
315 struct ib_send_wr fastreg_wr, *bad_wr; 319 struct ib_reg_wr reg_wr;
320 struct ib_send_wr *bad_wr;
321 int rc, i, n, dma_nents;
316 u8 key; 322 u8 key;
317 int len, pageoff;
318 int i, rc;
319 int seg_len;
320 u64 pa;
321 int page_no;
322 323
323 mw = seg1->rl_mw; 324 mw = seg1->rl_mw;
324 seg1->rl_mw = NULL; 325 seg1->rl_mw = NULL;
@@ -331,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
331 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 332 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
332 frmr = &mw->r.frmr; 333 frmr = &mw->r.frmr;
333 frmr->fr_state = FRMR_IS_VALID; 334 frmr->fr_state = FRMR_IS_VALID;
335 mr = frmr->fr_mr;
334 336
335 pageoff = offset_in_page(seg1->mr_offset);
336 seg1->mr_offset -= pageoff; /* start of page */
337 seg1->mr_len += pageoff;
338 len = -pageoff;
339 if (nsegs > ia->ri_max_frmr_depth) 337 if (nsegs > ia->ri_max_frmr_depth)
340 nsegs = ia->ri_max_frmr_depth; 338 nsegs = ia->ri_max_frmr_depth;
341 339
342 for (page_no = i = 0; i < nsegs;) { 340 for (i = 0; i < nsegs;) {
343 rpcrdma_map_one(device, seg, direction); 341 if (seg->mr_page)
344 pa = seg->mr_dma; 342 sg_set_page(&frmr->sg[i],
345 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { 343 seg->mr_page,
346 frmr->fr_pgl->page_list[page_no++] = pa; 344 seg->mr_len,
347 pa += PAGE_SIZE; 345 offset_in_page(seg->mr_offset));
348 } 346 else
349 len += seg->mr_len; 347 sg_set_buf(&frmr->sg[i], seg->mr_offset,
348 seg->mr_len);
349
350 ++seg; 350 ++seg;
351 ++i; 351 ++i;
352
352 /* Check for holes */ 353 /* Check for holes */
353 if ((i < nsegs && offset_in_page(seg->mr_offset)) || 354 if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
354 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) 355 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
355 break; 356 break;
356 } 357 }
357 dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", 358 frmr->sg_nents = i;
358 __func__, mw, i, len); 359
359 360 dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
360 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 361 if (!dma_nents) {
361 fastreg_wr.wr_id = (unsigned long)(void *)mw; 362 pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n",
362 fastreg_wr.opcode = IB_WR_FAST_REG_MR; 363 __func__, frmr->sg, frmr->sg_nents);
363 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; 364 return -ENOMEM;
364 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; 365 }
365 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 366
366 fastreg_wr.wr.fast_reg.page_list_len = page_no; 367 n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
367 fastreg_wr.wr.fast_reg.length = len; 368 if (unlikely(n != frmr->sg_nents)) {
368 fastreg_wr.wr.fast_reg.access_flags = writing ? 369 pr_err("RPC: %s: failed to map mr %p (%u/%u)\n",
369 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 370 __func__, frmr->fr_mr, n, frmr->sg_nents);
370 IB_ACCESS_REMOTE_READ; 371 rc = n < 0 ? n : -EINVAL;
371 mr = frmr->fr_mr; 372 goto out_senderr;
373 }
374
375 dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
376 __func__, mw, frmr->sg_nents, mr->length);
377
372 key = (u8)(mr->rkey & 0x000000FF); 378 key = (u8)(mr->rkey & 0x000000FF);
373 ib_update_fast_reg_key(mr, ++key); 379 ib_update_fast_reg_key(mr, ++key);
374 fastreg_wr.wr.fast_reg.rkey = mr->rkey; 380
381 reg_wr.wr.next = NULL;
382 reg_wr.wr.opcode = IB_WR_REG_MR;
383 reg_wr.wr.wr_id = (uintptr_t)mw;
384 reg_wr.wr.num_sge = 0;
385 reg_wr.wr.send_flags = 0;
386 reg_wr.mr = mr;
387 reg_wr.key = mr->rkey;
388 reg_wr.access = writing ?
389 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
390 IB_ACCESS_REMOTE_READ;
375 391
376 DECR_CQCOUNT(&r_xprt->rx_ep); 392 DECR_CQCOUNT(&r_xprt->rx_ep);
377 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); 393 rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
378 if (rc) 394 if (rc)
379 goto out_senderr; 395 goto out_senderr;
380 396
397 seg1->mr_dir = direction;
381 seg1->rl_mw = mw; 398 seg1->rl_mw = mw;
382 seg1->mr_rkey = mr->rkey; 399 seg1->mr_rkey = mr->rkey;
383 seg1->mr_base = seg1->mr_dma + pageoff; 400 seg1->mr_base = mr->iova;
384 seg1->mr_nsegs = i; 401 seg1->mr_nsegs = frmr->sg_nents;
385 seg1->mr_len = len; 402 seg1->mr_len = mr->length;
386 return i; 403
404 return frmr->sg_nents;
387 405
388out_senderr: 406out_senderr:
389 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); 407 dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc);
390 while (i--) 408 ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
391 rpcrdma_unmap_one(device, --seg);
392 __frwr_queue_recovery(mw); 409 __frwr_queue_recovery(mw);
393 return rc; 410 return rc;
394} 411}
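With the fast-reg page list gone, frwr_op_map() describes each segment through a scatterlist (sg_set_page() for page-backed segments, sg_set_buf() otherwise), DMA-maps the whole list, and only then calls ib_map_mr_sg(). A condensed sketch of that preparation step, assuming page-backed segments only and ignoring the hole-coalescing rules; names are invented:

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Sketch: turn page-backed segments into a DMA-mapped scatterlist. */
static int map_segments(struct ib_device *dev, struct scatterlist *sg,
			struct page **pages, unsigned int *lens, int n,
			enum dma_data_direction dir)
{
	int i, dma_nents;

	sg_init_table(sg, n);
	for (i = 0; i < n; i++)
		sg_set_page(&sg[i], pages[i], lens[i], 0);

	dma_nents = ib_dma_map_sg(dev, sg, n, dir);
	return dma_nents ? dma_nents : -ENOMEM;
}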
@@ -402,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
402 struct rpcrdma_mr_seg *seg1 = seg; 419 struct rpcrdma_mr_seg *seg1 = seg;
403 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 420 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
404 struct rpcrdma_mw *mw = seg1->rl_mw; 421 struct rpcrdma_mw *mw = seg1->rl_mw;
422 struct rpcrdma_frmr *frmr = &mw->r.frmr;
405 struct ib_send_wr invalidate_wr, *bad_wr; 423 struct ib_send_wr invalidate_wr, *bad_wr;
406 int rc, nsegs = seg->mr_nsegs; 424 int rc, nsegs = seg->mr_nsegs;
407 425
408 dprintk("RPC: %s: FRMR %p\n", __func__, mw); 426 dprintk("RPC: %s: FRMR %p\n", __func__, mw);
409 427
410 seg1->rl_mw = NULL; 428 seg1->rl_mw = NULL;
411 mw->r.frmr.fr_state = FRMR_IS_INVALID; 429 frmr->fr_state = FRMR_IS_INVALID;
412 430
413 memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 431 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
414 invalidate_wr.wr_id = (unsigned long)(void *)mw; 432 invalidate_wr.wr_id = (unsigned long)(void *)mw;
415 invalidate_wr.opcode = IB_WR_LOCAL_INV; 433 invalidate_wr.opcode = IB_WR_LOCAL_INV;
416 invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; 434 invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
417 DECR_CQCOUNT(&r_xprt->rx_ep); 435 DECR_CQCOUNT(&r_xprt->rx_ep);
418 436
419 while (seg1->mr_nsegs--) 437 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
420 rpcrdma_unmap_one(ia->ri_device, seg++);
421 read_lock(&ia->ri_qplock); 438 read_lock(&ia->ri_qplock);
422 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 439 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
423 read_unlock(&ia->ri_qplock); 440 read_unlock(&ia->ri_qplock);
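The unmap side now pairs a single ib_dma_unmap_sg() of the mapped scatterlist with the LOCAL_INV work request, replacing the per-segment rpcrdma_unmap_one() loop. A hedged sketch of that pattern (names illustrative; completion signaling is left to the caller's policy):

#include <rdma/ib_verbs.h>

/* Sketch: invalidate a registered MR and release its DMA mapping. */
static int invalidate_mr(struct ib_qp *qp, struct ib_device *dev,
			 struct ib_mr *mr, struct scatterlist *sg,
			 int sg_nents, enum dma_data_direction dir)
{
	struct ib_send_wr inv_wr, *bad_wr;

	memset(&inv_wr, 0, sizeof(inv_wr));
	inv_wr.opcode = IB_WR_LOCAL_INV;
	inv_wr.ex.invalidate_rkey = mr->rkey;

	ib_dma_unmap_sg(dev, sg, sg_nents, dir);
	return ib_post_send(qp, &inv_wr, &bad_wr);
}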
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f0c3ff67ca98..ff4f01e527ec 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
126 u64 rs_offset, 126 u64 rs_offset,
127 bool last) 127 bool last)
128{ 128{
129 struct ib_send_wr read_wr; 129 struct ib_rdma_wr read_wr;
130 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; 130 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
131 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); 131 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
132 int ret, read, pno; 132 int ret, read, pno;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 180 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
181 181
182 memset(&read_wr, 0, sizeof(read_wr)); 182 memset(&read_wr, 0, sizeof(read_wr));
183 read_wr.wr_id = (unsigned long)ctxt; 183 read_wr.wr.wr_id = (unsigned long)ctxt;
184 read_wr.opcode = IB_WR_RDMA_READ; 184 read_wr.wr.opcode = IB_WR_RDMA_READ;
185 ctxt->wr_op = read_wr.opcode; 185 ctxt->wr_op = read_wr.wr.opcode;
186 read_wr.send_flags = IB_SEND_SIGNALED; 186 read_wr.wr.send_flags = IB_SEND_SIGNALED;
187 read_wr.wr.rdma.rkey = rs_handle; 187 read_wr.rkey = rs_handle;
188 read_wr.wr.rdma.remote_addr = rs_offset; 188 read_wr.remote_addr = rs_offset;
189 read_wr.sg_list = ctxt->sge; 189 read_wr.wr.sg_list = ctxt->sge;
190 read_wr.num_sge = pages_needed; 190 read_wr.wr.num_sge = pages_needed;
191 191
192 ret = svc_rdma_send(xprt, &read_wr); 192 ret = svc_rdma_send(xprt, &read_wr.wr);
193 if (ret) { 193 if (ret) {
194 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); 194 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
195 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 195 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
219 u64 rs_offset, 219 u64 rs_offset,
220 bool last) 220 bool last)
221{ 221{
222 struct ib_send_wr read_wr; 222 struct ib_rdma_wr read_wr;
223 struct ib_send_wr inv_wr; 223 struct ib_send_wr inv_wr;
224 struct ib_send_wr fastreg_wr; 224 struct ib_reg_wr reg_wr;
225 u8 key; 225 u8 key;
226 int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; 226 int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
227 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); 227 struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
228 struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); 228 struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
229 int ret, read, pno; 229 int ret, read, pno, dma_nents, n;
230 u32 pg_off = *page_offset; 230 u32 pg_off = *page_offset;
231 u32 pg_no = *page_no; 231 u32 pg_no = *page_no;
232 232
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
235 235
236 ctxt->direction = DMA_FROM_DEVICE; 236 ctxt->direction = DMA_FROM_DEVICE;
237 ctxt->frmr = frmr; 237 ctxt->frmr = frmr;
238 pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); 238 nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
239 read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, 239 read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
240 rs_length);
241 240
242 frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
243 frmr->direction = DMA_FROM_DEVICE; 241 frmr->direction = DMA_FROM_DEVICE;
244 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); 242 frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
245 frmr->map_len = pages_needed << PAGE_SHIFT; 243 frmr->sg_nents = nents;
246 frmr->page_list_len = pages_needed;
247 244
248 for (pno = 0; pno < pages_needed; pno++) { 245 for (pno = 0; pno < nents; pno++) {
249 int len = min_t(int, rs_length, PAGE_SIZE - pg_off); 246 int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
250 247
251 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; 248 head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
253 head->arg.len += len; 250 head->arg.len += len;
254 if (!pg_off) 251 if (!pg_off)
255 head->count++; 252 head->count++;
253
254 sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
255 len, pg_off);
256
256 rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; 257 rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
257 rqstp->rq_next_page = rqstp->rq_respages + 1; 258 rqstp->rq_next_page = rqstp->rq_respages + 1;
258 frmr->page_list->page_list[pno] =
259 ib_dma_map_page(xprt->sc_cm_id->device,
260 head->arg.pages[pg_no], 0,
261 PAGE_SIZE, DMA_FROM_DEVICE);
262 ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
263 frmr->page_list->page_list[pno]);
264 if (ret)
265 goto err;
266 atomic_inc(&xprt->sc_dma_used);
267 259
268 /* adjust offset and wrap to next page if needed */ 260 /* adjust offset and wrap to next page if needed */
269 pg_off += len; 261 pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
279 else 271 else
280 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); 272 clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
281 273
274 dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
275 frmr->sg, frmr->sg_nents,
276 frmr->direction);
277 if (!dma_nents) {
278 pr_err("svcrdma: failed to dma map sg %p\n",
279 frmr->sg);
280 return -ENOMEM;
281 }
282 atomic_inc(&xprt->sc_dma_used);
283
284 n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
285 if (unlikely(n != frmr->sg_nents)) {
286 pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
287 frmr->mr, n, frmr->sg_nents);
288 return n < 0 ? n : -EINVAL;
289 }
290
282 /* Bump the key */ 291 /* Bump the key */
283 key = (u8)(frmr->mr->lkey & 0x000000FF); 292 key = (u8)(frmr->mr->lkey & 0x000000FF);
284 ib_update_fast_reg_key(frmr->mr, ++key); 293 ib_update_fast_reg_key(frmr->mr, ++key);
285 294
286 ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; 295 ctxt->sge[0].addr = frmr->mr->iova;
287 ctxt->sge[0].lkey = frmr->mr->lkey; 296 ctxt->sge[0].lkey = frmr->mr->lkey;
288 ctxt->sge[0].length = read; 297 ctxt->sge[0].length = frmr->mr->length;
289 ctxt->count = 1; 298 ctxt->count = 1;
290 ctxt->read_hdr = head; 299 ctxt->read_hdr = head;
291 300
292 /* Prepare FASTREG WR */ 301 /* Prepare REG WR */
293 memset(&fastreg_wr, 0, sizeof(fastreg_wr)); 302 reg_wr.wr.opcode = IB_WR_REG_MR;
294 fastreg_wr.opcode = IB_WR_FAST_REG_MR; 303 reg_wr.wr.wr_id = 0;
295 fastreg_wr.send_flags = IB_SEND_SIGNALED; 304 reg_wr.wr.send_flags = IB_SEND_SIGNALED;
296 fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; 305 reg_wr.wr.num_sge = 0;
297 fastreg_wr.wr.fast_reg.page_list = frmr->page_list; 306 reg_wr.mr = frmr->mr;
298 fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; 307 reg_wr.key = frmr->mr->lkey;
299 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; 308 reg_wr.access = frmr->access_flags;
300 fastreg_wr.wr.fast_reg.length = frmr->map_len; 309 reg_wr.wr.next = &read_wr.wr;
301 fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
302 fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
303 fastreg_wr.next = &read_wr;
304 310
305 /* Prepare RDMA_READ */ 311 /* Prepare RDMA_READ */
306 memset(&read_wr, 0, sizeof(read_wr)); 312 memset(&read_wr, 0, sizeof(read_wr));
307 read_wr.send_flags = IB_SEND_SIGNALED; 313 read_wr.wr.send_flags = IB_SEND_SIGNALED;
308 read_wr.wr.rdma.rkey = rs_handle; 314 read_wr.rkey = rs_handle;
309 read_wr.wr.rdma.remote_addr = rs_offset; 315 read_wr.remote_addr = rs_offset;
310 read_wr.sg_list = ctxt->sge; 316 read_wr.wr.sg_list = ctxt->sge;
311 read_wr.num_sge = 1; 317 read_wr.wr.num_sge = 1;
312 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { 318 if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
313 read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; 319 read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
314 read_wr.wr_id = (unsigned long)ctxt; 320 read_wr.wr.wr_id = (unsigned long)ctxt;
315 read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; 321 read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
316 } else { 322 } else {
317 read_wr.opcode = IB_WR_RDMA_READ; 323 read_wr.wr.opcode = IB_WR_RDMA_READ;
318 read_wr.next = &inv_wr; 324 read_wr.wr.next = &inv_wr;
319 /* Prepare invalidate */ 325 /* Prepare invalidate */
320 memset(&inv_wr, 0, sizeof(inv_wr)); 326 memset(&inv_wr, 0, sizeof(inv_wr));
321 inv_wr.wr_id = (unsigned long)ctxt; 327 inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
323 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; 329 inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
324 inv_wr.ex.invalidate_rkey = frmr->mr->lkey; 330 inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
325 } 331 }
326 ctxt->wr_op = read_wr.opcode; 332 ctxt->wr_op = read_wr.wr.opcode;
327 333
328 /* Post the chain */ 334 /* Post the chain */
329 ret = svc_rdma_send(xprt, &fastreg_wr); 335 ret = svc_rdma_send(xprt, &reg_wr.wr);
330 if (ret) { 336 if (ret) {
331 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); 337 pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
332 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); 338 set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
340 atomic_inc(&rdma_stat_read); 346 atomic_inc(&rdma_stat_read);
341 return ret; 347 return ret;
342 err: 348 err:
343 svc_rdma_unmap_dma(ctxt); 349 ib_dma_unmap_sg(xprt->sc_cm_id->device,
350 frmr->sg, frmr->sg_nents, frmr->direction);
344 svc_rdma_put_context(ctxt, 0); 351 svc_rdma_put_context(ctxt, 0);
345 svc_rdma_put_frmr(xprt, frmr); 352 svc_rdma_put_frmr(xprt, frmr);
346 return ret; 353 return ret;
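rdma_read_chunk_frmr() now posts a chain headed by the REG_MR work request: REG_MR, then the RDMA READ (READ_WITH_INV where the device advertises it), then an optional trailing LOCAL_INV. A compressed sketch of the chain wiring, assuming the registration WR, MR and sge are already prepared; helper name and parameters are invented:

#include <rdma/ib_verbs.h>

/* Sketch: chain MR registration, RDMA READ and (if needed) invalidation. */
static void chain_read(struct ib_reg_wr *reg_wr, struct ib_rdma_wr *read_wr,
		       struct ib_send_wr *inv_wr, struct ib_mr *mr,
		       struct ib_sge *sge, u64 remote_addr, u32 rkey,
		       bool read_w_inv)
{
	reg_wr->wr.next = &read_wr->wr;

	memset(read_wr, 0, sizeof(*read_wr));
	read_wr->wr.send_flags = IB_SEND_SIGNALED;
	read_wr->wr.sg_list = sge;
	read_wr->wr.num_sge = 1;
	read_wr->remote_addr = remote_addr;
	read_wr->rkey = rkey;

	if (read_w_inv) {
		read_wr->wr.opcode = IB_WR_RDMA_READ_WITH_INV;
		read_wr->wr.ex.invalidate_rkey = mr->lkey;
	} else {
		read_wr->wr.opcode = IB_WR_RDMA_READ;
		read_wr->wr.next = inv_wr;
		memset(inv_wr, 0, sizeof(*inv_wr));
		inv_wr->opcode = IB_WR_LOCAL_INV;
		inv_wr->send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
		inv_wr->ex.invalidate_rkey = mr->lkey;
	}
}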
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1dfae8317065..969a1ab75fc3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
217 u32 xdr_off, int write_len, 217 u32 xdr_off, int write_len,
218 struct svc_rdma_req_map *vec) 218 struct svc_rdma_req_map *vec)
219{ 219{
220 struct ib_send_wr write_wr; 220 struct ib_rdma_wr write_wr;
221 struct ib_sge *sge; 221 struct ib_sge *sge;
222 int xdr_sge_no; 222 int xdr_sge_no;
223 int sge_no; 223 int sge_no;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
282 /* Prepare WRITE WR */ 282 /* Prepare WRITE WR */
283 memset(&write_wr, 0, sizeof write_wr); 283 memset(&write_wr, 0, sizeof write_wr);
284 ctxt->wr_op = IB_WR_RDMA_WRITE; 284 ctxt->wr_op = IB_WR_RDMA_WRITE;
285 write_wr.wr_id = (unsigned long)ctxt; 285 write_wr.wr.wr_id = (unsigned long)ctxt;
286 write_wr.sg_list = &sge[0]; 286 write_wr.wr.sg_list = &sge[0];
287 write_wr.num_sge = sge_no; 287 write_wr.wr.num_sge = sge_no;
288 write_wr.opcode = IB_WR_RDMA_WRITE; 288 write_wr.wr.opcode = IB_WR_RDMA_WRITE;
289 write_wr.send_flags = IB_SEND_SIGNALED; 289 write_wr.wr.send_flags = IB_SEND_SIGNALED;
290 write_wr.wr.rdma.rkey = rmr; 290 write_wr.rkey = rmr;
291 write_wr.wr.rdma.remote_addr = to; 291 write_wr.remote_addr = to;
292 292
293 /* Post It */ 293 /* Post It */
294 atomic_inc(&rdma_stat_write); 294 atomic_inc(&rdma_stat_write);
295 if (svc_rdma_send(xprt, &write_wr)) 295 if (svc_rdma_send(xprt, &write_wr.wr))
296 goto err; 296 goto err;
297 return write_len - bc; 297 return write_len - bc;
298 err: 298 err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fcc3eb80c265..a266e870d870 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
692 if (!cma_xprt) 692 if (!cma_xprt)
693 return ERR_PTR(-ENOMEM); 693 return ERR_PTR(-ENOMEM);
694 694
695 listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, 695 listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
696 IB_QPT_RC); 696 RDMA_PS_TCP, IB_QPT_RC);
697 if (IS_ERR(listen_id)) { 697 if (IS_ERR(listen_id)) {
698 ret = PTR_ERR(listen_id); 698 ret = PTR_ERR(listen_id);
699 dprintk("svcrdma: rdma_create_id failed = %d\n", ret); 699 dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
732static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) 732static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
733{ 733{
734 struct ib_mr *mr; 734 struct ib_mr *mr;
735 struct ib_fast_reg_page_list *pl; 735 struct scatterlist *sg;
736 struct svc_rdma_fastreg_mr *frmr; 736 struct svc_rdma_fastreg_mr *frmr;
737 u32 num_sg; 737 u32 num_sg;
738 738
@@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
745 if (IS_ERR(mr)) 745 if (IS_ERR(mr))
746 goto err_free_frmr; 746 goto err_free_frmr;
747 747
748 pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, 748 sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
749 num_sg); 749 if (!sg)
750 if (IS_ERR(pl))
751 goto err_free_mr; 750 goto err_free_mr;
752 751
752 sg_init_table(sg, RPCSVC_MAXPAGES);
753
753 frmr->mr = mr; 754 frmr->mr = mr;
754 frmr->page_list = pl; 755 frmr->sg = sg;
755 INIT_LIST_HEAD(&frmr->frmr_list); 756 INIT_LIST_HEAD(&frmr->frmr_list);
756 return frmr; 757 return frmr;
757 758
@@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
771 frmr = list_entry(xprt->sc_frmr_q.next, 772 frmr = list_entry(xprt->sc_frmr_q.next,
772 struct svc_rdma_fastreg_mr, frmr_list); 773 struct svc_rdma_fastreg_mr, frmr_list);
773 list_del_init(&frmr->frmr_list); 774 list_del_init(&frmr->frmr_list);
775 kfree(frmr->sg);
774 ib_dereg_mr(frmr->mr); 776 ib_dereg_mr(frmr->mr);
775 ib_free_fast_reg_page_list(frmr->page_list);
776 kfree(frmr); 777 kfree(frmr);
777 } 778 }
778} 779}
@@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
786 frmr = list_entry(rdma->sc_frmr_q.next, 787 frmr = list_entry(rdma->sc_frmr_q.next,
787 struct svc_rdma_fastreg_mr, frmr_list); 788 struct svc_rdma_fastreg_mr, frmr_list);
788 list_del_init(&frmr->frmr_list); 789 list_del_init(&frmr->frmr_list);
789 frmr->map_len = 0; 790 frmr->sg_nents = 0;
790 frmr->page_list_len = 0;
791 } 791 }
792 spin_unlock_bh(&rdma->sc_frmr_q_lock); 792 spin_unlock_bh(&rdma->sc_frmr_q_lock);
793 if (frmr) 793 if (frmr)
@@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
796 return rdma_alloc_frmr(rdma); 796 return rdma_alloc_frmr(rdma);
797} 797}
798 798
799static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
800 struct svc_rdma_fastreg_mr *frmr)
801{
802 int page_no;
803 for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
804 dma_addr_t addr = frmr->page_list->page_list[page_no];
805 if (ib_dma_mapping_error(frmr->mr->device, addr))
806 continue;
807 atomic_dec(&xprt->sc_dma_used);
808 ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
809 frmr->direction);
810 }
811}
812
813void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, 799void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
814 struct svc_rdma_fastreg_mr *frmr) 800 struct svc_rdma_fastreg_mr *frmr)
815{ 801{
816 if (frmr) { 802 if (frmr) {
817 frmr_unmap_dma(rdma, frmr); 803 ib_dma_unmap_sg(rdma->sc_cm_id->device,
804 frmr->sg, frmr->sg_nents, frmr->direction);
805 atomic_dec(&rdma->sc_dma_used);
818 spin_lock_bh(&rdma->sc_frmr_q_lock); 806 spin_lock_bh(&rdma->sc_frmr_q_lock);
819 WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); 807 WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
820 list_add(&frmr->frmr_list, &rdma->sc_frmr_q); 808 list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
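With the page-list API removed, each svcrdma FRMR carries a plain scatterlist that lives and dies with the MR: kcalloc() plus sg_init_table() at allocation, kfree() next to ib_dereg_mr() at teardown. A minimal sketch of that lifecycle (helper names invented; depth corresponds to RPCSVC_MAXPAGES above):

#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>

/* Sketch: per-FRMR scatterlist setup and teardown. */
static struct scatterlist *alloc_frmr_sg(unsigned int depth)
{
	struct scatterlist *sg;

	sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
	if (sg)
		sg_init_table(sg, depth);
	return sg;
}

static void free_frmr(struct ib_mr *mr, struct scatterlist *sg)
{
	kfree(sg);
	ib_dereg_mr(mr);
}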
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5502d4dade74..f63369bd01c5 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
432 432
433 init_completion(&ia->ri_done); 433 init_completion(&ia->ri_done);
434 434
435 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); 435 id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
436 IB_QPT_RC);
436 if (IS_ERR(id)) { 437 if (IS_ERR(id)) {
437 rc = PTR_ERR(id); 438 rc = PTR_ERR(id);
438 dprintk("RPC: %s: rdma_create_id() failed %i\n", 439 dprintk("RPC: %s: rdma_create_id() failed %i\n",
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c09414e6f91b..c82abf44e39d 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -193,7 +193,8 @@ enum rpcrdma_frmr_state {
193}; 193};
194 194
195struct rpcrdma_frmr { 195struct rpcrdma_frmr {
196 struct ib_fast_reg_page_list *fr_pgl; 196 struct scatterlist *sg;
197 int sg_nents;
197 struct ib_mr *fr_mr; 198 struct ib_mr *fr_mr;
198 enum rpcrdma_frmr_state fr_state; 199 enum rpcrdma_frmr_state fr_state;
199 struct work_struct fr_work; 200 struct work_struct fr_work;