Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/addr.c              20
-rw-r--r--  drivers/infiniband/core/agent.c              2
-rw-r--r--  drivers/infiniband/core/cache.c            112
-rw-r--r--  drivers/infiniband/core/cm.c                40
-rw-r--r--  drivers/infiniband/core/cma.c              173
-rw-r--r--  drivers/infiniband/core/core_priv.h          9
-rw-r--r--  drivers/infiniband/core/device.c            19
-rw-r--r--  drivers/infiniband/core/mad.c               42
-rw-r--r--  drivers/infiniband/core/mad_priv.h           2
-rw-r--r--  drivers/infiniband/core/multicast.c          3
-rw-r--r--  drivers/infiniband/core/sa_query.c          19
-rw-r--r--  drivers/infiniband/core/sysfs.c              2
-rw-r--r--  drivers/infiniband/core/ucma.c               5
-rw-r--r--  drivers/infiniband/core/uverbs.h             1
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c       402
-rw-r--r--  drivers/infiniband/core/uverbs_main.c        1
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c    4
-rw-r--r--  drivers/infiniband/core/verbs.c            295
18 files changed, 773 insertions(+), 378 deletions(-)
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 746cdf56bc76..34b1adad07aa 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 	int ret = -EADDRNOTAVAIL;
 
 	if (dev_addr->bound_dev_if) {
-		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 		if (!dev)
 			return -ENODEV;
 		ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 
 	switch (addr->sa_family) {
 	case AF_INET:
-		dev = ip_dev_find(&init_net,
+		dev = ip_dev_find(dev_addr->net,
 			((struct sockaddr_in *) addr)->sin_addr.s_addr);
 
 		if (!dev)
@@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 			*vlan_id = rdma_vlan_dev_vlan_id(dev);
 		dev_put(dev);
 		break;
-
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		rcu_read_lock();
-		for_each_netdev_rcu(&init_net, dev) {
-			if (ipv6_chk_addr(&init_net,
+		for_each_netdev_rcu(dev_addr->net, dev) {
+			if (ipv6_chk_addr(dev_addr->net,
 					  &((struct sockaddr_in6 *) addr)->sin6_addr,
 					  dev, 1)) {
 				ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	fl4.daddr = dst_ip;
 	fl4.saddr = src_ip;
 	fl4.flowi4_oif = addr->bound_dev_if;
-	rt = ip_route_output_key(&init_net, &fl4);
+	rt = ip_route_output_key(addr->net, &fl4);
 	if (IS_ERR(rt)) {
 		ret = PTR_ERR(rt);
 		goto out;
@@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	fl6.saddr = src_in->sin6_addr;
 	fl6.flowi6_oif = addr->bound_dev_if;
 
-	dst = ip6_route_output(&init_net, NULL, &fl6);
+	dst = ip6_route_output(addr->net, NULL, &fl6);
 	if ((ret = dst->error))
 		goto put;
 
 	if (ipv6_addr_any(&fl6.saddr)) {
-		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+		ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
 					 &fl6.daddr, 0, &fl6.saddr);
 		if (ret)
 			goto put;
@@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
 }
 
 int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-			       u8 *dmac, u16 *vlan_id)
+			       u8 *dmac, u16 *vlan_id, int if_index)
 {
 	int ret = 0;
 	struct rdma_dev_addr dev_addr;
@@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
+	dev_addr.bound_dev_if = if_index;
+	dev_addr.net = &init_net;
 
 	ctx.addr = &dev_addr;
 	init_completion(&ctx.comp);
@@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
 	rdma_gid2ip(&gid_addr._sockaddr, sgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
+	dev_addr.net = &init_net;
 	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
 	if (ret)
 		return ret;
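
The addr.c hunks above replace the hard-coded &init_net with dev_addr->net throughout resolution, so a caller now chooses the namespace by seeding that field. A minimal caller sketch, illustrative and not part of this commit (example_translate is a made-up helper; passing &init_net reproduces the old behavior):

#include <linux/string.h>
#include <net/net_namespace.h>
#include <rdma/ib_addr.h>

static int example_translate(struct sockaddr *addr, struct net *net)
{
	struct rdma_dev_addr dev_addr;
	u16 vlan_id;

	memset(&dev_addr, 0, sizeof(dev_addr));
	dev_addr.net = net;	/* previously always &init_net, implicitly */

	return rdma_translate_ip(addr, &dev_addr, &vlan_id);
}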
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 0429040304fd..4fa524dfb6cf 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
 		mad_send_wr = container_of(send_buf,
 					   struct ib_mad_send_wr_private,
 					   send_buf);
-		mad_send_wr->send_wr.wr.ud.port_num = port_num;
+		mad_send_wr->send_wr.port_num = port_num;
 	}
 
 	if (ib_post_send_mad(send_buf, NULL)) {
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 87471ef37198..89bebeada38b 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
 				mask, port, index);
 }
 
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
-			      const union ib_gid *gid,
-			      u8 port, struct net_device *ndev,
-			      u16 *index)
+int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
+			       const union ib_gid *gid,
+			       u8 port, struct net_device *ndev,
+			       u16 *index)
 {
 	int local_index;
 	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
@@ -438,6 +438,82 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
 
 	return -ENOENT;
 }
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+
+/**
+ * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value could be
+ *   searched.
+ * @filter: The filter function is executed on any matching GID in the table.
+ *   If the filter function returns true, the corresponding index is returned,
+ *   otherwise, we continue searching the GID table. It's guaranteed that
+ *   while filter is executed, ndev field is valid and the structure won't
+ *   change. filter is executed in an atomic context. filter must not be NULL.
+ * @index: The index into the cached GID table where the GID was found. This
+ *   parameter may be NULL.
+ *
+ * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * of which the filter function returns true in the port's GID table.
+ * This function is only supported on RoCE ports.
+ *
+ */
+static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
+				       const union ib_gid *gid,
+				       u8 port,
+				       bool (*filter)(const union ib_gid *,
+						      const struct ib_gid_attr *,
+						      void *),
+				       void *context,
+				       u16 *index)
+{
+	struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+	struct ib_gid_table *table;
+	unsigned int i;
+	bool found = false;
+
+	if (!ports_table)
+		return -EOPNOTSUPP;
+
+	if (port < rdma_start_port(ib_dev) ||
+	    port > rdma_end_port(ib_dev) ||
+	    !rdma_protocol_roce(ib_dev, port))
+		return -EPROTONOSUPPORT;
+
+	table = ports_table[port - rdma_start_port(ib_dev)];
+
+	for (i = 0; i < table->sz; i++) {
+		struct ib_gid_attr attr;
+		unsigned long flags;
+
+		read_lock_irqsave(&table->data_vec[i].lock, flags);
+		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+			goto next;
+
+		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+			goto next;
+
+		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+
+		if (filter(gid, &attr, context))
+			found = true;
+
+next:
+		read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+
+		if (found)
+			break;
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	if (index)
+		*index = i;
+	return 0;
+}
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
@@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
 int ib_get_cached_gid(struct ib_device *device,
 		      u8 port_num,
 		      int index,
-		      union ib_gid *gid)
+		      union ib_gid *gid,
+		      struct ib_gid_attr *gid_attr)
 {
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
 
-	return __ib_cache_gid_get(device, port_num, index, gid, NULL);
+	return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 int ib_find_cached_gid(struct ib_device *device,
 		       const union ib_gid *gid,
+		       struct net_device *ndev,
 		       u8 *port_num,
 		       u16 *index)
 {
-	return ib_cache_gid_find(device, gid, NULL, port_num, index);
+	return ib_cache_gid_find(device, gid, ndev, port_num, index);
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
+int ib_find_gid_by_filter(struct ib_device *device,
+			  const union ib_gid *gid,
+			  u8 port_num,
+			  bool (*filter)(const union ib_gid *gid,
+					 const struct ib_gid_attr *,
+					 void *),
+			  void *context, u16 *index)
+{
+	/* Only RoCE GID table supports filter function */
+	if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+		return -EPROTONOSUPPORT;
+
+	return ib_cache_gid_find_by_filter(device, gid,
+					   port_num, filter,
+					   context, index);
+}
+EXPORT_SYMBOL(ib_find_gid_by_filter);
+
 int ib_get_cached_pkey(struct ib_device *device,
 		       u8 port_num,
 		       int index,
@@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device,
 	if (!use_roce_gid_table) {
 		for (i = 0; i < gid_cache->table_len; ++i) {
 			ret = ib_query_gid(device, port, i,
-					   gid_cache->table + i);
+					   gid_cache->table + i, NULL);
 			if (ret) {
 				printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
 				       ret, device->name, i);
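
Per the doc comment above, the filter passed to the new ib_find_gid_by_filter() runs under the GID entry's read lock, so it must not sleep; the attr it sees is a stable snapshot copied under that lock. A sketch of a conforming predicate (illustrative only; match_netdev is a made-up name) that matches entries by their bound netdev:

static bool match_netdev(const union ib_gid *gid,
			 const struct ib_gid_attr *attr,
			 void *context)
{
	/* attr is a snapshot taken under the entry lock; safe to read here */
	return attr->ndev == context;
}

A caller would then search a RoCE port's table with, e.g.:

	ret = ib_find_gid_by_filter(device, gid, port_num,
				    match_netdev, ndev, &index);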
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4f918b929eca..0a26dd6d9b19 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -179,8 +179,6 @@ struct cm_av {
 	struct ib_ah_attr ah_attr;
 	u16 pkey_index;
 	u8 timeout;
-	u8 valid;
-	u8 smac[ETH_ALEN];
 };
 
 struct cm_work {
@@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 	unsigned long flags;
 	int ret;
 	u8 p;
+	struct net_device *ndev = ib_get_ndev_from_path(path);
 
 	read_lock_irqsave(&cm.device_lock, flags);
 	list_for_each_entry(cm_dev, &cm.device_list, list) {
 		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-					&p, NULL)) {
+					ndev, &p, NULL)) {
 			port = cm_dev->port[p-1];
 			break;
 		}
 	}
 	read_unlock_irqrestore(&cm.device_lock, flags);
 
+	if (ndev)
+		dev_put(ndev);
+
 	if (!port)
 		return -EINVAL;
 
@@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
 			     &av->ah_attr);
 	av->timeout = path->packet_life_time + 1;
-	memcpy(av->smac, path->smac, sizeof(av->smac));
 
-	av->valid = 1;
 	return 0;
 }
 
@@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work)
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
 	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
-	work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
 	if (ret) {
 		ib_get_cached_gid(work->port->cm_dev->ib_device,
-				  work->port->port_num, 0, &work->path[0].sgid);
+				  work->port->port_num, 0, &work->path[0].sgid,
+				  NULL);
 		ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
 			       &work->path[0].sgid, sizeof work->path[0].sgid,
 			       NULL, 0);
@@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
 				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
-		if (!cm_id_priv->av.valid) {
-			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-			return -EINVAL;
-		}
-		if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
-			qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
-			*qp_attr_mask |= IB_QP_VID;
-		}
-		if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
-			memcpy(qp_attr->smac, cm_id_priv->av.smac,
-			       sizeof(qp_attr->smac));
-			*qp_attr_mask |= IB_QP_SMAC;
-		}
-		if (cm_id_priv->alt_av.valid) {
-			if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
-				qp_attr->alt_vlan_id =
-					cm_id_priv->alt_av.ah_attr.vlan_id;
-				*qp_attr_mask |= IB_QP_ALT_VID;
-			}
-			if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
-				memcpy(qp_attr->alt_smac,
-				       cm_id_priv->alt_av.smac,
-				       sizeof(qp_attr->alt_smac));
-				*qp_attr_mask |= IB_QP_ALT_SMAC;
-			}
-		}
 		qp_attr->path_mtu = cm_id_priv->path_mtu;
 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36b12d560e17..944cd90417bc 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -44,6 +44,8 @@
 #include <linux/module.h>
 #include <net/route.h>
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/ip_fib.h>
@@ -86,7 +88,7 @@ static const char * const cma_events[] = {
 	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	= "timewait exit",
 };
 
-const char *rdma_event_msg(enum rdma_cm_event_type event)
+const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
 	size_t index = event;
 
@@ -110,22 +112,33 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(tcp_ps);
-static DEFINE_IDR(udp_ps);
-static DEFINE_IDR(ipoib_ps);
-static DEFINE_IDR(ib_ps);
+static int cma_pernet_id;
 
-static struct idr *cma_idr(enum rdma_port_space ps)
+struct cma_pernet {
+	struct idr tcp_ps;
+	struct idr udp_ps;
+	struct idr ipoib_ps;
+	struct idr ib_ps;
+};
+
+static struct cma_pernet *cma_pernet(struct net *net)
+{
+	return net_generic(net, cma_pernet_id);
+}
+
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
 {
+	struct cma_pernet *pernet = cma_pernet(net);
+
 	switch (ps) {
 	case RDMA_PS_TCP:
-		return &tcp_ps;
+		return &pernet->tcp_ps;
 	case RDMA_PS_UDP:
-		return &udp_ps;
+		return &pernet->udp_ps;
 	case RDMA_PS_IPOIB:
-		return &ipoib_ps;
+		return &pernet->ipoib_ps;
 	case RDMA_PS_IB:
-		return &ib_ps;
+		return &pernet->ib_ps;
 	default:
 		return NULL;
 	}
@@ -145,24 +158,25 @@ struct rdma_bind_list {
 	unsigned short port;
 };
 
-static int cma_ps_alloc(enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
 			struct rdma_bind_list *bind_list, int snum)
 {
-	struct idr *idr = cma_idr(ps);
+	struct idr *idr = cma_pernet_idr(net, ps);
 
 	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
 }
 
-static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum)
+static struct rdma_bind_list *cma_ps_find(struct net *net,
+					  enum rdma_port_space ps, int snum)
 {
-	struct idr *idr = cma_idr(ps);
+	struct idr *idr = cma_pernet_idr(net, ps);
 
 	return idr_find(idr, snum);
 }
 
-static void cma_ps_remove(enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
 {
-	struct idr *idr = cma_idr(ps);
+	struct idr *idr = cma_pernet_idr(net, ps);
 
 	idr_remove(idr, snum);
 }
@@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 }
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
-				    union ib_gid *gid, int dev_type)
+				    union ib_gid *gid, int dev_type,
+				    int bound_if_index)
 {
-	u8 found_port;
 	int ret = -ENODEV;
+	struct net_device *ndev = NULL;
 
 	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
 		return ret;
@@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
 	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
 		return ret;
 
-	ret = ib_find_cached_gid(device, gid, &found_port, NULL);
-	if (port != found_port)
-		return -ENODEV;
+	if (dev_type == ARPHRD_ETHER)
+		ndev = dev_get_by_index(&init_net, bound_if_index);
+
+	ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+
+	if (ndev)
+		dev_put(ndev);
 
 	return ret;
 }
@@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 			  &iboe_gid : &gid;
 
 		ret = cma_validate_port(cma_dev->device, port, gidp,
-					dev_addr->dev_type);
+					dev_addr->dev_type,
+					dev_addr->bound_dev_if);
 		if (!ret) {
 			id_priv->id.port_num = port;
 			goto out;
@@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 				  &iboe_gid : &gid;
 
 			ret = cma_validate_port(cma_dev->device, port, gidp,
-						dev_addr->dev_type);
+						dev_addr->dev_type,
+						dev_addr->bound_dev_if);
 			if (!ret) {
 				id_priv->id.port_num = port;
 				goto out;
@@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 		if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
 			continue;
 
-		for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+		for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
+					       &gid, NULL);
+		     i++) {
 			if (!memcmp(&gid, dgid, sizeof(gid))) {
 				cma_dev = cur_dev;
 				sgid = gid;
@@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
 	return 0;
 }
 
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+				  rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps,
 				  enum ib_qp_type qp_type)
 {
@@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
 	INIT_LIST_HEAD(&id_priv->listen_list);
 	INIT_LIST_HEAD(&id_priv->mc_list);
 	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+	id_priv->id.route.addr.dev_addr.net = get_net(net);
 
 	return &id_priv->id;
 }
@@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 		goto out;
 
 	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
-			   qp_attr.ah_attr.grh.sgid_index, &sgid);
+			   qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
 	if (ret)
 		goto out;
 
 	BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
 
-	if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
-		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
-
-		if (ret)
-			goto out;
-	}
 	if (conn_param)
 		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
 	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -1260,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
 	       cma_protocol_roce(&id_priv->id);
 
 	return !addr->dev_addr.bound_dev_if ||
-	       (net_eq(dev_net(net_dev), &init_net) &&
+	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
 		addr->dev_addr.bound_dev_if == net_dev->ifindex);
 }
 
@@ -1321,7 +1340,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
 		}
 	}
 
-	bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id),
+	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
+				rdma_ps_from_service_id(req.service_id),
 				cma_port_from_service_id(req.service_id));
 	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
 	if (IS_ERR(id_priv) && *net_dev) {
@@ -1392,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
 static void cma_release_port(struct rdma_id_private *id_priv)
 {
 	struct rdma_bind_list *bind_list = id_priv->bind_list;
+	struct net *net = id_priv->id.route.addr.dev_addr.net;
 
 	if (!bind_list)
 		return;
@@ -1399,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 	mutex_lock(&lock);
 	hlist_del(&id_priv->node);
 	if (hlist_empty(&bind_list->owners)) {
-		cma_ps_remove(bind_list->ps, bind_list->port);
+		cma_ps_remove(net, bind_list->ps, bind_list->port);
 		kfree(bind_list);
 	}
 	mutex_unlock(&lock);
@@ -1458,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 		cma_deref_id(id_priv->id.context);
 
 	kfree(id_priv->id.route.path_rec);
+	put_net(id_priv->id.route.addr.dev_addr.net);
 	kfree(id_priv);
 }
 EXPORT_SYMBOL(rdma_destroy_id);
@@ -1588,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 		      ib_event->param.req_rcvd.primary_path->service_id;
 	int ret;
 
-	id = rdma_create_id(listen_id->event_handler, listen_id->context,
+	id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+			    listen_id->event_handler, listen_id->context,
 			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
 	if (IS_ERR(id))
 		return NULL;
@@ -1643,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
 	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+	struct net *net = listen_id->route.addr.dev_addr.net;
 	int ret;
 
-	id = rdma_create_id(listen_id->event_handler, listen_id->context,
+	id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
 			    listen_id->ps, IB_QPT_UD);
 	if (IS_ERR(id))
 		return NULL;
@@ -1882,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		return -ECONNABORTED;
 
 	/* Create a new RDMA id for the new IW CM ID */
-	new_cm_id = rdma_create_id(listen_id->id.event_handler,
+	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+				   listen_id->id.event_handler,
 				   listen_id->id.context,
 				   RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(new_cm_id)) {
@@ -2010,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 {
 	struct rdma_id_private *dev_id_priv;
 	struct rdma_cm_id *id;
+	struct net *net = id_priv->id.route.addr.dev_addr.net;
 	int ret;
 
 	if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
 		return;
 
-	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+	id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
 			    id_priv->id.qp_type);
 	if (IS_ERR(id))
 		return;
@@ -2294,16 +2320,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
 	route->num_paths = 1;
 
-	if (addr->dev_addr.bound_dev_if)
+	if (addr->dev_addr.bound_dev_if) {
 		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+		route->path_rec->net = &init_net;
+		route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+	}
 	if (!ndev) {
 		ret = -ENODEV;
 		goto err2;
 	}
 
-	route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
 	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
-	memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
 
 	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 		    &route->path_rec->sgid);
@@ -2426,7 +2453,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
 	p = 1;
 
 port_found:
-	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
 	if (ret)
 		goto out;
 
@@ -2688,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps,
 	if (!bind_list)
 		return -ENOMEM;
 
-	ret = cma_ps_alloc(ps, bind_list, snum);
+	ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
+			   snum);
 	if (ret < 0)
 		goto err;
 
@@ -2707,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
 	static unsigned int last_used_port;
 	int low, high, remaining;
 	unsigned int rover;
+	struct net *net = id_priv->id.route.addr.dev_addr.net;
 
-	inet_get_local_port_range(&init_net, &low, &high);
+	inet_get_local_port_range(net, &low, &high);
 	remaining = (high - low) + 1;
 	rover = prandom_u32() % remaining + low;
retry:
 	if (last_used_port != rover &&
-	    !cma_ps_find(ps, (unsigned short)rover)) {
+	    !cma_ps_find(net, ps, (unsigned short)rover)) {
 		int ret = cma_alloc_port(ps, id_priv, rover);
 		/*
 		 * Remember previously used port number in order to avoid
@@ -2779,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps,
 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
-	bind_list = cma_ps_find(ps, snum);
+	bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
 	if (!bind_list) {
 		ret = cma_alloc_port(ps, id_priv, snum);
 	} else {
@@ -2971,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		if (addr->sa_family == AF_INET)
 			id_priv->afonly = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-		else if (addr->sa_family == AF_INET6)
-			id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+		else if (addr->sa_family == AF_INET6) {
+			struct net *net = id_priv->id.route.addr.dev_addr.net;
+
+			id_priv->afonly = net->ipv6.sysctl.bindv6only;
+		}
 #endif
 	}
 	ret = cma_get_port(id_priv);
@@ -3777,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
 	dev_addr = &id_priv->id.route.addr.dev_addr;
 
 	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
+	    (net_eq(dev_net(ndev), dev_addr->net)) &&
 	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
 		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
 		       ndev->name, &id_priv->id);
@@ -3802,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
 	struct rdma_id_private *id_priv;
 	int ret = NOTIFY_DONE;
 
-	if (dev_net(ndev) != &init_net)
-		return NOTIFY_DONE;
-
 	if (event != NETDEV_BONDING_FAILOVER)
 		return NOTIFY_DONE;
 
@@ -3999,6 +4029,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = {
 	  .module = THIS_MODULE },
 };
 
+static int cma_init_net(struct net *net)
+{
+	struct cma_pernet *pernet = cma_pernet(net);
+
+	idr_init(&pernet->tcp_ps);
+	idr_init(&pernet->udp_ps);
+	idr_init(&pernet->ipoib_ps);
+	idr_init(&pernet->ib_ps);
+
+	return 0;
+}
+
+static void cma_exit_net(struct net *net)
+{
+	struct cma_pernet *pernet = cma_pernet(net);
+
+	idr_destroy(&pernet->tcp_ps);
+	idr_destroy(&pernet->udp_ps);
+	idr_destroy(&pernet->ipoib_ps);
+	idr_destroy(&pernet->ib_ps);
+}
+
+static struct pernet_operations cma_pernet_operations = {
+	.init = cma_init_net,
+	.exit = cma_exit_net,
+	.id = &cma_pernet_id,
+	.size = sizeof(struct cma_pernet),
+};
+
 static int __init cma_init(void)
 {
 	int ret;
@@ -4007,6 +4066,10 @@ static int __init cma_init(void)
 	if (!cma_wq)
 		return -ENOMEM;
 
+	ret = register_pernet_subsys(&cma_pernet_operations);
+	if (ret)
+		goto err_wq;
+
 	ib_sa_register_client(&sa_client);
 	rdma_addr_register_client(&addr_client);
 	register_netdevice_notifier(&cma_nb);
@@ -4024,6 +4087,7 @@ err:
 	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
+err_wq:
 	destroy_workqueue(cma_wq);
 	return ret;
 }
@@ -4035,11 +4099,8 @@ static void __exit cma_cleanup(void)
 	unregister_netdevice_notifier(&cma_nb);
 	rdma_addr_unregister_client(&addr_client);
 	ib_sa_unregister_client(&sa_client);
+	unregister_pernet_subsys(&cma_pernet_operations);
 	destroy_workqueue(cma_wq);
-	idr_destroy(&tcp_ps);
-	idr_destroy(&udp_ps);
-	idr_destroy(&ipoib_ps);
-	idr_destroy(&ib_ps);
 }
 
 module_init(cma_init);
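
rdma_create_id() now takes the owning namespace as its first argument and pins it with get_net() until rdma_destroy_id() releases it. A sketch of a namespace-unaware in-kernel caller adapting to the new signature (illustrative only; example_create_id and the stub handler are made-up names):

static int my_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	return 0;	/* placeholder event handler */
}

static int example_create_id(void)
{
	struct rdma_cm_id *id;

	/* kernel ULPs that are not namespace-aware keep passing &init_net */
	id = rdma_create_id(&init_net, my_cma_handler, NULL,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return PTR_ERR(id);

	rdma_destroy_id(id);	/* also drops the get_net() reference */
	return 0;
}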
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 70bb36ebb03b..5cf6eb716f00 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
 void ib_cache_setup(void);
 void ib_cache_cleanup(void);
 
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
-			    struct ib_qp_attr *qp_attr, int *qp_attr_mask);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+			struct ib_qp_attr *qp_attr, int *qp_attr_mask);
 
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 				     struct net_device *idev, void *cookie);
@@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
 			      roce_netdev_callback cb,
 			      void *cookie);
 
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
-			      const union ib_gid *gid,
-			      u8 port, struct net_device *ndev,
-			      u16 *index);
-
 enum ib_cache_gid_default_mode {
 	IB_CACHE_GID_DEFAULT_MODE_SET,
 	IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 17639117afc6..179e8134d57f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port);
  * @port_num:Port number to query
  * @index:GID table index to query
  * @gid:Returned GID
+ * @attr: Returned GID attributes related to this GID index (only in RoCE).
+ *   NULL means ignore.
  *
  * ib_query_gid() fetches the specified GID table entry.
  */
 int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid)
+		 u8 port_num, int index, union ib_gid *gid,
+		 struct ib_gid_attr *attr)
 {
 	if (rdma_cap_roce_gid_table(device, port_num))
-		return ib_get_cached_gid(device, port_num, index, gid);
+		return ib_get_cached_gid(device, port_num, index, gid, attr);
+
+	if (attr)
+		return -EINVAL;
 
 	return device->query_gid(device, port_num, index, gid);
 }
@@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port);
  * a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @ndev: The ndev related to the GID to search for.
 * @port_num: The port number of the device where the GID value was found.
 * @index: The index into the GID table where the GID was found. This
 *   parameter may be NULL.
 */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-		u8 *port_num, u16 *index)
+		struct net_device *ndev, u8 *port_num, u16 *index)
 {
 	union ib_gid tmp_gid;
 	int ret, port, i;
 
 	for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
 		if (rdma_cap_roce_gid_table(device, port)) {
-			if (!ib_cache_gid_find_by_port(device, gid, port,
-						       NULL, index)) {
+			if (!ib_find_cached_gid_by_port(device, gid, port,
+							ndev, index)) {
 				*port_num = port;
 				return 0;
 			}
 		}
 
 		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
-			ret = ib_query_gid(device, port, i, &tmp_gid);
+			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
 			if (ret)
 				return ret;
 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
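
ib_query_gid() grows an attr out-parameter that can only be served from the core-managed RoCE GID cache; on ports without such a table a non-NULL attr now fails with -EINVAL, as the hunk above shows. An illustrative caller sketch (example_query is a made-up name):

static int example_query(struct ib_device *device, u8 port_num, int index)
{
	union ib_gid gid;
	struct ib_gid_attr gid_attr;

	if (rdma_cap_roce_gid_table(device, port_num))
		/* attributes (e.g. the bound ndev) exist only in the cache */
		return ib_query_gid(device, port_num, index, &gid, &gid_attr);

	return ib_query_gid(device, port_num, index, &gid, NULL);
}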
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4b5c72311deb..8d8af7a41a30 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	struct ib_device *device = mad_agent_priv->agent.device;
 	u8 port_num;
 	struct ib_wc mad_wc;
-	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
+	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
 	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
 	u16 out_mad_pkey_index = 0;
 	u16 drslid;
@@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 
 	if (rdma_cap_ib_switch(device) &&
 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-		port_num = send_wr->wr.ud.port_num;
+		port_num = send_wr->port_num;
 	else
 		port_num = mad_agent_priv->agent.port_num;
 
@@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	}
 
 	build_smp_wc(mad_agent_priv->agent.qp,
-		     send_wr->wr_id, drslid,
-		     send_wr->wr.ud.pkey_index,
-		     send_wr->wr.ud.port_num, &mad_wc);
+		     send_wr->wr.wr_id, drslid,
+		     send_wr->pkey_index,
+		     send_wr->port_num, &mad_wc);
 
 	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
 		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
@@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 
 	local->mad_send_wr = mad_send_wr;
 	if (opa) {
-		local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index;
+		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
 		local->return_wc_byte_len = mad_size;
 	}
 	/* Reference MAD agent until send side of local completion handled */
@@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
 	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
-	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-	mad_send_wr->send_wr.num_sge = 2;
-	mad_send_wr->send_wr.opcode = IB_WR_SEND;
-	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
-	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
-	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
-	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
+	mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
+	mad_send_wr->send_wr.wr.num_sge = 2;
+	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
+	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
+	mad_send_wr->send_wr.remote_qpn = remote_qpn;
+	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
+	mad_send_wr->send_wr.pkey_index = pkey_index;
 
 	if (rmpp_active) {
 		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
@@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	/* Set WR ID to find mad_send_wr upon completion */
 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
-	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+	mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
 
 	mad_agent = mad_send_wr->send_buf.mad_agent;
@@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
-		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
 				   &bad_send_wr);
 		list = &qp_info->send_queue.list;
 	} else {
@@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 		 * request associated with the completion
 		 */
 		next_send_buf = send_buf->next;
-		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
+		mad_send_wr->send_wr.ah = send_buf->ah;
 
 		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
 		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
@@ -1877,7 +1877,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
 				   ((1 << lmc) - 1)));
 	} else {
 		if (ib_get_cached_gid(device, port_num,
-				      attr.grh.sgid_index, &sgid))
+				      attr.grh.sgid_index, &sgid, NULL))
 			return 0;
 		return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
 			       16);
@@ -2457,7 +2457,7 @@ retry:
 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 
 	if (queued_send_wr) {
-		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
 				   &bad_send_wr);
 		if (ret) {
 			dev_err(&port_priv->device->dev,
@@ -2515,7 +2515,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
 	struct ib_send_wr *bad_send_wr;
 
 	mad_send_wr->retry = 0;
-	ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+	ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
 			   &bad_send_wr);
 	if (ret)
 		ib_mad_send_done_handler(port_priv, wc);
@@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work)
 		build_smp_wc(recv_mad_agent->agent.qp,
 			     (unsigned long) local->mad_send_wr,
 			     be16_to_cpu(IB_LID_PERMISSIVE),
-			     local->mad_send_wr->send_wr.pkey_index,
+			     local->mad_send_wr->send_wr.pkey_index,
 			     recv_mad_agent->agent.port_num, &wc);
 
 		local->mad_priv->header.recv_wc.wc = &wc;
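
The mad.c churn is mechanical once the layout change is clear: struct ib_mad_send_wr_private now embeds a struct ib_ud_wr (see mad_priv.h below), whose first member .wr is the generic struct ib_send_wr. Generic fields gain a .wr. step, UD fields lose the old wr.ud. union path, and posting hands the embedded generic WR to ib_post_send(). The pattern reduced to a hedged sketch (example_post_ud is a made-up name; sg_list/num_sge setup omitted for brevity):

static int example_post_ud(struct ib_qp *qp, struct ib_ah *ah, u32 remote_qpn)
{
	struct ib_ud_wr ud_wr;
	struct ib_send_wr *bad_wr;

	memset(&ud_wr, 0, sizeof(ud_wr));
	ud_wr.wr.opcode     = IB_WR_SEND;	/* generic fields via .wr */
	ud_wr.wr.send_flags = IB_SEND_SIGNALED;
	ud_wr.ah            = ah;		/* UD fields, no wr.ud. prefix */
	ud_wr.remote_qpn    = remote_qpn;
	ud_wr.remote_qkey   = IB_QP_SET_QKEY;

	return ib_post_send(qp, &ud_wr.wr, &bad_wr);	/* post embedded WR */
}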
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 4a4f7aad0978..990698a6ab4b 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -123,7 +123,7 @@ struct ib_mad_send_wr_private {
 	struct ib_mad_send_buf send_buf;
 	u64 header_mapping;
 	u64 payload_mapping;
-	struct ib_send_wr send_wr;
+	struct ib_ud_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
 	__be64 tid;
 	unsigned long timeout;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d38d8b2b2979..bb6685fb08c6 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 	u16 gid_index;
 	u8 p;
 
-	ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+	ret = ib_find_cached_gid(device, &rec->port_gid,
+				 NULL, &p, &gid_index);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0..dcdaa79e3f0f 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1007,26 +1007,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 	force_grh = rdma_cap_eth_ah(device, port_num);
 
 	if (rec->hop_limit > 1 || force_grh) {
+		struct net_device *ndev = ib_get_ndev_from_path(rec);
+
 		ah_attr->ah_flags = IB_AH_GRH;
 		ah_attr->grh.dgid = rec->dgid;
 
-		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+		ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
 					 &gid_index);
-		if (ret)
+		if (ret) {
+			if (ndev)
+				dev_put(ndev);
 			return ret;
+		}
 
 		ah_attr->grh.sgid_index = gid_index;
 		ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
 		ah_attr->grh.hop_limit = rec->hop_limit;
 		ah_attr->grh.traffic_class = rec->traffic_class;
+		if (ndev)
+			dev_put(ndev);
 	}
 	if (force_grh) {
 		memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-		ah_attr->vlan_id = rec->vlan_id;
-	} else {
-		ah_attr->vlan_id = 0xffff;
 	}
-
 	return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1150,9 +1153,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 
 		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
 			  mad->data, &rec);
-		rec.vlan_id = 0xffff;
+		rec.net = NULL;
+		rec.ifindex = 0;
 		memset(rec.dmac, 0, ETH_ALEN);
-		memset(rec.smac, 0, ETH_ALEN);
 		query->callback(status, &rec, query->context);
 	} else
 		query->callback(status, NULL, query->context);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 34cdd74b0a17..b1f37d4095fa 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
 	union ib_gid gid;
 	ssize_t ret;
 
-	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 30467d10df91..8b5a934e1133 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -42,6 +42,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
 
 #include <rdma/rdma_user_cm.h>
 #include <rdma/ib_marshall.h>
@@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
+	ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
+				    ucma_event_handler, ctx, cmd.ps, qp_type);
 	if (IS_ERR(ctx->cm_id)) {
 		ret = PTR_ERR(ctx->cm_id);
 		goto err1;
@@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
 		return -EINVAL;
 
 	memset(&sa_path, 0, sizeof(sa_path));
-	sa_path.vlan_id = 0xffff;
 
 	ib_sa_unpack_path(path_data->path_rec, &sa_path);
 	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33c243d..94bbd8c155fc 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
272IB_UVERBS_DECLARE_EX_CMD(destroy_flow); 272IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
273IB_UVERBS_DECLARE_EX_CMD(query_device); 273IB_UVERBS_DECLARE_EX_CMD(query_device);
274IB_UVERBS_DECLARE_EX_CMD(create_cq); 274IB_UVERBS_DECLARE_EX_CMD(create_cq);
275IB_UVERBS_DECLARE_EX_CMD(create_qp);
275 276
276#endif /* UVERBS_H */ 277#endif /* UVERBS_H */
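IB_UVERBS_DECLARE_EX_CMD() stamps out the prototype for an extended verb; per the handler defined in uverbs_cmd.c below, the new declaration expands to:

    int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
                               struct ib_device *ib_dev,
                               struct ib_udata *ucore,
                               struct ib_udata *uhw);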
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f04be3..94816aeb95a0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1478 if (copy_from_user(&cmd, buf, sizeof(cmd))) 1478 if (copy_from_user(&cmd, buf, sizeof(cmd)))
1479 return -EFAULT; 1479 return -EFAULT;
1480 1480
1481 INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp)); 1481 INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
1482 1482
1483 INIT_UDATA(&uhw, buf + sizeof(cmd), 1483 INIT_UDATA(&uhw, buf + sizeof(cmd),
1484 (unsigned long)cmd.response + sizeof(resp), 1484 (unsigned long)cmd.response + sizeof(resp),
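The create_cq fix adds an explicit (unsigned long) cast so the __u64 response pointer is narrowed the same way as in the uhw line below before being stored as a user pointer. A sketch of what INIT_UDATA() is assumed to do with its arguments (not the verbatim macro):

    #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) do {      \
            (udata)->inbuf  = (const void __user *)(ibuf);      \
            (udata)->outbuf = (void __user *)(obuf);            \
            (udata)->inlen  = (ilen);                           \
            (udata)->outlen = (olen);                           \
    } while (0)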
@@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
1741 return in_len; 1741 return in_len;
1742} 1742}
1743 1743
1744ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, 1744static int create_qp(struct ib_uverbs_file *file,
1745 struct ib_device *ib_dev, 1745 struct ib_udata *ucore,
1746 const char __user *buf, int in_len, 1746 struct ib_udata *uhw,
1747 int out_len) 1747 struct ib_uverbs_ex_create_qp *cmd,
1748{ 1748 size_t cmd_sz,
1749 struct ib_uverbs_create_qp cmd; 1749 int (*cb)(struct ib_uverbs_file *file,
1750 struct ib_uverbs_create_qp_resp resp; 1750 struct ib_uverbs_ex_create_qp_resp *resp,
1751 struct ib_udata udata; 1751 struct ib_udata *udata),
1752 struct ib_uqp_object *obj; 1752 void *context)
1753 struct ib_device *device; 1753{
1754 struct ib_pd *pd = NULL; 1754 struct ib_uqp_object *obj;
1755 struct ib_xrcd *xrcd = NULL; 1755 struct ib_device *device;
1756 struct ib_uobject *uninitialized_var(xrcd_uobj); 1756 struct ib_pd *pd = NULL;
1757 struct ib_cq *scq = NULL, *rcq = NULL; 1757 struct ib_xrcd *xrcd = NULL;
1758 struct ib_srq *srq = NULL; 1758 struct ib_uobject *uninitialized_var(xrcd_uobj);
1759 struct ib_qp *qp; 1759 struct ib_cq *scq = NULL, *rcq = NULL;
1760 struct ib_qp_init_attr attr; 1760 struct ib_srq *srq = NULL;
1761 int ret; 1761 struct ib_qp *qp;
1762 1762 char *buf;
1763 if (out_len < sizeof resp) 1763 struct ib_qp_init_attr attr;
1764 return -ENOSPC; 1764 struct ib_uverbs_ex_create_qp_resp resp;
1765 1765 int ret;
1766 if (copy_from_user(&cmd, buf, sizeof cmd))
1767 return -EFAULT;
1768 1766
1769 if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) 1767 if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1770 return -EPERM; 1768 return -EPERM;
1771 1769
1772 INIT_UDATA(&udata, buf + sizeof cmd,
1773 (unsigned long) cmd.response + sizeof resp,
1774 in_len - sizeof cmd, out_len - sizeof resp);
1775
1776 obj = kzalloc(sizeof *obj, GFP_KERNEL); 1770 obj = kzalloc(sizeof *obj, GFP_KERNEL);
1777 if (!obj) 1771 if (!obj)
1778 return -ENOMEM; 1772 return -ENOMEM;
1779 1773
1780 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); 1774 init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
1775 &qp_lock_class);
1781 down_write(&obj->uevent.uobject.mutex); 1776 down_write(&obj->uevent.uobject.mutex);
1782 1777
1783 if (cmd.qp_type == IB_QPT_XRC_TGT) { 1778 if (cmd->qp_type == IB_QPT_XRC_TGT) {
1784 xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); 1779 xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
1780 &xrcd_uobj);
1785 if (!xrcd) { 1781 if (!xrcd) {
1786 ret = -EINVAL; 1782 ret = -EINVAL;
1787 goto err_put; 1783 goto err_put;
1788 } 1784 }
1789 device = xrcd->device; 1785 device = xrcd->device;
1790 } else { 1786 } else {
1791 if (cmd.qp_type == IB_QPT_XRC_INI) { 1787 if (cmd->qp_type == IB_QPT_XRC_INI) {
1792 cmd.max_recv_wr = cmd.max_recv_sge = 0; 1788 cmd->max_recv_wr = 0;
1789 cmd->max_recv_sge = 0;
1793 } else { 1790 } else {
1794 if (cmd.is_srq) { 1791 if (cmd->is_srq) {
1795 srq = idr_read_srq(cmd.srq_handle, file->ucontext); 1792 srq = idr_read_srq(cmd->srq_handle,
1793 file->ucontext);
1796 if (!srq || srq->srq_type != IB_SRQT_BASIC) { 1794 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1797 ret = -EINVAL; 1795 ret = -EINVAL;
1798 goto err_put; 1796 goto err_put;
1799 } 1797 }
1800 } 1798 }
1801 1799
1802 if (cmd.recv_cq_handle != cmd.send_cq_handle) { 1800 if (cmd->recv_cq_handle != cmd->send_cq_handle) {
1803 rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); 1801 rcq = idr_read_cq(cmd->recv_cq_handle,
1802 file->ucontext, 0);
1804 if (!rcq) { 1803 if (!rcq) {
1805 ret = -EINVAL; 1804 ret = -EINVAL;
1806 goto err_put; 1805 goto err_put;
@@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1808 } 1807 }
1809 } 1808 }
1810 1809
1811 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); 1810 scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
1812 rcq = rcq ?: scq; 1811 rcq = rcq ?: scq;
1813 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 1812 pd = idr_read_pd(cmd->pd_handle, file->ucontext);
1814 if (!pd || !scq) { 1813 if (!pd || !scq) {
1815 ret = -EINVAL; 1814 ret = -EINVAL;
1816 goto err_put; 1815 goto err_put;
@@ -1825,31 +1824,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1825 attr.recv_cq = rcq; 1824 attr.recv_cq = rcq;
1826 attr.srq = srq; 1825 attr.srq = srq;
1827 attr.xrcd = xrcd; 1826 attr.xrcd = xrcd;
1828 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 1827 attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
1829 attr.qp_type = cmd.qp_type; 1828 IB_SIGNAL_REQ_WR;
1829 attr.qp_type = cmd->qp_type;
1830 attr.create_flags = 0; 1830 attr.create_flags = 0;
1831 1831
1832 attr.cap.max_send_wr = cmd.max_send_wr; 1832 attr.cap.max_send_wr = cmd->max_send_wr;
1833 attr.cap.max_recv_wr = cmd.max_recv_wr; 1833 attr.cap.max_recv_wr = cmd->max_recv_wr;
1834 attr.cap.max_send_sge = cmd.max_send_sge; 1834 attr.cap.max_send_sge = cmd->max_send_sge;
1835 attr.cap.max_recv_sge = cmd.max_recv_sge; 1835 attr.cap.max_recv_sge = cmd->max_recv_sge;
1836 attr.cap.max_inline_data = cmd.max_inline_data; 1836 attr.cap.max_inline_data = cmd->max_inline_data;
1837 1837
1838 obj->uevent.events_reported = 0; 1838 obj->uevent.events_reported = 0;
1839 INIT_LIST_HEAD(&obj->uevent.event_list); 1839 INIT_LIST_HEAD(&obj->uevent.event_list);
1840 INIT_LIST_HEAD(&obj->mcast_list); 1840 INIT_LIST_HEAD(&obj->mcast_list);
1841 1841
1842 if (cmd.qp_type == IB_QPT_XRC_TGT) 1842 if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
1843 sizeof(cmd->create_flags))
1844 attr.create_flags = cmd->create_flags;
1845
1846 if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
1847 ret = -EINVAL;
1848 goto err_put;
1849 }
1850
1851 buf = (void *)cmd + sizeof(*cmd);
1852 if (cmd_sz > sizeof(*cmd))
1853 if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
1854 cmd_sz - sizeof(*cmd) - 1))) {
1855 ret = -EINVAL;
1856 goto err_put;
1857 }
1858
1859 if (cmd->qp_type == IB_QPT_XRC_TGT)
1843 qp = ib_create_qp(pd, &attr); 1860 qp = ib_create_qp(pd, &attr);
1844 else 1861 else
1845 qp = device->create_qp(pd, &attr, &udata); 1862 qp = device->create_qp(pd, &attr, uhw);
1846 1863
1847 if (IS_ERR(qp)) { 1864 if (IS_ERR(qp)) {
1848 ret = PTR_ERR(qp); 1865 ret = PTR_ERR(qp);
1849 goto err_put; 1866 goto err_put;
1850 } 1867 }
1851 1868
1852 if (cmd.qp_type != IB_QPT_XRC_TGT) { 1869 if (cmd->qp_type != IB_QPT_XRC_TGT) {
1853 qp->real_qp = qp; 1870 qp->real_qp = qp;
1854 qp->device = device; 1871 qp->device = device;
1855 qp->pd = pd; 1872 qp->pd = pd;
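Two compatibility checks appear above: create_flags is read only when the caller's command is large enough to contain it, and any bytes past the kernel's notion of the command must be zero so unknown extensions are rejected rather than silently ignored. The zero test is the classic self-overlapping memcmp idiom; an equivalent standalone form:

    /* equivalent of the buf[0]/memcmp test above */
    static bool trailing_is_zero(const char *buf, size_t len)
    {
        return len == 0 ||
               (buf[0] == 0 && !memcmp(buf, buf + 1, len - 1));
    }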
@@ -1875,19 +1892,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1875 goto err_destroy; 1892 goto err_destroy;
1876 1893
1877 memset(&resp, 0, sizeof resp); 1894 memset(&resp, 0, sizeof resp);
1878 resp.qpn = qp->qp_num; 1895 resp.base.qpn = qp->qp_num;
1879 resp.qp_handle = obj->uevent.uobject.id; 1896 resp.base.qp_handle = obj->uevent.uobject.id;
1880 resp.max_recv_sge = attr.cap.max_recv_sge; 1897 resp.base.max_recv_sge = attr.cap.max_recv_sge;
1881 resp.max_send_sge = attr.cap.max_send_sge; 1898 resp.base.max_send_sge = attr.cap.max_send_sge;
1882 resp.max_recv_wr = attr.cap.max_recv_wr; 1899 resp.base.max_recv_wr = attr.cap.max_recv_wr;
1883 resp.max_send_wr = attr.cap.max_send_wr; 1900 resp.base.max_send_wr = attr.cap.max_send_wr;
1884 resp.max_inline_data = attr.cap.max_inline_data; 1901 resp.base.max_inline_data = attr.cap.max_inline_data;
1885 1902
1886 if (copy_to_user((void __user *) (unsigned long) cmd.response, 1903 resp.response_length = offsetof(typeof(resp), response_length) +
1887 &resp, sizeof resp)) { 1904 sizeof(resp.response_length);
1888 ret = -EFAULT; 1905
1889 goto err_copy; 1906 ret = cb(file, &resp, ucore);
1890 } 1907 if (ret)
1908 goto err_cb;
1891 1909
1892 if (xrcd) { 1910 if (xrcd) {
1893 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, 1911 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1931,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1913 1931
1914 up_write(&obj->uevent.uobject.mutex); 1932 up_write(&obj->uevent.uobject.mutex);
1915 1933
1916 return in_len; 1934 return 0;
1917 1935err_cb:
1918err_copy:
1919 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); 1936 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1920 1937
1921err_destroy: 1938err_destroy:
@@ -1937,6 +1954,113 @@ err_put:
1937 return ret; 1954 return ret;
1938} 1955}
1939 1956
1957static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
1958 struct ib_uverbs_ex_create_qp_resp *resp,
1959 struct ib_udata *ucore)
1960{
1961 if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
1962 return -EFAULT;
1963
1964 return 0;
1965}
1966
1967ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1968 struct ib_device *ib_dev,
1969 const char __user *buf, int in_len,
1970 int out_len)
1971{
1972 struct ib_uverbs_create_qp cmd;
1973 struct ib_uverbs_ex_create_qp cmd_ex;
1974 struct ib_udata ucore;
1975 struct ib_udata uhw;
1976 ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
1977 int err;
1978
1979 if (out_len < resp_size)
1980 return -ENOSPC;
1981
1982 if (copy_from_user(&cmd, buf, sizeof(cmd)))
1983 return -EFAULT;
1984
1985 INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
1986 resp_size);
1987 INIT_UDATA(&uhw, buf + sizeof(cmd),
1988 (unsigned long)cmd.response + resp_size,
1989 in_len - sizeof(cmd), out_len - resp_size);
1990
1991 memset(&cmd_ex, 0, sizeof(cmd_ex));
1992 cmd_ex.user_handle = cmd.user_handle;
1993 cmd_ex.pd_handle = cmd.pd_handle;
1994 cmd_ex.send_cq_handle = cmd.send_cq_handle;
1995 cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
1996 cmd_ex.srq_handle = cmd.srq_handle;
1997 cmd_ex.max_send_wr = cmd.max_send_wr;
1998 cmd_ex.max_recv_wr = cmd.max_recv_wr;
1999 cmd_ex.max_send_sge = cmd.max_send_sge;
2000 cmd_ex.max_recv_sge = cmd.max_recv_sge;
2001 cmd_ex.max_inline_data = cmd.max_inline_data;
2002 cmd_ex.sq_sig_all = cmd.sq_sig_all;
2003 cmd_ex.qp_type = cmd.qp_type;
2004 cmd_ex.is_srq = cmd.is_srq;
2005
2006 err = create_qp(file, &ucore, &uhw, &cmd_ex,
2007 offsetof(typeof(cmd_ex), is_srq) +
2008 sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
2009 NULL);
2010
2011 if (err)
2012 return err;
2013
2014 return in_len;
2015}
2016
2017static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
2018 struct ib_uverbs_ex_create_qp_resp *resp,
2019 struct ib_udata *ucore)
2020{
2021 if (ib_copy_to_udata(ucore, resp, resp->response_length))
2022 return -EFAULT;
2023
2024 return 0;
2025}
2026
2027int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
2028 struct ib_device *ib_dev,
2029 struct ib_udata *ucore,
2030 struct ib_udata *uhw)
2031{
2032 struct ib_uverbs_ex_create_qp_resp resp;
2033 struct ib_uverbs_ex_create_qp cmd = {0};
2034 int err;
2035
2036 if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
2037 sizeof(cmd.comp_mask)))
2038 return -EINVAL;
2039
2040 err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
2041 if (err)
2042 return err;
2043
2044 if (cmd.comp_mask)
2045 return -EINVAL;
2046
2047 if (cmd.reserved)
2048 return -EINVAL;
2049
2050 if (ucore->outlen < (offsetof(typeof(resp), response_length) +
2051 sizeof(resp.response_length)))
2052 return -ENOSPC;
2053
2054 err = create_qp(file, ucore, uhw, &cmd,
2055 min(ucore->inlen, sizeof(cmd)),
2056 ib_uverbs_ex_create_qp_cb, NULL);
2057
2058 if (err)
2059 return err;
2060
2061 return 0;
2062}
2063
1940ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, 2064ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1941 struct ib_device *ib_dev, 2065 struct ib_device *ib_dev,
1942 const char __user *buf, int in_len, int out_len) 2066 const char __user *buf, int in_len, int out_len)
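Both entry points funnel into create_qp(): the legacy write() path copies back only resp.base, while the extended path reports resp.response_length so userspace can tell which fields the kernel actually filled in. A hedged sketch of how a later extension would append a field (new_field is hypothetical):

    if (ucore->outlen >= offsetof(typeof(resp), new_field) +
                         sizeof(resp.new_field)) {
            resp.new_field = value;                 /* hypothetical */
            resp.response_length += sizeof(resp.new_field);
    }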
@@ -2221,7 +2345,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
2221 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 2345 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
2222 2346
2223 if (qp->real_qp == qp) { 2347 if (qp->real_qp == qp) {
2224 ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); 2348 ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
2225 if (ret) 2349 if (ret)
2226 goto release_qp; 2350 goto release_qp;
2227 ret = qp->device->modify_qp(qp, attr, 2351 ret = qp->device->modify_qp(qp, attr,
@@ -2303,6 +2427,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
2303 return in_len; 2427 return in_len;
2304} 2428}
2305 2429
2430static void *alloc_wr(size_t wr_size, __u32 num_sge)
2431{
2432 return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
2433 num_sge * sizeof (struct ib_sge), GFP_KERNEL);
2434};
2435
2306ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, 2436ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2307 struct ib_device *ib_dev, 2437 struct ib_device *ib_dev,
2308 const char __user *buf, int in_len, 2438 const char __user *buf, int in_len,
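alloc_wr() makes a single allocation per work request: the typed header, padded to struct ib_sge alignment, followed by room for num_sge scatter/gather entries. A hedged usage sketch that places the SGE array after the full typed header:

    struct ib_rdma_wr *rdma;

    rdma = alloc_wr(sizeof(*rdma), num_sge);  /* num_sge: caller-supplied */
    if (rdma)
        rdma->wr.sg_list = (void *)rdma +
                ALIGN(sizeof(*rdma), sizeof(struct ib_sge));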
@@ -2351,14 +2481,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2351 goto out_put; 2481 goto out_put;
2352 } 2482 }
2353 2483
2354 next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + 2484 if (is_ud) {
2355 user_wr->num_sge * sizeof (struct ib_sge), 2485 struct ib_ud_wr *ud;
2356 GFP_KERNEL); 2486
2357 if (!next) { 2487 if (user_wr->opcode != IB_WR_SEND &&
2358 ret = -ENOMEM; 2488 user_wr->opcode != IB_WR_SEND_WITH_IMM) {
2489 ret = -EINVAL;
2490 goto out_put;
2491 }
2492
2493 ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
2494 if (!ud) {
2495 ret = -ENOMEM;
2496 goto out_put;
2497 }
2498
2499 ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
2500 if (!ud->ah) {
2501 kfree(ud);
2502 ret = -EINVAL;
2503 goto out_put;
2504 }
2505 ud->remote_qpn = user_wr->wr.ud.remote_qpn;
2506 ud->remote_qkey = user_wr->wr.ud.remote_qkey;
2507
2508 next = &ud->wr;
2509 } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2510 user_wr->opcode == IB_WR_RDMA_WRITE ||
2511 user_wr->opcode == IB_WR_RDMA_READ) {
2512 struct ib_rdma_wr *rdma;
2513
2514 rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
2515 if (!rdma) {
2516 ret = -ENOMEM;
2517 goto out_put;
2518 }
2519
2520 rdma->remote_addr = user_wr->wr.rdma.remote_addr;
2521 rdma->rkey = user_wr->wr.rdma.rkey;
2522
2523 next = &rdma->wr;
2524 } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
2525 user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
2526 struct ib_atomic_wr *atomic;
2527
2528 atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
2529 if (!atomic) {
2530 ret = -ENOMEM;
2531 goto out_put;
2532 }
2533
2534 atomic->remote_addr = user_wr->wr.atomic.remote_addr;
2535 atomic->compare_add = user_wr->wr.atomic.compare_add;
2536 atomic->swap = user_wr->wr.atomic.swap;
2537 atomic->rkey = user_wr->wr.atomic.rkey;
2538
2539 next = &atomic->wr;
2540 } else if (user_wr->opcode == IB_WR_SEND ||
2541 user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2542 user_wr->opcode == IB_WR_SEND_WITH_INV) {
2543 next = alloc_wr(sizeof(*next), user_wr->num_sge);
2544 if (!next) {
2545 ret = -ENOMEM;
2546 goto out_put;
2547 }
2548 } else {
2549 ret = -EINVAL;
2359 goto out_put; 2550 goto out_put;
2360 } 2551 }
2361 2552
2553 if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
2554 user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
2555 next->ex.imm_data =
2556 (__be32 __force) user_wr->ex.imm_data;
2557 } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
2558 next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
2559 }
2560
2362 if (!last) 2561 if (!last)
2363 wr = next; 2562 wr = next;
2364 else 2563 else
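Each typed request embeds struct ib_send_wr as its first member, so generic code keeps passing base pointers while type-specific code recovers the container, as the cleanup loop below does with ud_wr(). A sketch of the accessor's assumed shape, the usual container_of() wrapper:

    static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
    {
        return container_of(wr, struct ib_ud_wr, wr);
    }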
@@ -2371,60 +2570,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2371 next->opcode = user_wr->opcode; 2570 next->opcode = user_wr->opcode;
2372 next->send_flags = user_wr->send_flags; 2571 next->send_flags = user_wr->send_flags;
2373 2572
2374 if (is_ud) {
2375 if (next->opcode != IB_WR_SEND &&
2376 next->opcode != IB_WR_SEND_WITH_IMM) {
2377 ret = -EINVAL;
2378 goto out_put;
2379 }
2380
2381 next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
2382 file->ucontext);
2383 if (!next->wr.ud.ah) {
2384 ret = -EINVAL;
2385 goto out_put;
2386 }
2387 next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
2388 next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
2389 if (next->opcode == IB_WR_SEND_WITH_IMM)
2390 next->ex.imm_data =
2391 (__be32 __force) user_wr->ex.imm_data;
2392 } else {
2393 switch (next->opcode) {
2394 case IB_WR_RDMA_WRITE_WITH_IMM:
2395 next->ex.imm_data =
2396 (__be32 __force) user_wr->ex.imm_data;
2397 case IB_WR_RDMA_WRITE:
2398 case IB_WR_RDMA_READ:
2399 next->wr.rdma.remote_addr =
2400 user_wr->wr.rdma.remote_addr;
2401 next->wr.rdma.rkey =
2402 user_wr->wr.rdma.rkey;
2403 break;
2404 case IB_WR_SEND_WITH_IMM:
2405 next->ex.imm_data =
2406 (__be32 __force) user_wr->ex.imm_data;
2407 break;
2408 case IB_WR_SEND_WITH_INV:
2409 next->ex.invalidate_rkey =
2410 user_wr->ex.invalidate_rkey;
2411 break;
2412 case IB_WR_ATOMIC_CMP_AND_SWP:
2413 case IB_WR_ATOMIC_FETCH_AND_ADD:
2414 next->wr.atomic.remote_addr =
2415 user_wr->wr.atomic.remote_addr;
2416 next->wr.atomic.compare_add =
2417 user_wr->wr.atomic.compare_add;
2418 next->wr.atomic.swap = user_wr->wr.atomic.swap;
2419 next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
2420 case IB_WR_SEND:
2421 break;
2422 default:
2423 ret = -EINVAL;
2424 goto out_put;
2425 }
2426 }
2427
2428 if (next->num_sge) { 2573 if (next->num_sge) {
2429 next->sg_list = (void *) next + 2574 next->sg_list = (void *) next +
2430 ALIGN(sizeof *next, sizeof (struct ib_sge)); 2575 ALIGN(sizeof *next, sizeof (struct ib_sge));
@@ -2458,8 +2603,8 @@ out_put:
2458 put_qp_read(qp); 2603 put_qp_read(qp);
2459 2604
2460 while (wr) { 2605 while (wr) {
2461 if (is_ud && wr->wr.ud.ah) 2606 if (is_ud && ud_wr(wr)->ah)
2462 put_ah_read(wr->wr.ud.ah); 2607 put_ah_read(ud_wr(wr)->ah);
2463 next = wr->next; 2608 next = wr->next;
2464 kfree(wr); 2609 kfree(wr);
2465 wr = next; 2610 wr = next;
@@ -2698,7 +2843,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2698 attr.grh.sgid_index = cmd.attr.grh.sgid_index; 2843 attr.grh.sgid_index = cmd.attr.grh.sgid_index;
2699 attr.grh.hop_limit = cmd.attr.grh.hop_limit; 2844 attr.grh.hop_limit = cmd.attr.grh.hop_limit;
2700 attr.grh.traffic_class = cmd.attr.grh.traffic_class; 2845 attr.grh.traffic_class = cmd.attr.grh.traffic_class;
2701 attr.vlan_id = 0;
2702 memset(&attr.dmac, 0, sizeof(attr.dmac)); 2846 memset(&attr.dmac, 0, sizeof(attr.dmac));
2703 memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); 2847 memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
2704 2848
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c29a660c72fe..e3ef28861be6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
127 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, 127 [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
128 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, 128 [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
129 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, 129 [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
130 [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
130}; 131};
131 132
132static void ib_uverbs_add_one(struct ib_device *device); 133static void ib_uverbs_add_one(struct ib_device *device);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index abd97247443e..7d2f14c9bbef 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
141 dst->preference = src->preference; 141 dst->preference = src->preference;
142 dst->packet_life_time_selector = src->packet_life_time_selector; 142 dst->packet_life_time_selector = src->packet_life_time_selector;
143 143
144 memset(dst->smac, 0, sizeof(dst->smac));
145 memset(dst->dmac, 0, sizeof(dst->dmac)); 144 memset(dst->dmac, 0, sizeof(dst->dmac));
146 dst->vlan_id = 0xffff; 145 dst->net = NULL;
146 dst->ifindex = 0;
147} 147}
148EXPORT_SYMBOL(ib_copy_path_rec_from_user); 148EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e1f2c9887f3f..043a60ee6836 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -41,6 +41,9 @@
41#include <linux/export.h> 41#include <linux/export.h>
42#include <linux/string.h> 42#include <linux/string.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/in.h>
45#include <linux/in6.h>
46#include <net/addrconf.h>
44 47
45#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
46#include <rdma/ib_cache.h> 49#include <rdma/ib_cache.h>
@@ -70,7 +73,7 @@ static const char * const ib_events[] = {
70 [IB_EVENT_GID_CHANGE] = "GID changed", 73 [IB_EVENT_GID_CHANGE] = "GID changed",
71}; 74};
72 75
73const char *ib_event_msg(enum ib_event_type event) 76const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
74{ 77{
75 size_t index = event; 78 size_t index = event;
76 79
@@ -104,7 +107,7 @@ static const char * const wc_statuses[] = {
104 [IB_WC_GENERAL_ERR] = "general error", 107 [IB_WC_GENERAL_ERR] = "general error",
105}; 108};
106 109
107const char *ib_wc_status_msg(enum ib_wc_status status) 110const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
108{ 111{
109 size_t index = status; 112 size_t index = status;
110 113
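__attribute_const__ is the kernel's spelling of GCC's const function attribute: the message-lookup helpers promise a result that depends only on their argument, letting the compiler fold repeated calls. Paraphrased from the compiler headers:

    #define __attribute_const__ __attribute__((__const__))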
@@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
308} 311}
309EXPORT_SYMBOL(ib_create_ah); 312EXPORT_SYMBOL(ib_create_ah);
310 313
314struct find_gid_index_context {
315 u16 vlan_id;
316};
317
318static bool find_gid_index(const union ib_gid *gid,
319 const struct ib_gid_attr *gid_attr,
320 void *context)
321{
322 struct find_gid_index_context *ctx =
323 (struct find_gid_index_context *)context;
324
325 if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
326 (is_vlan_dev(gid_attr->ndev) &&
327 vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
328 return false;
329
330 return true;
331}
332
333static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
334 u16 vlan_id, const union ib_gid *sgid,
335 u16 *gid_index)
336{
337 struct find_gid_index_context context = {.vlan_id = vlan_id};
338
339 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
340 &context, gid_index);
341}
342
311int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 343int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
312 const struct ib_wc *wc, const struct ib_grh *grh, 344 const struct ib_wc *wc, const struct ib_grh *grh,
313 struct ib_ah_attr *ah_attr) 345 struct ib_ah_attr *ah_attr)
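find_gid_index() filters the GID table so the chosen SGID entry sits on a netdev whose VLAN state matches the completion: a non-VLAN device when vlan_id is 0xffff, otherwise a VLAN device carrying exactly that tag. An equivalent, hedged restatement of the predicate:

    static bool gid_matches_vlan(const struct ib_gid_attr *attr, u16 vlan_id)
    {
        bool want_vlan = vlan_id != 0xffff;

        if (is_vlan_dev(attr->ndev) != want_vlan)
            return false;
        return !want_vlan || vlan_dev_vlan_id(attr->ndev) == vlan_id;
    }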
@@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
318 350
319 memset(ah_attr, 0, sizeof *ah_attr); 351 memset(ah_attr, 0, sizeof *ah_attr);
320 if (rdma_cap_eth_ah(device, port_num)) { 352 if (rdma_cap_eth_ah(device, port_num)) {
353 u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
354 wc->vlan_id : 0xffff;
355
321 if (!(wc->wc_flags & IB_WC_GRH)) 356 if (!(wc->wc_flags & IB_WC_GRH))
322 return -EPROTOTYPE; 357 return -EPROTOTYPE;
323 358
324 if (wc->wc_flags & IB_WC_WITH_SMAC && 359 if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
325 wc->wc_flags & IB_WC_WITH_VLAN) { 360 !(wc->wc_flags & IB_WC_WITH_VLAN)) {
326 memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
327 ah_attr->vlan_id = wc->vlan_id;
328 } else {
329 ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, 361 ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
330 ah_attr->dmac, &ah_attr->vlan_id); 362 ah_attr->dmac,
363 wc->wc_flags & IB_WC_WITH_VLAN ?
364 NULL : &vlan_id,
365 0);
331 if (ret) 366 if (ret)
332 return ret; 367 return ret;
333 } 368 }
334 } else { 369
335 ah_attr->vlan_id = 0xffff; 370 ret = get_sgid_index_from_eth(device, port_num, vlan_id,
371 &grh->dgid, &gid_index);
372 if (ret)
373 return ret;
374
375 if (wc->wc_flags & IB_WC_WITH_SMAC)
376 memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
336 } 377 }
337 378
338 ah_attr->dlid = wc->slid; 379 ah_attr->dlid = wc->slid;
@@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
344 ah_attr->ah_flags = IB_AH_GRH; 385 ah_attr->ah_flags = IB_AH_GRH;
345 ah_attr->grh.dgid = grh->sgid; 386 ah_attr->grh.dgid = grh->sgid;
346 387
347 ret = ib_find_cached_gid(device, &grh->dgid, &port_num, 388 if (!rdma_cap_eth_ah(device, port_num)) {
348 &gid_index); 389 ret = ib_find_cached_gid_by_port(device, &grh->dgid,
349 if (ret) 390 port_num, NULL,
350 return ret; 391 &gid_index);
392 if (ret)
393 return ret;
394 }
351 395
352 ah_attr->grh.sgid_index = (u8) gid_index; 396 ah_attr->grh.sgid_index = (u8) gid_index;
353 flow_class = be32_to_cpu(grh->version_tclass_flow); 397 flow_class = be32_to_cpu(grh->version_tclass_flow);
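On RoCE ports the function now derives the VLAN from the completion, resolves the DMAC from the GRH only when the HCA did not report SMAC/VLAN, and selects the SGID index through the VLAN-aware filter above; IB ports keep the plain cached-GID lookup. A hedged consumer sketch (this is the flow the existing ib_create_ah_from_wc() helper wraps):

    struct ib_ah_attr ah_attr;
    int ret;

    ret = ib_init_ah_from_wc(device, port_num, wc, grh, &ah_attr);
    if (ret)
        return ERR_PTR(ret);
    return ib_create_ah(pd, &ah_attr);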
@@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
617static const struct { 661static const struct {
618 int valid; 662 int valid;
619 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 663 enum ib_qp_attr_mask req_param[IB_QPT_MAX];
620 enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
621 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 664 enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
622 enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
623} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 665} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
624 [IB_QPS_RESET] = { 666 [IB_QPS_RESET] = {
625 [IB_QPS_RESET] = { .valid = 1 }, 667 [IB_QPS_RESET] = { .valid = 1 },
@@ -700,12 +742,6 @@ static const struct {
700 IB_QP_MAX_DEST_RD_ATOMIC | 742 IB_QP_MAX_DEST_RD_ATOMIC |
701 IB_QP_MIN_RNR_TIMER), 743 IB_QP_MIN_RNR_TIMER),
702 }, 744 },
703 .req_param_add_eth = {
704 [IB_QPT_RC] = (IB_QP_SMAC),
705 [IB_QPT_UC] = (IB_QP_SMAC),
706 [IB_QPT_XRC_INI] = (IB_QP_SMAC),
707 [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
708 },
709 .opt_param = { 745 .opt_param = {
710 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 746 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
711 IB_QP_QKEY), 747 IB_QP_QKEY),
@@ -726,21 +762,7 @@ static const struct {
726 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 762 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
727 IB_QP_QKEY), 763 IB_QP_QKEY),
728 }, 764 },
729 .opt_param_add_eth = { 765 },
730 [IB_QPT_RC] = (IB_QP_ALT_SMAC |
731 IB_QP_VID |
732 IB_QP_ALT_VID),
733 [IB_QPT_UC] = (IB_QP_ALT_SMAC |
734 IB_QP_VID |
735 IB_QP_ALT_VID),
736 [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
737 IB_QP_VID |
738 IB_QP_ALT_VID),
739 [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
740 IB_QP_VID |
741 IB_QP_ALT_VID)
742 }
743 }
744 }, 766 },
745 [IB_QPS_RTR] = { 767 [IB_QPS_RTR] = {
746 [IB_QPS_RESET] = { .valid = 1 }, 768 [IB_QPS_RESET] = { .valid = 1 },
@@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
962 req_param = qp_state_table[cur_state][next_state].req_param[type]; 984 req_param = qp_state_table[cur_state][next_state].req_param[type];
963 opt_param = qp_state_table[cur_state][next_state].opt_param[type]; 985 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
964 986
965 if (ll == IB_LINK_LAYER_ETHERNET) {
966 req_param |= qp_state_table[cur_state][next_state].
967 req_param_add_eth[type];
968 opt_param |= qp_state_table[cur_state][next_state].
969 opt_param_add_eth[type];
970 }
971
972 if ((mask & req_param) != req_param) 987 if ((mask & req_param) != req_param)
973 return 0; 988 return 0;
974 989
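With the Ethernet-only mask tables gone, ib_modify_qp_is_ok() reduces to the single req_param/opt_param lookup for every link layer. Call shape, sketched against the signature visible in this hunk:

    if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type,
                            attr_mask, ll))
            return -EINVAL;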
@@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
979} 994}
980EXPORT_SYMBOL(ib_modify_qp_is_ok); 995EXPORT_SYMBOL(ib_modify_qp_is_ok);
981 996
982int ib_resolve_eth_l2_attrs(struct ib_qp *qp, 997int ib_resolve_eth_dmac(struct ib_qp *qp,
983 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 998 struct ib_qp_attr *qp_attr, int *qp_attr_mask)
984{ 999{
985 int ret = 0; 1000 int ret = 0;
986 union ib_gid sgid;
987 1001
988 if ((*qp_attr_mask & IB_QP_AV) && 1002 if (*qp_attr_mask & IB_QP_AV) {
989 (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) { 1003 if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
990 ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, 1004 qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
991 qp_attr->ah_attr.grh.sgid_index, &sgid); 1005 return -EINVAL;
992 if (ret) 1006
993 goto out; 1007 if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
1008 return 0;
1009
994 if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { 1010 if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
995 rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); 1011 rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
996 rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); 1012 qp_attr->ah_attr.dmac);
997 if (!(*qp_attr_mask & IB_QP_VID))
998 qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
999 } else { 1013 } else {
1000 ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, 1014 union ib_gid sgid;
1001 qp_attr->ah_attr.dmac, &qp_attr->vlan_id); 1015 struct ib_gid_attr sgid_attr;
1002 if (ret) 1016 int ifindex;
1003 goto out; 1017
1004 ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); 1018 ret = ib_query_gid(qp->device,
1005 if (ret) 1019 qp_attr->ah_attr.port_num,
1020 qp_attr->ah_attr.grh.sgid_index,
1021 &sgid, &sgid_attr);
1022
1023 if (ret || !sgid_attr.ndev) {
1024 if (!ret)
1025 ret = -ENXIO;
1006 goto out; 1026 goto out;
1027 }
1028
1029 ifindex = sgid_attr.ndev->ifindex;
1030
1031 ret = rdma_addr_find_dmac_by_grh(&sgid,
1032 &qp_attr->ah_attr.grh.dgid,
1033 qp_attr->ah_attr.dmac,
1034 NULL, ifindex);
1035
1036 dev_put(sgid_attr.ndev);
1007 } 1037 }
1008 *qp_attr_mask |= IB_QP_SMAC;
1009 if (qp_attr->vlan_id < 0xFFFF)
1010 *qp_attr_mask |= IB_QP_VID;
1011 } 1038 }
1012out: 1039out:
1013 return ret; 1040 return ret;
1014} 1041}
1015EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); 1042EXPORT_SYMBOL(ib_resolve_eth_dmac);
1016 1043
1017 1044
1018int ib_modify_qp(struct ib_qp *qp, 1045int ib_modify_qp(struct ib_qp *qp,
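Link-local RoCE GIDs (fe80::/64) carry the port MAC in EUI-64 form, which is why the first branch above needs no lookup at all. A hedged sketch of the decode rdma_get_ll_mac() is assumed to perform (undo the universal/local bit flip and skip the ff:fe filler bytes):

    mac[0] = gid.raw[8] ^ 2;
    mac[1] = gid.raw[9];
    mac[2] = gid.raw[10];
    mac[3] = gid.raw[13];
    mac[4] = gid.raw[14];
    mac[5] = gid.raw[15];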
@@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
1021{ 1048{
1022 int ret; 1049 int ret;
1023 1050
1024 ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); 1051 ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
1025 if (ret) 1052 if (ret)
1026 return ret; 1053 return ret;
1027 1054
@@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
1253} 1280}
1254EXPORT_SYMBOL(ib_alloc_mr); 1281EXPORT_SYMBOL(ib_alloc_mr);
1255 1282
1256struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
1257 int max_page_list_len)
1258{
1259 struct ib_fast_reg_page_list *page_list;
1260
1261 if (!device->alloc_fast_reg_page_list)
1262 return ERR_PTR(-ENOSYS);
1263
1264 page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
1265
1266 if (!IS_ERR(page_list)) {
1267 page_list->device = device;
1268 page_list->max_page_list_len = max_page_list_len;
1269 }
1270
1271 return page_list;
1272}
1273EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
1274
1275void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
1276{
1277 page_list->device->free_fast_reg_page_list(page_list);
1278}
1279EXPORT_SYMBOL(ib_free_fast_reg_page_list);
1280
1281/* Memory windows */ 1283/* Memory windows */
1282 1284
1283struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) 1285struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
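The fast_reg page-list API is removed in favor of the scatterlist-based registration added at the bottom of this file. A hedged migration sketch for a ULP that used to fill an ib_fast_reg_page_list by hand (sgl must already be DMA-mapped):

    int n;

    n = ib_map_mr_sg(mr, sgl, sg_nents, PAGE_SIZE);
    if (n < sg_nents)               /* error or partial mapping */
        return n < 0 ? n : -EINVAL;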
@@ -1469,3 +1471,110 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
1469 mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; 1471 mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
1470} 1472}
1471EXPORT_SYMBOL(ib_check_mr_status); 1473EXPORT_SYMBOL(ib_check_mr_status);
1474
1475/**
1476 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
 1477 * and set it on the memory region.
1478 * @mr: memory region
1479 * @sg: dma mapped scatterlist
1480 * @sg_nents: number of entries in sg
1481 * @page_size: page vector desired page size
1482 *
1483 * Constraints:
1484 * - The first sg element is allowed to have an offset.
1485 * - Each sg element must be aligned to page_size (or physically
1486 * contiguous to the previous element). In case an sg element has a
 1487 * non-contiguous offset, the mapping prefix will not include it.
1488 * - The last sg element is allowed to have length less than page_size.
 1489 * - If sg_nents total byte length exceeds the MR's max_num_sg * page_size
 1490 * then only max_num_sg entries will be mapped.
1491 *
1492 * Returns the number of sg elements that were mapped to the memory region.
1493 *
1494 * After this completes successfully, the memory region
1495 * is ready for registration.
1496 */
1497int ib_map_mr_sg(struct ib_mr *mr,
1498 struct scatterlist *sg,
1499 int sg_nents,
1500 unsigned int page_size)
1501{
1502 if (unlikely(!mr->device->map_mr_sg))
1503 return -ENOSYS;
1504
1505 mr->page_size = page_size;
1506
1507 return mr->device->map_mr_sg(mr, sg, sg_nents);
1508}
1509EXPORT_SYMBOL(ib_map_mr_sg);
1510
1511/**
1512 * ib_sg_to_pages() - Convert the largest prefix of a sg list
1513 * to a page vector
1514 * @mr: memory region
1515 * @sgl: dma mapped scatterlist
1516 * @sg_nents: number of entries in sg
1517 * @set_page: driver page assignment function pointer
1518 *
 1519 * Core service helper for drivers to convert the largest
 1520 * prefix of a given sg list to a page vector. The sg list
 1521 * prefix converted is the prefix that meets the requirements
 1522 * of ib_map_mr_sg.
1523 *
1524 * Returns the number of sg elements that were assigned to
1525 * a page vector.
1526 */
1527int ib_sg_to_pages(struct ib_mr *mr,
1528 struct scatterlist *sgl,
1529 int sg_nents,
1530 int (*set_page)(struct ib_mr *, u64))
1531{
1532 struct scatterlist *sg;
1533 u64 last_end_dma_addr = 0, last_page_addr = 0;
1534 unsigned int last_page_off = 0;
1535 u64 page_mask = ~((u64)mr->page_size - 1);
1536 int i;
1537
1538 mr->iova = sg_dma_address(&sgl[0]);
1539 mr->length = 0;
1540
1541 for_each_sg(sgl, sg, sg_nents, i) {
1542 u64 dma_addr = sg_dma_address(sg);
1543 unsigned int dma_len = sg_dma_len(sg);
1544 u64 end_dma_addr = dma_addr + dma_len;
1545 u64 page_addr = dma_addr & page_mask;
1546
1547 if (i && page_addr != dma_addr) {
1548 if (last_end_dma_addr != dma_addr) {
1549 /* gap */
1550 goto done;
1551
1552 } else if (last_page_off + dma_len <= mr->page_size) {
1553 /* chunk this fragment with the last */
1554 mr->length += dma_len;
1555 last_end_dma_addr += dma_len;
1556 last_page_off += dma_len;
1557 continue;
1558 } else {
1559 /* map starting from the next page */
1560 page_addr = last_page_addr + mr->page_size;
1561 dma_len -= mr->page_size - last_page_off;
1562 }
1563 }
1564
1565 do {
1566 if (unlikely(set_page(mr, page_addr)))
1567 goto done;
1568 page_addr += mr->page_size;
1569 } while (page_addr < end_dma_addr);
1570
1571 mr->length += dma_len;
1572 last_end_dma_addr = end_dma_addr;
1573 last_page_addr = end_dma_addr & page_mask;
1574 last_page_off = end_dma_addr & ~page_mask;
1575 }
1576
1577done:
1578 return i;
1579}
1580EXPORT_SYMBOL(ib_sg_to_pages);
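ib_sg_to_pages() delegates per-driver storage to the set_page() callback, invoked once per page-sized block of the mapped prefix. A hedged sketch of a driver-side callback (struct my_mr and its fields are illustrative, not from a real driver):

    static int my_mr_set_page(struct ib_mr *ibmr, u64 addr)
    {
        struct my_mr *mr = container_of(ibmr, struct my_mr, ibmr);

        if (mr->npages == mr->max_pages)
            return -ENOMEM;         /* stops the prefix walk */
        mr->pages[mr->npages++] = addr;
        return 0;
    }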