author    Eli Cohen <eli@dev.mellanox.co.il>    2010-10-13 15:26:51 -0400
committer Roland Dreier <rolandd@cisco.com>     2010-10-13 18:46:43 -0400
commit    3c86aa70bf677a31b71c8292e349242e26cbc743 (patch)
tree      7f38edd826e444b1232185e154f313e70966d250
parent    fac70d51914674ce8ae742ed73441ddb4770ad20 (diff)
RDMA/cm: Add RDMA CM support for IBoE devices
Add support for IBoE device binding and IP --> GID resolution.  Path
resolving and multicast joining are implemented within cma.c by filling
in the responses and running callbacks in the CMA work queue.

IP --> GID resolution always yields IPv6 link local addresses; remote
GIDs are derived from the destination MAC address of the remote port.
Multicast GIDs are always mapped to multicast MACs as is done in IPv6.
(IPv4 multicast is enabled by translating IPv4 multicast addresses to
IPv6 multicast as described in
<http://www.mail-archive.com/ipng@sunroof.eng.sun.com/msg02134.html>.)

Some helper functions are added to ib_addr.h.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
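To make the two mappings concrete, here is a minimal standalone userspace C
sketch (illustrative only, not part of the patch) of the translations the
message describes: a 6-byte MAC expanded into the EUI-64-based IPv6 link-local
GID, mirroring the iboe_mac_to_ll() helper added below, and a multicast GID
folded into a 33:33 Ethernet multicast MAC, mirroring rdma_get_mcast_mac().
The example MAC and group address are arbitrary.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* MAC -> IPv6 link-local GID: fe80::/64 prefix plus the EUI-64
 * expansion of the MAC (ff:fe filler, universal/local bit flipped). */
static void mac_to_ll_gid(uint8_t gid[16], const uint8_t mac[6])
{
	memset(gid, 0, 16);
	gid[0] = 0xfe;			/* fe80::/64 prefix */
	gid[1] = 0x80;
	memcpy(gid + 8, mac, 3);	/* upper three MAC bytes... */
	gid[8] ^= 2;			/* ...with the U/L bit flipped */
	gid[11] = 0xff;			/* EUI-64 ff:fe filler */
	gid[12] = 0xfe;
	memcpy(gid + 13, mac + 3, 3);	/* lower three MAC bytes */
}

/* Multicast GID -> Ethernet multicast MAC: 33:33 followed by the
 * low 32 bits of the group address, exactly as IPv6 does. */
static void mgid_to_mcast_mac(uint8_t mac[6], const uint8_t mgid[16])
{
	mac[0] = 0x33;
	mac[1] = 0x33;
	memcpy(mac + 2, mgid + 12, 4);
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x02, 0xc9, 0x01, 0x02, 0x03 };
	/* IPv4 group 239.1.2.3 mapped into an IPv6-style MGID
	 * (ff0e::ffff:ef01:0203), as cma_iboe_set_mgid() does below. */
	const uint8_t mgid[16] = { 0xff, 0x0e, 0, 0, 0, 0, 0, 0,
				   0, 0, 0xff, 0xff, 239, 1, 2, 3 };
	uint8_t gid[16], mmac[6];
	int i;

	mac_to_ll_gid(gid, mac);
	for (i = 0; i < 16; i += 2)	/* fe80::202:c9ff:fe01:203 */
		printf("%02x%02x%c", gid[i], gid[i + 1], i == 14 ? '\n' : ':');

	mgid_to_mcast_mac(mmac, mgid);
	for (i = 0; i < 6; i++)		/* 33:33:ef:01:02:03 */
		printf("%02x%c", mmac[i], i == 5 ? '\n' : ':');
	return 0;
}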
 drivers/infiniband/core/cma.c      | 309
 drivers/infiniband/core/sa_query.c |   5
 drivers/infiniband/core/ucma.c     |  45
 include/rdma/ib_addr.h             |  99
 4 files changed, 431 insertions(+), 27 deletions(-)
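For context, the sequence below shows roughly how a librdmacm consumer drives
the resolution path this patch extends: on a port whose link layer is
Ethernet, rdma_resolve_route() now completes through the new
cma_resolve_iboe_route() and the CMA work queue instead of an SA path query.
A hedged sketch, not part of the patch; error handling is elided and the
destination address and port are placeholders.

#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <rdma/rdma_cma.h>

int main(void)
{
	struct rdma_event_channel *ec = rdma_create_event_channel();
	struct rdma_cm_id *id;
	struct rdma_cm_event *event;
	struct sockaddr_in dst;

	memset(&dst, 0, sizeof dst);
	dst.sin_family = AF_INET;
	dst.sin_port = htons(18515);			/* placeholder port */
	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);	/* placeholder address */

	rdma_create_id(ec, &id, NULL, RDMA_PS_TCP);

	/* IP -> GID binding; on IBoE the SGID is derived from the MAC */
	rdma_resolve_addr(id, NULL, (struct sockaddr *)&dst, 2000);
	rdma_get_cm_event(ec, &event);	/* RDMA_CM_EVENT_ADDR_RESOLVED */
	rdma_ack_cm_event(event);

	/* path resolution; filled in from the CMA work queue on IBoE */
	rdma_resolve_route(id, 2000);
	rdma_get_cm_event(ec, &event);	/* RDMA_CM_EVENT_ROUTE_RESOLVED */
	rdma_ack_cm_event(event);

	rdma_destroy_id(id);
	rdma_destroy_event_channel(ec);
	return 0;
}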
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index b930b8110a63..f61bc0738488 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 #define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_MAX_CM_RETRIES 15
 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
+#define CMA_IBOE_PACKET_LIFETIME 18
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
@@ -157,6 +158,7 @@ struct cma_multicast {
 	struct list_head list;
 	void *context;
 	struct sockaddr_storage addr;
+	struct kref mcref;
 };
 
 struct cma_work {
@@ -173,6 +175,12 @@ struct cma_ndev_work {
 	struct rdma_cm_event event;
 };
 
+struct iboe_mcast_work {
+	struct work_struct work;
+	struct rdma_id_private *id;
+	struct cma_multicast *mc;
+};
+
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 	atomic_inc(&cma_dev->refcount);
 	id_priv->cma_dev = cma_dev;
 	id_priv->id.device = cma_dev->device;
+	id_priv->id.route.addr.dev_addr.transport =
+		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
@@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev)
 	complete(&cma_dev->comp);
 }
 
+static inline void release_mc(struct kref *kref)
+{
+	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
+
+	kfree(mc->multicast.ib);
+	kfree(mc);
+}
+
 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 {
 	list_del(&id_priv->list);
@@ -323,22 +341,63 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)
 	return ret;
 }
 
+static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
+{
+	int i;
+	int err;
+	struct ib_port_attr props;
+	union ib_gid tmp;
+
+	err = ib_query_port(device, port_num, &props);
+	if (err)
+		return 1;
+
+	for (i = 0; i < props.gid_tbl_len; ++i) {
+		err = ib_query_gid(device, port_num, i, &tmp);
+		if (err)
+			return 1;
+		if (!memcmp(&tmp, gid, sizeof tmp))
+			return 0;
+	}
+
+	return -EAGAIN;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_device *cma_dev;
-	union ib_gid gid;
+	union ib_gid gid, iboe_gid;
 	int ret = -ENODEV;
+	u8 port;
+	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
+		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 
-	rdma_addr_get_sgid(dev_addr, &gid);
+	iboe_addr_get_sgid(dev_addr, &iboe_gid);
+	memcpy(&gid, dev_addr->src_dev_addr +
+	       rdma_addr_gid_offset(dev_addr), sizeof gid);
 	list_for_each_entry(cma_dev, &dev_list, list) {
-		ret = ib_find_cached_gid(cma_dev->device, &gid,
-					 &id_priv->id.port_num, NULL);
-		if (!ret) {
-			cma_attach_to_dev(id_priv, cma_dev);
-			break;
+		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
+				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+					ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+				else
+					ret = find_gid_port(cma_dev->device, &gid, port);
+
+				if (!ret) {
+					id_priv->id.port_num = port;
+					goto out;
+				} else if (ret == 1)
+					break;
+			}
 		}
 	}
+
+out:
+	if (!ret)
+		cma_attach_to_dev(id_priv, cma_dev);
+
 	return ret;
 }
 
@@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	int ret;
+	u16 pkey;
+
+	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
+	    IB_LINK_LAYER_INFINIBAND)
+		pkey = ib_addr_get_pkey(dev_addr);
+	else
+		pkey = 0xffff;
 
 	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
-				  ib_addr_get_pkey(dev_addr),
-				  &qp_attr->pkey_index);
+				  pkey, &qp_attr->pkey_index);
 	if (ret)
 		return ret;
 
@@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps)
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-	case RDMA_TRANSPORT_IB:
+	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
+	case IB_LINK_LAYER_INFINIBAND:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 		mc = container_of(id_priv->mc_list.next,
 				  struct cma_multicast, list);
 		list_del(&mc->list);
-		ib_sa_free_multicast(mc->multicast.ib);
-		kfree(mc);
+		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ib_sa_free_multicast(mc->multicast.ib);
+			kfree(mc);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			kref_put(&mc->mcref, release_mc);
+			break;
+		default:
+			break;
+		}
 	}
 }
 
@@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
 		mutex_unlock(&lock);
-		switch (rdma_node_get_transport(id->device->node_type)) {
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
 		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1708,6 +1782,77 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 	return 0;
 }
 
+static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+{
+	struct rdma_route *route = &id_priv->id.route;
+	struct rdma_addr *addr = &route->addr;
+	struct cma_work *work;
+	int ret;
+	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
+	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
+	struct net_device *ndev = NULL;
+
+	if (src_addr->sin_family != dst_addr->sin_family)
+		return -EINVAL;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler);
+
+	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
+	if (!route->path_rec) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	route->num_paths = 1;
+
+	iboe_mac_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr);
+	iboe_mac_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr);
+
+	route->path_rec->hop_limit = 1;
+	route->path_rec->reversible = 1;
+	route->path_rec->pkey = cpu_to_be16(0xffff);
+	route->path_rec->mtu_selector = IB_SA_EQ;
+
+	if (addr->dev_addr.bound_dev_if)
+		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+	if (!ndev) {
+		ret = -ENODEV;
+		goto err2;
+	}
+
+	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
+	route->path_rec->rate_selector = IB_SA_EQ;
+	route->path_rec->rate = iboe_get_rate(ndev);
+	dev_put(ndev);
+	route->path_rec->packet_life_time_selector = IB_SA_EQ;
+	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+	if (!route->path_rec->mtu) {
+		ret = -EINVAL;
+		goto err2;
+	}
+
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	work->event.status = 0;
+
+	queue_work(cma_wq, &work->work);
+
+	return 0;
+
+err2:
+	kfree(route->path_rec);
+	route->path_rec = NULL;
+err1:
+	kfree(work);
+	return ret;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
@@ -1720,7 +1865,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	atomic_inc(&id_priv->refcount);
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_resolve_ib_route(id_priv, timeout_ms);
+		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ret = cma_resolve_ib_route(id_priv, timeout_ms);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			ret = cma_resolve_iboe_route(id_priv);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
 		break;
 	case RDMA_TRANSPORT_IWARP:
 		ret = cma_resolve_iw_route(id_priv, timeout_ms);
@@ -1773,7 +1927,7 @@ port_found:
 		goto out;
 
 	id_priv->id.route.addr.dev_addr.dev_type =
-		(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
 		ARPHRD_INFINIBAND : ARPHRD_ETHER;
 
 	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
@@ -2758,6 +2912,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	return 0;
 }
 
+static void iboe_mcast_work_handler(struct work_struct *work)
+{
+	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
+	struct cma_multicast *mc = mw->mc;
+	struct ib_sa_multicast *m = mc->multicast.ib;
+
+	mc->multicast.ib->context = mc;
+	cma_ib_mc_handler(0, m);
+	kref_put(&mc->mcref, release_mc);
+	kfree(mw);
+}
+
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+
+	if (cma_any_addr(addr)) {
+		memset(mgid, 0, sizeof *mgid);
+	} else if (addr->sa_family == AF_INET6) {
+		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else {
+		mgid->raw[0] = 0xff;
+		mgid->raw[1] = 0x0e;
+		mgid->raw[2] = 0;
+		mgid->raw[3] = 0;
+		mgid->raw[4] = 0;
+		mgid->raw[5] = 0;
+		mgid->raw[6] = 0;
+		mgid->raw[7] = 0;
+		mgid->raw[8] = 0;
+		mgid->raw[9] = 0;
+		mgid->raw[10] = 0xff;
+		mgid->raw[11] = 0xff;
+		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
+	}
+}
+
+static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+				   struct cma_multicast *mc)
+{
+	struct iboe_mcast_work *work;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	int err;
+	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
+	struct net_device *ndev = NULL;
+
+	if (cma_zero_addr((struct sockaddr *)&mc->addr))
+		return -EINVAL;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
+	if (!mc->multicast.ib) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+
+	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+	if (id_priv->id.ps == RDMA_PS_UDP)
+		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+
+	if (dev_addr->bound_dev_if)
+		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+	if (!ndev) {
+		err = -ENODEV;
+		goto out2;
+	}
+	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
+	mc->multicast.ib->rec.hop_limit = 1;
+	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+	dev_put(ndev);
+	if (!mc->multicast.ib->rec.mtu) {
+		err = -EINVAL;
+		goto out2;
+	}
+	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+	work->id = id_priv;
+	work->mc = mc;
+	INIT_WORK(&work->work, iboe_mcast_work_handler);
+	kref_get(&mc->mcref);
+	queue_work(cma_wq, &work->work);
+
+	return 0;
+
+out2:
+	kfree(mc->multicast.ib);
+out1:
+	kfree(work);
+	return err;
+}
+
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 			void *context)
 {
@@ -2784,7 +3034,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_join_ib_multicast(id_priv, mc);
+		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ret = cma_join_ib_multicast(id_priv, mc);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			kref_init(&mc->mcref);
+			ret = cma_iboe_join_multicast(id_priv, mc);
+			break;
+		default:
+			ret = -EINVAL;
+		}
 		break;
 	default:
 		ret = -ENOSYS;
@@ -2817,8 +3077,19 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 			ib_detach_mcast(id->qp,
 					&mc->multicast.ib->rec.mgid,
 					mc->multicast.ib->rec.mlid);
-		ib_sa_free_multicast(mc->multicast.ib);
-		kfree(mc);
+		if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
+			switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+			case IB_LINK_LAYER_INFINIBAND:
+				ib_sa_free_multicast(mc->multicast.ib);
+				kfree(mc);
+				break;
+			case IB_LINK_LAYER_ETHERNET:
+				kref_put(&mc->mcref, release_mc);
+				break;
+			default:
+				break;
+			}
+		}
 		return;
 	}
 }
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 27674c790a73..91a660310b7c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -496,6 +496,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 {
 	int ret;
 	u16 gid_index;
+	int force_grh;
 
 	memset(ah_attr, 0, sizeof *ah_attr);
 	ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -505,7 +506,9 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 	ah_attr->port_num = port_num;
 	ah_attr->static_rate = rec->rate;
 
-	if (rec->hop_limit > 1) {
+	force_grh = rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_ETHERNET;
+
+	if (rec->hop_limit > 1 || force_grh) {
 		ah_attr->ah_flags = IB_AH_GRH;
 		ah_attr->grh.dgid = rec->dgid;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index ac7edc24165c..3d3c9264c450 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -583,6 +583,34 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
 	}
 }
 
+static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
+				 struct rdma_route *route)
+{
+	struct rdma_dev_addr *dev_addr;
+
+	resp->num_paths = route->num_paths;
+	switch (route->num_paths) {
+	case 0:
+		dev_addr = &route->addr.dev_addr;
+		iboe_mac_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
+			       dev_addr->dst_dev_addr);
+		iboe_addr_get_sgid(dev_addr,
+				   (union ib_gid *) &resp->ib_route[0].sgid);
+		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
+		break;
+	case 2:
+		ib_copy_path_rec_to_user(&resp->ib_route[1],
+					 &route->path_rec[1]);
+		/* fall through */
+	case 1:
+		ib_copy_path_rec_to_user(&resp->ib_route[0],
+					 &route->path_rec[0]);
+		break;
+	default:
+		break;
+	}
+}
+
 static ssize_t ucma_query_route(struct ucma_file *file,
 				const char __user *inbuf,
 				int in_len, int out_len)
@@ -617,12 +645,17 @@ static ssize_t ucma_query_route(struct ucma_file *file,
 
 	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
 	resp.port_num = ctx->cm_id->port_num;
-	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
-	case RDMA_TRANSPORT_IB:
-		ucma_copy_ib_route(&resp, &ctx->cm_id->route);
-		break;
-	default:
-		break;
+	if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
+		switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ucma_copy_ib_route(&resp, &ctx->cm_id->route);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
+			break;
+		default:
+			break;
+		}
 	}
 
 out:
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index fa0d52b8e622..904ffa92fc93 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -40,6 +40,7 @@
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
 
 struct rdma_addr_client {
 	atomic_t refcount;
@@ -63,6 +64,7 @@ struct rdma_dev_addr {
 	unsigned char broadcast[MAX_ADDR_LEN];
 	unsigned short dev_type;
 	int bound_dev_if;
+	enum rdma_transport_type transport;
 };
 
 /**
@@ -127,9 +129,31 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
 	return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
 }
 
+static inline void iboe_mac_to_ll(union ib_gid *gid, u8 *mac)
+{
+	memset(gid->raw, 0, 16);
+	*((__be32 *) gid->raw) = cpu_to_be32(0xfe800000);
+	gid->raw[12] = 0xfe;
+	gid->raw[11] = 0xff;
+	memcpy(gid->raw + 13, mac + 3, 3);
+	memcpy(gid->raw + 8, mac, 3);
+	gid->raw[8] ^= 2;
+}
+
+static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
+				      union ib_gid *gid)
+{
+	iboe_mac_to_ll(gid, dev_addr->src_dev_addr);
+}
+
 static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-	memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
+	if (dev_addr->transport == RDMA_TRANSPORT_IB &&
+	    dev_addr->dev_type != ARPHRD_INFINIBAND)
+		iboe_addr_get_sgid(dev_addr, gid);
+	else
+		memcpy(gid, dev_addr->src_dev_addr +
+		       rdma_addr_gid_offset(dev_addr), sizeof *gid);
 }
 
 static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
@@ -147,4 +171,77 @@ static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_g
 	memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
 }
 
+static inline enum ib_mtu iboe_get_mtu(int mtu)
+{
+	/*
+	 * reduce IB headers from effective IBoE MTU. 28 stands for
+	 * atomic header which is the biggest possible header after BTH
+	 */
+	mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28;
+
+	if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096))
+		return IB_MTU_4096;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048))
+		return IB_MTU_2048;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024))
+		return IB_MTU_1024;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512))
+		return IB_MTU_512;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256))
+		return IB_MTU_256;
+	else
+		return 0;
+}
+
+static inline int iboe_get_rate(struct net_device *dev)
+{
+	struct ethtool_cmd cmd;
+
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings ||
+	    dev->ethtool_ops->get_settings(dev, &cmd))
+		return IB_RATE_PORT_CURRENT;
+
+	if (cmd.speed >= 40000)
+		return IB_RATE_40_GBPS;
+	else if (cmd.speed >= 30000)
+		return IB_RATE_30_GBPS;
+	else if (cmd.speed >= 20000)
+		return IB_RATE_20_GBPS;
+	else if (cmd.speed >= 10000)
+		return IB_RATE_10_GBPS;
+	else
+		return IB_RATE_PORT_CURRENT;
+}
+
+static inline int rdma_link_local_addr(struct in6_addr *addr)
+{
+	if (addr->s6_addr32[0] == htonl(0xfe800000) &&
+	    addr->s6_addr32[1] == 0)
+		return 1;
+
+	return 0;
+}
+
+static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+	memcpy(mac, &addr->s6_addr[8], 3);
+	memcpy(mac + 3, &addr->s6_addr[13], 3);
+	mac[0] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+	return addr->s6_addr[0] == 0xff;
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+	int i;
+
+	mac[0] = 0x33;
+	mac[1] = 0x33;
+	for (i = 2; i < 6; ++i)
+		mac[i] = addr->s6_addr[i + 10];
+}
+
 #endif /* IB_ADDR_H */