aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds/ib.c
diff options
context:
space:
mode:
authorKa-Cheong Poon <ka-cheong.poon@oracle.com>2018-07-23 23:51:22 -0400
committerDavid S. Miller <davem@davemloft.net>2018-07-24 00:17:44 -0400
commit1e2b44e78eead7bcadfbf96f70d95773191541c9 (patch)
treee7944339dd957ae23cfd690cb0ad6962d98c053c /net/rds/ib.c
parenteee2fa6ab3225192d6d894c54a6fb02ac9efdff6 (diff)
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the listener is now an IPv6 endpoint which accepts both IPv4 and IPv6 connection requests. RDS/RDMA/IB uses a private data (struct rds_ib_connect_private) exchange between endpoints at RDS connection establishment time to support RDMA. This private data exchange uses a 32-bit integer to represent an IP address. This needs to be changed in order to support IPv6. A new private data struct rds6_ib_connect_private is introduced to handle this. To ensure backward compatibility, an IPv6-capable RDS stack uses another RDMA listener port (RDS_CM_PORT) to accept IPv6 connections, and it continues to use the original RDS_PORT for IPv4 RDS connections. When it needs to communicate with an IPv6 peer, it uses the RDS_CM_PORT to send the connection setup request. v5: Fixed syntax problem (David Miller). v4: Changed port history comments in rds.h (Sowmini Varadhan). v3: Added support to set up an IPv4 connection using a mapped address (David Miller). Added support to set up a connection between link-local and non-link-local addresses. Addressed various review comments from Santosh Shilimkar and Sowmini Varadhan. v2: Fixed the bound and peer address scope mismatch issue. Added back rds_connect() IPv6 changes. Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/ib.c')
-rw-r--r--net/rds/ib.c55
1 files changed, 47 insertions, 8 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c
index c712a848957d..756225c5540f 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. 2 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -39,6 +39,7 @@
39#include <linux/delay.h> 39#include <linux/delay.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/module.h> 41#include <linux/module.h>
42#include <net/addrconf.h>
42 43
43#include "rds_single_path.h" 44#include "rds_single_path.h"
44#include "rds.h" 45#include "rds.h"
@@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
295 /* We will only ever look at IB transports */ 296 /* We will only ever look at IB transports */
296 if (conn->c_trans != &rds_ib_transport) 297 if (conn->c_trans != &rds_ib_transport)
297 return 0; 298 return 0;
299 if (conn->c_isv6)
300 return 0;
298 301
299 iinfo->src_addr = conn->c_laddr.s6_addr32[3]; 302 iinfo->src_addr = conn->c_laddr.s6_addr32[3];
300 iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; 303 iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
@@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
330 sizeof(struct rds_info_rdma_connection)); 333 sizeof(struct rds_info_rdma_connection));
331} 334}
332 335
333
334/* 336/*
335 * Early RDS/IB was built to only bind to an address if there is an IPoIB 337 * Early RDS/IB was built to only bind to an address if there is an IPoIB
336 * device with that address set. 338 * device with that address set.
@@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
346{ 348{
347 int ret; 349 int ret;
348 struct rdma_cm_id *cm_id; 350 struct rdma_cm_id *cm_id;
351 struct sockaddr_in6 sin6;
349 struct sockaddr_in sin; 352 struct sockaddr_in sin;
353 struct sockaddr *sa;
354 bool isv4;
350 355
356 isv4 = ipv6_addr_v4mapped(addr);
351 /* Create a CMA ID and try to bind it. This catches both 357 /* Create a CMA ID and try to bind it. This catches both
352 * IB and iWARP capable NICs. 358 * IB and iWARP capable NICs.
353 */ 359 */
@@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
356 if (IS_ERR(cm_id)) 362 if (IS_ERR(cm_id))
357 return PTR_ERR(cm_id); 363 return PTR_ERR(cm_id);
358 364
359 memset(&sin, 0, sizeof(sin)); 365 if (isv4) {
360 sin.sin_family = AF_INET; 366 memset(&sin, 0, sizeof(sin));
361 sin.sin_addr.s_addr = addr->s6_addr32[3]; 367 sin.sin_family = AF_INET;
368 sin.sin_addr.s_addr = addr->s6_addr32[3];
369 sa = (struct sockaddr *)&sin;
370 } else {
371 memset(&sin6, 0, sizeof(sin6));
372 sin6.sin6_family = AF_INET6;
373 sin6.sin6_addr = *addr;
374 sin6.sin6_scope_id = scope_id;
375 sa = (struct sockaddr *)&sin6;
376
377 /* XXX Do a special IPv6 link local address check here. The
378 * reason is that rdma_bind_addr() always succeeds with IPv6
379 * link local address regardless it is indeed configured in a
380 * system.
381 */
382 if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
383 struct net_device *dev;
384
385 if (scope_id == 0)
386 return -EADDRNOTAVAIL;
387
388 /* Use init_net for now as RDS is not network
389 * name space aware.
390 */
391 dev = dev_get_by_index(&init_net, scope_id);
392 if (!dev)
393 return -EADDRNOTAVAIL;
394 if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
395 dev_put(dev);
396 return -EADDRNOTAVAIL;
397 }
398 dev_put(dev);
399 }
400 }
362 401
363 /* rdma_bind_addr will only succeed for IB & iWARP devices */ 402 /* rdma_bind_addr will only succeed for IB & iWARP devices */
364 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); 403 ret = rdma_bind_addr(cm_id, sa);
365 /* due to this, we will claim to support iWARP devices unless we 404 /* due to this, we will claim to support iWARP devices unless we
366 check node_type. */ 405 check node_type. */
367 if (ret || !cm_id->device || 406 if (ret || !cm_id->device ||
368 cm_id->device->node_type != RDMA_NODE_IB_CA) 407 cm_id->device->node_type != RDMA_NODE_IB_CA)
369 ret = -EADDRNOTAVAIL; 408 ret = -EADDRNOTAVAIL;
370 409
371 rdsdebug("addr %pI6c ret %d node type %d\n", 410 rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
372 addr, ret, 411 addr, scope_id, ret,
373 cm_id->device ? cm_id->device->node_type : -1); 412 cm_id->device ? cm_id->device->node_type : -1);
374 413
375 rdma_destroy_id(cm_id); 414 rdma_destroy_id(cm_id);