diff options
author | Ka-Cheong Poon <ka-cheong.poon@oracle.com> | 2018-07-23 23:51:22 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-07-24 00:17:44 -0400 |
commit | 1e2b44e78eead7bcadfbf96f70d95773191541c9 (patch) | |
tree | e7944339dd957ae23cfd690cb0ad6962d98c053c /net/rds/ib.c | |
parent | eee2fa6ab3225192d6d894c54a6fb02ac9efdff6 (diff) |
rds: Enable RDS IPv6 support
This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests. RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32-bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections, and it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses RDS_CM_PORT to
send the connection setup request.
v5: Fixed syntax problem (David Miller).
v4: Changed port history comments in rds.h (Sowmini Varadhan).
v3: Added support to set up IPv4 connection using mapped address
(David Miller).
Added support to set up connections between link-local and
non-link-local addresses.
Various review comments from Santosh Shilimkar and Sowmini Varadhan.
v2: Fixed bound and peer address scope mismatch issue.
Added back rds_connect() IPv6 changes.
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/rds/ib.c')
-rw-r--r-- | net/rds/ib.c | 55 |
1 files changed, 47 insertions, 8 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c index c712a848957d..756225c5540f 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/delay.h> | 39 | #include <linux/delay.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <net/addrconf.h> | ||
42 | 43 | ||
43 | #include "rds_single_path.h" | 44 | #include "rds_single_path.h" |
44 | #include "rds.h" | 45 | #include "rds.h" |
@@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, | |||
295 | /* We will only ever look at IB transports */ | 296 | /* We will only ever look at IB transports */ |
296 | if (conn->c_trans != &rds_ib_transport) | 297 | if (conn->c_trans != &rds_ib_transport) |
297 | return 0; | 298 | return 0; |
299 | if (conn->c_isv6) | ||
300 | return 0; | ||
298 | 301 | ||
299 | iinfo->src_addr = conn->c_laddr.s6_addr32[3]; | 302 | iinfo->src_addr = conn->c_laddr.s6_addr32[3]; |
300 | iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; | 303 | iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; |
@@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, | |||
330 | sizeof(struct rds_info_rdma_connection)); | 333 | sizeof(struct rds_info_rdma_connection)); |
331 | } | 334 | } |
332 | 335 | ||
333 | |||
334 | /* | 336 | /* |
335 | * Early RDS/IB was built to only bind to an address if there is an IPoIB | 337 | * Early RDS/IB was built to only bind to an address if there is an IPoIB |
336 | * device with that address set. | 338 | * device with that address set. |
@@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, | |||
346 | { | 348 | { |
347 | int ret; | 349 | int ret; |
348 | struct rdma_cm_id *cm_id; | 350 | struct rdma_cm_id *cm_id; |
351 | struct sockaddr_in6 sin6; | ||
349 | struct sockaddr_in sin; | 352 | struct sockaddr_in sin; |
353 | struct sockaddr *sa; | ||
354 | bool isv4; | ||
350 | 355 | ||
356 | isv4 = ipv6_addr_v4mapped(addr); | ||
351 | /* Create a CMA ID and try to bind it. This catches both | 357 | /* Create a CMA ID and try to bind it. This catches both |
352 | * IB and iWARP capable NICs. | 358 | * IB and iWARP capable NICs. |
353 | */ | 359 | */ |
@@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, | |||
356 | if (IS_ERR(cm_id)) | 362 | if (IS_ERR(cm_id)) |
357 | return PTR_ERR(cm_id); | 363 | return PTR_ERR(cm_id); |
358 | 364 | ||
359 | memset(&sin, 0, sizeof(sin)); | 365 | if (isv4) { |
360 | sin.sin_family = AF_INET; | 366 | memset(&sin, 0, sizeof(sin)); |
361 | sin.sin_addr.s_addr = addr->s6_addr32[3]; | 367 | sin.sin_family = AF_INET; |
368 | sin.sin_addr.s_addr = addr->s6_addr32[3]; | ||
369 | sa = (struct sockaddr *)&sin; | ||
370 | } else { | ||
371 | memset(&sin6, 0, sizeof(sin6)); | ||
372 | sin6.sin6_family = AF_INET6; | ||
373 | sin6.sin6_addr = *addr; | ||
374 | sin6.sin6_scope_id = scope_id; | ||
375 | sa = (struct sockaddr *)&sin6; | ||
376 | |||
377 | /* XXX Do a special IPv6 link local address check here. The | ||
378 | * reason is that rdma_bind_addr() always succeeds with IPv6 | ||
379 | * link local address regardless it is indeed configured in a | ||
380 | * system. | ||
381 | */ | ||
382 | if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { | ||
383 | struct net_device *dev; | ||
384 | |||
385 | if (scope_id == 0) | ||
386 | return -EADDRNOTAVAIL; | ||
387 | |||
388 | /* Use init_net for now as RDS is not network | ||
389 | * name space aware. | ||
390 | */ | ||
391 | dev = dev_get_by_index(&init_net, scope_id); | ||
392 | if (!dev) | ||
393 | return -EADDRNOTAVAIL; | ||
394 | if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { | ||
395 | dev_put(dev); | ||
396 | return -EADDRNOTAVAIL; | ||
397 | } | ||
398 | dev_put(dev); | ||
399 | } | ||
400 | } | ||
362 | 401 | ||
363 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ | 402 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ |
364 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | 403 | ret = rdma_bind_addr(cm_id, sa); |
365 | /* due to this, we will claim to support iWARP devices unless we | 404 | /* due to this, we will claim to support iWARP devices unless we |
366 | check node_type. */ | 405 | check node_type. */ |
367 | if (ret || !cm_id->device || | 406 | if (ret || !cm_id->device || |
368 | cm_id->device->node_type != RDMA_NODE_IB_CA) | 407 | cm_id->device->node_type != RDMA_NODE_IB_CA) |
369 | ret = -EADDRNOTAVAIL; | 408 | ret = -EADDRNOTAVAIL; |
370 | 409 | ||
371 | rdsdebug("addr %pI6c ret %d node type %d\n", | 410 | rdsdebug("addr %pI6c%%%u ret %d node type %d\n", |
372 | addr, ret, | 411 | addr, scope_id, ret, |
373 | cm_id->device ? cm_id->device->node_type : -1); | 412 | cm_id->device ? cm_id->device->node_type : -1); |
374 | 413 | ||
375 | rdma_destroy_id(cm_id); | 414 | rdma_destroy_id(cm_id); |