diff options
author | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 16:13:03 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2018-08-16 16:21:29 -0400 |
commit | 0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch) | |
tree | d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /net/rds | |
parent | 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff) | |
parent | 5c60a7389d795e001c8748b458eb76e3a5b6008c (diff) |
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window
Conflicts:
drivers/infiniband/core/rdma_core.c
- Use the rdma code and revise with the new spelling for
atomic_fetch_add_unless
drivers/nvme/host/rdma.c
- Replace max_sge with max_send_sge in new blk code
drivers/nvme/target/rdma.c
- Use the blk code and revise to use NULL for ib_post_recv when
appropriate
- Replace max_sge with max_recv_sge in new blk code
net/rds/ib_send.c
- Use the net code and revise to use NULL for ib_post_recv when
appropriate
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/Kconfig | 2 | ||||
-rw-r--r-- | net/rds/Makefile | 1 | ||||
-rw-r--r-- | net/rds/af_rds.c | 205 | ||||
-rw-r--r-- | net/rds/bind.c | 138 | ||||
-rw-r--r-- | net/rds/cong.c | 23 | ||||
-rw-r--r-- | net/rds/connection.c | 283 | ||||
-rw-r--r-- | net/rds/ib.c | 136 | ||||
-rw-r--r-- | net/rds/ib.h | 53 | ||||
-rw-r--r-- | net/rds/ib_cm.c | 320 | ||||
-rw-r--r-- | net/rds/ib_frmr.c | 1 | ||||
-rw-r--r-- | net/rds/ib_mr.h | 2 | ||||
-rw-r--r-- | net/rds/ib_rdma.c | 26 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 33 | ||||
-rw-r--r-- | net/rds/ib_send.c | 13 | ||||
-rw-r--r-- | net/rds/loop.c | 7 | ||||
-rw-r--r-- | net/rds/message.c | 1 | ||||
-rw-r--r-- | net/rds/rdma.c | 6 | ||||
-rw-r--r-- | net/rds/rdma_transport.c | 95 | ||||
-rw-r--r-- | net/rds/rdma_transport.h | 5 | ||||
-rw-r--r-- | net/rds/rds.h | 88 | ||||
-rw-r--r-- | net/rds/recv.c | 78 | ||||
-rw-r--r-- | net/rds/send.c | 116 | ||||
-rw-r--r-- | net/rds/tcp.c | 154 | ||||
-rw-r--r-- | net/rds/tcp.h | 2 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 68 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 87 | ||||
-rw-r--r-- | net/rds/tcp_recv.c | 9 | ||||
-rw-r--r-- | net/rds/tcp_send.c | 4 | ||||
-rw-r--r-- | net/rds/threads.c | 69 | ||||
-rw-r--r-- | net/rds/transport.c | 16 |
30 files changed, 1600 insertions, 441 deletions
diff --git a/net/rds/Kconfig b/net/rds/Kconfig index bffde4b46c5d..01b3bd6a3708 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig | |||
@@ -16,6 +16,7 @@ config RDS_RDMA | |||
16 | config RDS_TCP | 16 | config RDS_TCP |
17 | tristate "RDS over TCP" | 17 | tristate "RDS over TCP" |
18 | depends on RDS | 18 | depends on RDS |
19 | depends on IPV6 || !IPV6 | ||
19 | ---help--- | 20 | ---help--- |
20 | Allow RDS to use TCP as a transport. | 21 | Allow RDS to use TCP as a transport. |
21 | This transport does not support RDMA operations. | 22 | This transport does not support RDMA operations. |
@@ -24,4 +25,3 @@ config RDS_DEBUG | |||
24 | bool "RDS debugging messages" | 25 | bool "RDS debugging messages" |
25 | depends on RDS | 26 | depends on RDS |
26 | default n | 27 | default n |
27 | |||
diff --git a/net/rds/Makefile b/net/rds/Makefile index b5d568bd479c..e647f9de104a 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile | |||
@@ -15,4 +15,3 @@ rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ | |||
15 | tcp_send.o tcp_stats.o | 15 | tcp_send.o tcp_stats.o |
16 | 16 | ||
17 | ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG | 17 | ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG |
18 | |||
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index ab751a150f70..65387e1e6964 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/kernel.h> | 35 | #include <linux/kernel.h> |
36 | #include <linux/gfp.h> | 36 | #include <linux/gfp.h> |
37 | #include <linux/in.h> | 37 | #include <linux/in.h> |
38 | #include <linux/ipv6.h> | ||
38 | #include <linux/poll.h> | 39 | #include <linux/poll.h> |
39 | #include <net/sock.h> | 40 | #include <net/sock.h> |
40 | 41 | ||
@@ -113,26 +114,82 @@ void rds_wake_sk_sleep(struct rds_sock *rs) | |||
113 | static int rds_getname(struct socket *sock, struct sockaddr *uaddr, | 114 | static int rds_getname(struct socket *sock, struct sockaddr *uaddr, |
114 | int peer) | 115 | int peer) |
115 | { | 116 | { |
116 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
117 | struct rds_sock *rs = rds_sk_to_rs(sock->sk); | 117 | struct rds_sock *rs = rds_sk_to_rs(sock->sk); |
118 | 118 | struct sockaddr_in6 *sin6; | |
119 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | 119 | struct sockaddr_in *sin; |
120 | int uaddr_len; | ||
120 | 121 | ||
121 | /* racey, don't care */ | 122 | /* racey, don't care */ |
122 | if (peer) { | 123 | if (peer) { |
123 | if (!rs->rs_conn_addr) | 124 | if (ipv6_addr_any(&rs->rs_conn_addr)) |
124 | return -ENOTCONN; | 125 | return -ENOTCONN; |
125 | 126 | ||
126 | sin->sin_port = rs->rs_conn_port; | 127 | if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { |
127 | sin->sin_addr.s_addr = rs->rs_conn_addr; | 128 | sin = (struct sockaddr_in *)uaddr; |
129 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
130 | sin->sin_family = AF_INET; | ||
131 | sin->sin_port = rs->rs_conn_port; | ||
132 | sin->sin_addr.s_addr = rs->rs_conn_addr_v4; | ||
133 | uaddr_len = sizeof(*sin); | ||
134 | } else { | ||
135 | sin6 = (struct sockaddr_in6 *)uaddr; | ||
136 | sin6->sin6_family = AF_INET6; | ||
137 | sin6->sin6_port = rs->rs_conn_port; | ||
138 | sin6->sin6_addr = rs->rs_conn_addr; | ||
139 | sin6->sin6_flowinfo = 0; | ||
140 | /* scope_id is the same as in the bound address. */ | ||
141 | sin6->sin6_scope_id = rs->rs_bound_scope_id; | ||
142 | uaddr_len = sizeof(*sin6); | ||
143 | } | ||
128 | } else { | 144 | } else { |
129 | sin->sin_port = rs->rs_bound_port; | 145 | /* If socket is not yet bound and the socket is connected, |
130 | sin->sin_addr.s_addr = rs->rs_bound_addr; | 146 | * set the return address family to be the same as the |
147 | * connected address, but with 0 address value. If it is not | ||
148 | * connected, set the family to be AF_UNSPEC (value 0) and | ||
149 | * the address size to be that of an IPv4 address. | ||
150 | */ | ||
151 | if (ipv6_addr_any(&rs->rs_bound_addr)) { | ||
152 | if (ipv6_addr_any(&rs->rs_conn_addr)) { | ||
153 | sin = (struct sockaddr_in *)uaddr; | ||
154 | memset(sin, 0, sizeof(*sin)); | ||
155 | sin->sin_family = AF_UNSPEC; | ||
156 | return sizeof(*sin); | ||
157 | } | ||
158 | |||
159 | #if IS_ENABLED(CONFIG_IPV6) | ||
160 | if (!(ipv6_addr_type(&rs->rs_conn_addr) & | ||
161 | IPV6_ADDR_MAPPED)) { | ||
162 | sin6 = (struct sockaddr_in6 *)uaddr; | ||
163 | memset(sin6, 0, sizeof(*sin6)); | ||
164 | sin6->sin6_family = AF_INET6; | ||
165 | return sizeof(*sin6); | ||
166 | } | ||
167 | #endif | ||
168 | |||
169 | sin = (struct sockaddr_in *)uaddr; | ||
170 | memset(sin, 0, sizeof(*sin)); | ||
171 | sin->sin_family = AF_INET; | ||
172 | return sizeof(*sin); | ||
173 | } | ||
174 | if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { | ||
175 | sin = (struct sockaddr_in *)uaddr; | ||
176 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
177 | sin->sin_family = AF_INET; | ||
178 | sin->sin_port = rs->rs_bound_port; | ||
179 | sin->sin_addr.s_addr = rs->rs_bound_addr_v4; | ||
180 | uaddr_len = sizeof(*sin); | ||
181 | } else { | ||
182 | sin6 = (struct sockaddr_in6 *)uaddr; | ||
183 | sin6->sin6_family = AF_INET6; | ||
184 | sin6->sin6_port = rs->rs_bound_port; | ||
185 | sin6->sin6_addr = rs->rs_bound_addr; | ||
186 | sin6->sin6_flowinfo = 0; | ||
187 | sin6->sin6_scope_id = rs->rs_bound_scope_id; | ||
188 | uaddr_len = sizeof(*sin6); | ||
189 | } | ||
131 | } | 190 | } |
132 | 191 | ||
133 | sin->sin_family = AF_INET; | 192 | return uaddr_len; |
134 | |||
135 | return sizeof(*sin); | ||
136 | } | 193 | } |
137 | 194 | ||
138 | /* | 195 | /* |
@@ -203,11 +260,12 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
203 | static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, | 260 | static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, |
204 | int len) | 261 | int len) |
205 | { | 262 | { |
263 | struct sockaddr_in6 sin6; | ||
206 | struct sockaddr_in sin; | 264 | struct sockaddr_in sin; |
207 | int ret = 0; | 265 | int ret = 0; |
208 | 266 | ||
209 | /* racing with another thread binding seems ok here */ | 267 | /* racing with another thread binding seems ok here */ |
210 | if (rs->rs_bound_addr == 0) { | 268 | if (ipv6_addr_any(&rs->rs_bound_addr)) { |
211 | ret = -ENOTCONN; /* XXX not a great errno */ | 269 | ret = -ENOTCONN; /* XXX not a great errno */ |
212 | goto out; | 270 | goto out; |
213 | } | 271 | } |
@@ -215,14 +273,23 @@ static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, | |||
215 | if (len < sizeof(struct sockaddr_in)) { | 273 | if (len < sizeof(struct sockaddr_in)) { |
216 | ret = -EINVAL; | 274 | ret = -EINVAL; |
217 | goto out; | 275 | goto out; |
276 | } else if (len < sizeof(struct sockaddr_in6)) { | ||
277 | /* Assume IPv4 */ | ||
278 | if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { | ||
279 | ret = -EFAULT; | ||
280 | goto out; | ||
281 | } | ||
282 | ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); | ||
283 | sin6.sin6_port = sin.sin_port; | ||
284 | } else { | ||
285 | if (copy_from_user(&sin6, optval, | ||
286 | sizeof(struct sockaddr_in6))) { | ||
287 | ret = -EFAULT; | ||
288 | goto out; | ||
289 | } | ||
218 | } | 290 | } |
219 | 291 | ||
220 | if (copy_from_user(&sin, optval, sizeof(sin))) { | 292 | rds_send_drop_to(rs, &sin6); |
221 | ret = -EFAULT; | ||
222 | goto out; | ||
223 | } | ||
224 | |||
225 | rds_send_drop_to(rs, &sin); | ||
226 | out: | 293 | out: |
227 | return ret; | 294 | return ret; |
228 | } | 295 | } |
@@ -435,31 +502,91 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, | |||
435 | int addr_len, int flags) | 502 | int addr_len, int flags) |
436 | { | 503 | { |
437 | struct sock *sk = sock->sk; | 504 | struct sock *sk = sock->sk; |
438 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | 505 | struct sockaddr_in *sin; |
439 | struct rds_sock *rs = rds_sk_to_rs(sk); | 506 | struct rds_sock *rs = rds_sk_to_rs(sk); |
440 | int ret = 0; | 507 | int ret = 0; |
441 | 508 | ||
442 | lock_sock(sk); | 509 | lock_sock(sk); |
443 | 510 | ||
444 | if (addr_len != sizeof(struct sockaddr_in)) { | 511 | switch (uaddr->sa_family) { |
445 | ret = -EINVAL; | 512 | case AF_INET: |
446 | goto out; | 513 | sin = (struct sockaddr_in *)uaddr; |
447 | } | 514 | if (addr_len < sizeof(struct sockaddr_in)) { |
515 | ret = -EINVAL; | ||
516 | break; | ||
517 | } | ||
518 | if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { | ||
519 | ret = -EDESTADDRREQ; | ||
520 | break; | ||
521 | } | ||
522 | if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || | ||
523 | sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { | ||
524 | ret = -EINVAL; | ||
525 | break; | ||
526 | } | ||
527 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); | ||
528 | rs->rs_conn_port = sin->sin_port; | ||
529 | break; | ||
448 | 530 | ||
449 | if (sin->sin_family != AF_INET) { | 531 | #if IS_ENABLED(CONFIG_IPV6) |
450 | ret = -EAFNOSUPPORT; | 532 | case AF_INET6: { |
451 | goto out; | 533 | struct sockaddr_in6 *sin6; |
452 | } | 534 | int addr_type; |
453 | 535 | ||
454 | if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { | 536 | sin6 = (struct sockaddr_in6 *)uaddr; |
455 | ret = -EDESTADDRREQ; | 537 | if (addr_len < sizeof(struct sockaddr_in6)) { |
456 | goto out; | 538 | ret = -EINVAL; |
539 | break; | ||
540 | } | ||
541 | addr_type = ipv6_addr_type(&sin6->sin6_addr); | ||
542 | if (!(addr_type & IPV6_ADDR_UNICAST)) { | ||
543 | __be32 addr4; | ||
544 | |||
545 | if (!(addr_type & IPV6_ADDR_MAPPED)) { | ||
546 | ret = -EPROTOTYPE; | ||
547 | break; | ||
548 | } | ||
549 | |||
550 | /* It is a mapped address. Need to do some sanity | ||
551 | * checks. | ||
552 | */ | ||
553 | addr4 = sin6->sin6_addr.s6_addr32[3]; | ||
554 | if (addr4 == htonl(INADDR_ANY) || | ||
555 | addr4 == htonl(INADDR_BROADCAST) || | ||
556 | IN_MULTICAST(ntohl(addr4))) { | ||
557 | ret = -EPROTOTYPE; | ||
558 | break; | ||
559 | } | ||
560 | } | ||
561 | |||
562 | if (addr_type & IPV6_ADDR_LINKLOCAL) { | ||
563 | /* If socket is already bound to a link local address, | ||
564 | * the peer address must be on the same link. | ||
565 | */ | ||
566 | if (sin6->sin6_scope_id == 0 || | ||
567 | (!ipv6_addr_any(&rs->rs_bound_addr) && | ||
568 | rs->rs_bound_scope_id && | ||
569 | sin6->sin6_scope_id != rs->rs_bound_scope_id)) { | ||
570 | ret = -EINVAL; | ||
571 | break; | ||
572 | } | ||
573 | /* Remember the connected address scope ID. It will | ||
574 | * be checked against the binding local address when | ||
575 | * the socket is bound. | ||
576 | */ | ||
577 | rs->rs_bound_scope_id = sin6->sin6_scope_id; | ||
578 | } | ||
579 | rs->rs_conn_addr = sin6->sin6_addr; | ||
580 | rs->rs_conn_port = sin6->sin6_port; | ||
581 | break; | ||
457 | } | 582 | } |
583 | #endif | ||
458 | 584 | ||
459 | rs->rs_conn_addr = sin->sin_addr.s_addr; | 585 | default: |
460 | rs->rs_conn_port = sin->sin_port; | 586 | ret = -EAFNOSUPPORT; |
587 | break; | ||
588 | } | ||
461 | 589 | ||
462 | out: | ||
463 | release_sock(sk); | 590 | release_sock(sk); |
464 | return ret; | 591 | return ret; |
465 | } | 592 | } |
@@ -578,8 +705,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, | |||
578 | list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { | 705 | list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { |
579 | total++; | 706 | total++; |
580 | if (total <= len) | 707 | if (total <= len) |
581 | rds_inc_info_copy(inc, iter, inc->i_saddr, | 708 | rds_inc_info_copy(inc, iter, |
582 | rs->rs_bound_addr, 1); | 709 | inc->i_saddr.s6_addr32[3], |
710 | rs->rs_bound_addr_v4, | ||
711 | 1); | ||
583 | } | 712 | } |
584 | 713 | ||
585 | read_unlock(&rs->rs_recv_lock); | 714 | read_unlock(&rs->rs_recv_lock); |
@@ -608,8 +737,8 @@ static void rds_sock_info(struct socket *sock, unsigned int len, | |||
608 | list_for_each_entry(rs, &rds_sock_list, rs_item) { | 737 | list_for_each_entry(rs, &rds_sock_list, rs_item) { |
609 | sinfo.sndbuf = rds_sk_sndbuf(rs); | 738 | sinfo.sndbuf = rds_sk_sndbuf(rs); |
610 | sinfo.rcvbuf = rds_sk_rcvbuf(rs); | 739 | sinfo.rcvbuf = rds_sk_rcvbuf(rs); |
611 | sinfo.bound_addr = rs->rs_bound_addr; | 740 | sinfo.bound_addr = rs->rs_bound_addr_v4; |
612 | sinfo.connected_addr = rs->rs_conn_addr; | 741 | sinfo.connected_addr = rs->rs_conn_addr_v4; |
613 | sinfo.bound_port = rs->rs_bound_port; | 742 | sinfo.bound_port = rs->rs_bound_port; |
614 | sinfo.connected_port = rs->rs_conn_port; | 743 | sinfo.connected_port = rs->rs_conn_port; |
615 | sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); | 744 | sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); |
diff --git a/net/rds/bind.c b/net/rds/bind.c index 5aa3a64aa4f0..3ab55784b637 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <net/sock.h> | 34 | #include <net/sock.h> |
35 | #include <linux/in.h> | 35 | #include <linux/in.h> |
36 | #include <linux/ipv6.h> | ||
36 | #include <linux/if_arp.h> | 37 | #include <linux/if_arp.h> |
37 | #include <linux/jhash.h> | 38 | #include <linux/jhash.h> |
38 | #include <linux/ratelimit.h> | 39 | #include <linux/ratelimit.h> |
@@ -42,42 +43,58 @@ static struct rhashtable bind_hash_table; | |||
42 | 43 | ||
43 | static const struct rhashtable_params ht_parms = { | 44 | static const struct rhashtable_params ht_parms = { |
44 | .nelem_hint = 768, | 45 | .nelem_hint = 768, |
45 | .key_len = sizeof(u64), | 46 | .key_len = RDS_BOUND_KEY_LEN, |
46 | .key_offset = offsetof(struct rds_sock, rs_bound_key), | 47 | .key_offset = offsetof(struct rds_sock, rs_bound_key), |
47 | .head_offset = offsetof(struct rds_sock, rs_bound_node), | 48 | .head_offset = offsetof(struct rds_sock, rs_bound_node), |
48 | .max_size = 16384, | 49 | .max_size = 16384, |
49 | .min_size = 1024, | 50 | .min_size = 1024, |
50 | }; | 51 | }; |
51 | 52 | ||
53 | /* Create a key for the bind hash table manipulation. Port is in network byte | ||
54 | * order. | ||
55 | */ | ||
56 | static inline void __rds_create_bind_key(u8 *key, const struct in6_addr *addr, | ||
57 | __be16 port, __u32 scope_id) | ||
58 | { | ||
59 | memcpy(key, addr, sizeof(*addr)); | ||
60 | key += sizeof(*addr); | ||
61 | memcpy(key, &port, sizeof(port)); | ||
62 | key += sizeof(port); | ||
63 | memcpy(key, &scope_id, sizeof(scope_id)); | ||
64 | } | ||
65 | |||
52 | /* | 66 | /* |
53 | * Return the rds_sock bound at the given local address. | 67 | * Return the rds_sock bound at the given local address. |
54 | * | 68 | * |
55 | * The rx path can race with rds_release. We notice if rds_release() has | 69 | * The rx path can race with rds_release. We notice if rds_release() has |
56 | * marked this socket and don't return a rs ref to the rx path. | 70 | * marked this socket and don't return a rs ref to the rx path. |
57 | */ | 71 | */ |
58 | struct rds_sock *rds_find_bound(__be32 addr, __be16 port) | 72 | struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, |
73 | __u32 scope_id) | ||
59 | { | 74 | { |
60 | u64 key = ((u64)addr << 32) | port; | 75 | u8 key[RDS_BOUND_KEY_LEN]; |
61 | struct rds_sock *rs; | 76 | struct rds_sock *rs; |
62 | 77 | ||
63 | rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms); | 78 | __rds_create_bind_key(key, addr, port, scope_id); |
79 | rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms); | ||
64 | if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) | 80 | if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) |
65 | rds_sock_addref(rs); | 81 | rds_sock_addref(rs); |
66 | else | 82 | else |
67 | rs = NULL; | 83 | rs = NULL; |
68 | 84 | ||
69 | rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, | 85 | rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, |
70 | ntohs(port)); | 86 | ntohs(port)); |
71 | 87 | ||
72 | return rs; | 88 | return rs; |
73 | } | 89 | } |
74 | 90 | ||
75 | /* returns -ve errno or +ve port */ | 91 | /* returns -ve errno or +ve port */ |
76 | static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) | 92 | static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, |
93 | __be16 *port, __u32 scope_id) | ||
77 | { | 94 | { |
78 | int ret = -EADDRINUSE; | 95 | int ret = -EADDRINUSE; |
79 | u16 rover, last; | 96 | u16 rover, last; |
80 | u64 key; | 97 | u8 key[RDS_BOUND_KEY_LEN]; |
81 | 98 | ||
82 | if (*port != 0) { | 99 | if (*port != 0) { |
83 | rover = be16_to_cpu(*port); | 100 | rover = be16_to_cpu(*port); |
@@ -95,12 +112,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) | |||
95 | 112 | ||
96 | if (rover == RDS_FLAG_PROBE_PORT) | 113 | if (rover == RDS_FLAG_PROBE_PORT) |
97 | continue; | 114 | continue; |
98 | key = ((u64)addr << 32) | cpu_to_be16(rover); | 115 | __rds_create_bind_key(key, addr, cpu_to_be16(rover), |
99 | if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms)) | 116 | scope_id); |
117 | if (rhashtable_lookup_fast(&bind_hash_table, key, ht_parms)) | ||
100 | continue; | 118 | continue; |
101 | 119 | ||
102 | rs->rs_bound_key = key; | 120 | memcpy(rs->rs_bound_key, key, sizeof(rs->rs_bound_key)); |
103 | rs->rs_bound_addr = addr; | 121 | rs->rs_bound_addr = *addr; |
104 | net_get_random_once(&rs->rs_hash_initval, | 122 | net_get_random_once(&rs->rs_hash_initval, |
105 | sizeof(rs->rs_hash_initval)); | 123 | sizeof(rs->rs_hash_initval)); |
106 | rs->rs_bound_port = cpu_to_be16(rover); | 124 | rs->rs_bound_port = cpu_to_be16(rover); |
@@ -109,12 +127,13 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) | |||
109 | if (!rhashtable_insert_fast(&bind_hash_table, | 127 | if (!rhashtable_insert_fast(&bind_hash_table, |
110 | &rs->rs_bound_node, ht_parms)) { | 128 | &rs->rs_bound_node, ht_parms)) { |
111 | *port = rs->rs_bound_port; | 129 | *port = rs->rs_bound_port; |
130 | rs->rs_bound_scope_id = scope_id; | ||
112 | ret = 0; | 131 | ret = 0; |
113 | rdsdebug("rs %p binding to %pI4:%d\n", | 132 | rdsdebug("rs %p binding to %pI6c:%d\n", |
114 | rs, &addr, (int)ntohs(*port)); | 133 | rs, addr, (int)ntohs(*port)); |
115 | break; | 134 | break; |
116 | } else { | 135 | } else { |
117 | rs->rs_bound_addr = 0; | 136 | rs->rs_bound_addr = in6addr_any; |
118 | rds_sock_put(rs); | 137 | rds_sock_put(rs); |
119 | ret = -ENOMEM; | 138 | ret = -ENOMEM; |
120 | break; | 139 | break; |
@@ -127,44 +146,103 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) | |||
127 | void rds_remove_bound(struct rds_sock *rs) | 146 | void rds_remove_bound(struct rds_sock *rs) |
128 | { | 147 | { |
129 | 148 | ||
130 | if (!rs->rs_bound_addr) | 149 | if (ipv6_addr_any(&rs->rs_bound_addr)) |
131 | return; | 150 | return; |
132 | 151 | ||
133 | rdsdebug("rs %p unbinding from %pI4:%d\n", | 152 | rdsdebug("rs %p unbinding from %pI6c:%d\n", |
134 | rs, &rs->rs_bound_addr, | 153 | rs, &rs->rs_bound_addr, |
135 | ntohs(rs->rs_bound_port)); | 154 | ntohs(rs->rs_bound_port)); |
136 | 155 | ||
137 | rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms); | 156 | rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms); |
138 | rds_sock_put(rs); | 157 | rds_sock_put(rs); |
139 | rs->rs_bound_addr = 0; | 158 | rs->rs_bound_addr = in6addr_any; |
140 | } | 159 | } |
141 | 160 | ||
142 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | 161 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
143 | { | 162 | { |
144 | struct sock *sk = sock->sk; | 163 | struct sock *sk = sock->sk; |
145 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
146 | struct rds_sock *rs = rds_sk_to_rs(sk); | 164 | struct rds_sock *rs = rds_sk_to_rs(sk); |
165 | struct in6_addr v6addr, *binding_addr; | ||
147 | struct rds_transport *trans; | 166 | struct rds_transport *trans; |
167 | __u32 scope_id = 0; | ||
148 | int ret = 0; | 168 | int ret = 0; |
169 | __be16 port; | ||
170 | |||
171 | /* We allow an RDS socket to be bound to either IPv4 or IPv6 | ||
172 | * address. | ||
173 | */ | ||
174 | if (uaddr->sa_family == AF_INET) { | ||
175 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
176 | |||
177 | if (addr_len < sizeof(struct sockaddr_in) || | ||
178 | sin->sin_addr.s_addr == htonl(INADDR_ANY) || | ||
179 | sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || | ||
180 | IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) | ||
181 | return -EINVAL; | ||
182 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); | ||
183 | binding_addr = &v6addr; | ||
184 | port = sin->sin_port; | ||
185 | #if IS_ENABLED(CONFIG_IPV6) | ||
186 | } else if (uaddr->sa_family == AF_INET6) { | ||
187 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr; | ||
188 | int addr_type; | ||
189 | |||
190 | if (addr_len < sizeof(struct sockaddr_in6)) | ||
191 | return -EINVAL; | ||
192 | addr_type = ipv6_addr_type(&sin6->sin6_addr); | ||
193 | if (!(addr_type & IPV6_ADDR_UNICAST)) { | ||
194 | __be32 addr4; | ||
149 | 195 | ||
196 | if (!(addr_type & IPV6_ADDR_MAPPED)) | ||
197 | return -EINVAL; | ||
198 | |||
199 | /* It is a mapped address. Need to do some sanity | ||
200 | * checks. | ||
201 | */ | ||
202 | addr4 = sin6->sin6_addr.s6_addr32[3]; | ||
203 | if (addr4 == htonl(INADDR_ANY) || | ||
204 | addr4 == htonl(INADDR_BROADCAST) || | ||
205 | IN_MULTICAST(ntohl(addr4))) | ||
206 | return -EINVAL; | ||
207 | } | ||
208 | /* The scope ID must be specified for link local address. */ | ||
209 | if (addr_type & IPV6_ADDR_LINKLOCAL) { | ||
210 | if (sin6->sin6_scope_id == 0) | ||
211 | return -EINVAL; | ||
212 | scope_id = sin6->sin6_scope_id; | ||
213 | } | ||
214 | binding_addr = &sin6->sin6_addr; | ||
215 | port = sin6->sin6_port; | ||
216 | #endif | ||
217 | } else { | ||
218 | return -EINVAL; | ||
219 | } | ||
150 | lock_sock(sk); | 220 | lock_sock(sk); |
151 | 221 | ||
152 | if (addr_len != sizeof(struct sockaddr_in) || | 222 | /* RDS socket does not allow re-binding. */ |
153 | sin->sin_family != AF_INET || | 223 | if (!ipv6_addr_any(&rs->rs_bound_addr)) { |
154 | rs->rs_bound_addr || | 224 | ret = -EINVAL; |
155 | sin->sin_addr.s_addr == htonl(INADDR_ANY)) { | 225 | goto out; |
226 | } | ||
227 | /* Socket is connected. The binding address should have the same | ||
228 | * scope ID as the connected address, except the case when one is | ||
229 | * non-link local address (scope_id is 0). | ||
230 | */ | ||
231 | if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id && | ||
232 | rs->rs_bound_scope_id && | ||
233 | scope_id != rs->rs_bound_scope_id) { | ||
156 | ret = -EINVAL; | 234 | ret = -EINVAL; |
157 | goto out; | 235 | goto out; |
158 | } | 236 | } |
159 | 237 | ||
160 | ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); | 238 | ret = rds_add_bound(rs, binding_addr, &port, scope_id); |
161 | if (ret) | 239 | if (ret) |
162 | goto out; | 240 | goto out; |
163 | 241 | ||
164 | if (rs->rs_transport) { /* previously bound */ | 242 | if (rs->rs_transport) { /* previously bound */ |
165 | trans = rs->rs_transport; | 243 | trans = rs->rs_transport; |
166 | if (trans->laddr_check(sock_net(sock->sk), | 244 | if (trans->laddr_check(sock_net(sock->sk), |
167 | sin->sin_addr.s_addr) != 0) { | 245 | binding_addr, scope_id) != 0) { |
168 | ret = -ENOPROTOOPT; | 246 | ret = -ENOPROTOOPT; |
169 | rds_remove_bound(rs); | 247 | rds_remove_bound(rs); |
170 | } else { | 248 | } else { |
@@ -172,13 +250,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
172 | } | 250 | } |
173 | goto out; | 251 | goto out; |
174 | } | 252 | } |
175 | trans = rds_trans_get_preferred(sock_net(sock->sk), | 253 | trans = rds_trans_get_preferred(sock_net(sock->sk), binding_addr, |
176 | sin->sin_addr.s_addr); | 254 | scope_id); |
177 | if (!trans) { | 255 | if (!trans) { |
178 | ret = -EADDRNOTAVAIL; | 256 | ret = -EADDRNOTAVAIL; |
179 | rds_remove_bound(rs); | 257 | rds_remove_bound(rs); |
180 | pr_info_ratelimited("RDS: %s could not find a transport for %pI4, load rds_tcp or rds_rdma?\n", | 258 | pr_info_ratelimited("RDS: %s could not find a transport for %pI6c, load rds_tcp or rds_rdma?\n", |
181 | __func__, &sin->sin_addr.s_addr); | 259 | __func__, binding_addr); |
182 | goto out; | 260 | goto out; |
183 | } | 261 | } |
184 | 262 | ||
diff --git a/net/rds/cong.c b/net/rds/cong.c index 63da9d2f142d..ccdff09a79c8 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2007 Oracle. All rights reserved. | 2 | * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -101,7 +101,7 @@ static DEFINE_RWLOCK(rds_cong_monitor_lock); | |||
101 | static DEFINE_SPINLOCK(rds_cong_lock); | 101 | static DEFINE_SPINLOCK(rds_cong_lock); |
102 | static struct rb_root rds_cong_tree = RB_ROOT; | 102 | static struct rb_root rds_cong_tree = RB_ROOT; |
103 | 103 | ||
104 | static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, | 104 | static struct rds_cong_map *rds_cong_tree_walk(const struct in6_addr *addr, |
105 | struct rds_cong_map *insert) | 105 | struct rds_cong_map *insert) |
106 | { | 106 | { |
107 | struct rb_node **p = &rds_cong_tree.rb_node; | 107 | struct rb_node **p = &rds_cong_tree.rb_node; |
@@ -109,12 +109,15 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, | |||
109 | struct rds_cong_map *map; | 109 | struct rds_cong_map *map; |
110 | 110 | ||
111 | while (*p) { | 111 | while (*p) { |
112 | int diff; | ||
113 | |||
112 | parent = *p; | 114 | parent = *p; |
113 | map = rb_entry(parent, struct rds_cong_map, m_rb_node); | 115 | map = rb_entry(parent, struct rds_cong_map, m_rb_node); |
114 | 116 | ||
115 | if (addr < map->m_addr) | 117 | diff = rds_addr_cmp(addr, &map->m_addr); |
118 | if (diff < 0) | ||
116 | p = &(*p)->rb_left; | 119 | p = &(*p)->rb_left; |
117 | else if (addr > map->m_addr) | 120 | else if (diff > 0) |
118 | p = &(*p)->rb_right; | 121 | p = &(*p)->rb_right; |
119 | else | 122 | else |
120 | return map; | 123 | return map; |
@@ -132,7 +135,7 @@ static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, | |||
132 | * these bitmaps in the process getting pointers to them. The bitmaps are only | 135 | * these bitmaps in the process getting pointers to them. The bitmaps are only |
133 | * ever freed as the module is removed after all connections have been freed. | 136 | * ever freed as the module is removed after all connections have been freed. |
134 | */ | 137 | */ |
135 | static struct rds_cong_map *rds_cong_from_addr(__be32 addr) | 138 | static struct rds_cong_map *rds_cong_from_addr(const struct in6_addr *addr) |
136 | { | 139 | { |
137 | struct rds_cong_map *map; | 140 | struct rds_cong_map *map; |
138 | struct rds_cong_map *ret = NULL; | 141 | struct rds_cong_map *ret = NULL; |
@@ -144,7 +147,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr) | |||
144 | if (!map) | 147 | if (!map) |
145 | return NULL; | 148 | return NULL; |
146 | 149 | ||
147 | map->m_addr = addr; | 150 | map->m_addr = *addr; |
148 | init_waitqueue_head(&map->m_waitq); | 151 | init_waitqueue_head(&map->m_waitq); |
149 | INIT_LIST_HEAD(&map->m_conn_list); | 152 | INIT_LIST_HEAD(&map->m_conn_list); |
150 | 153 | ||
@@ -171,7 +174,7 @@ out: | |||
171 | kfree(map); | 174 | kfree(map); |
172 | } | 175 | } |
173 | 176 | ||
174 | rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr)); | 177 | rdsdebug("map %p for addr %pI6c\n", ret, addr); |
175 | 178 | ||
176 | return ret; | 179 | return ret; |
177 | } | 180 | } |
@@ -202,8 +205,8 @@ void rds_cong_remove_conn(struct rds_connection *conn) | |||
202 | 205 | ||
203 | int rds_cong_get_maps(struct rds_connection *conn) | 206 | int rds_cong_get_maps(struct rds_connection *conn) |
204 | { | 207 | { |
205 | conn->c_lcong = rds_cong_from_addr(conn->c_laddr); | 208 | conn->c_lcong = rds_cong_from_addr(&conn->c_laddr); |
206 | conn->c_fcong = rds_cong_from_addr(conn->c_faddr); | 209 | conn->c_fcong = rds_cong_from_addr(&conn->c_faddr); |
207 | 210 | ||
208 | if (!(conn->c_lcong && conn->c_fcong)) | 211 | if (!(conn->c_lcong && conn->c_fcong)) |
209 | return -ENOMEM; | 212 | return -ENOMEM; |
@@ -353,7 +356,7 @@ void rds_cong_remove_socket(struct rds_sock *rs) | |||
353 | 356 | ||
354 | /* update congestion map for now-closed port */ | 357 | /* update congestion map for now-closed port */ |
355 | spin_lock_irqsave(&rds_cong_lock, flags); | 358 | spin_lock_irqsave(&rds_cong_lock, flags); |
356 | map = rds_cong_tree_walk(rs->rs_bound_addr, NULL); | 359 | map = rds_cong_tree_walk(&rs->rs_bound_addr, NULL); |
357 | spin_unlock_irqrestore(&rds_cong_lock, flags); | 360 | spin_unlock_irqrestore(&rds_cong_lock, flags); |
358 | 361 | ||
359 | if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { | 362 | if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { |
diff --git a/net/rds/connection.c b/net/rds/connection.c index cfb05953b0e5..3bd2f4a5a30d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -34,7 +34,9 @@ | |||
34 | #include <linux/list.h> | 34 | #include <linux/list.h> |
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/export.h> | 36 | #include <linux/export.h> |
37 | #include <net/inet_hashtables.h> | 37 | #include <net/ipv6.h> |
38 | #include <net/inet6_hashtables.h> | ||
39 | #include <net/addrconf.h> | ||
38 | 40 | ||
39 | #include "rds.h" | 41 | #include "rds.h" |
40 | #include "loop.h" | 42 | #include "loop.h" |
@@ -49,18 +51,25 @@ static unsigned long rds_conn_count; | |||
49 | static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; | 51 | static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; |
50 | static struct kmem_cache *rds_conn_slab; | 52 | static struct kmem_cache *rds_conn_slab; |
51 | 53 | ||
52 | static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) | 54 | static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, |
55 | const struct in6_addr *faddr) | ||
53 | { | 56 | { |
57 | static u32 rds6_hash_secret __read_mostly; | ||
54 | static u32 rds_hash_secret __read_mostly; | 58 | static u32 rds_hash_secret __read_mostly; |
55 | 59 | ||
56 | unsigned long hash; | 60 | u32 lhash, fhash, hash; |
57 | 61 | ||
58 | net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); | 62 | net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); |
63 | net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); | ||
64 | |||
65 | lhash = (__force u32)laddr->s6_addr32[3]; | ||
66 | #if IS_ENABLED(CONFIG_IPV6) | ||
67 | fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); | ||
68 | #else | ||
69 | fhash = (__force u32)faddr->s6_addr32[3]; | ||
70 | #endif | ||
71 | hash = __inet_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); | ||
59 | 72 | ||
60 | /* Pass NULL, don't need struct net for hash */ | ||
61 | hash = __inet_ehashfn(be32_to_cpu(laddr), 0, | ||
62 | be32_to_cpu(faddr), 0, | ||
63 | rds_hash_secret); | ||
64 | return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; | 73 | return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; |
65 | } | 74 | } |
66 | 75 | ||
@@ -72,20 +81,25 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) | |||
72 | /* rcu read lock must be held or the connection spinlock */ | 81 | /* rcu read lock must be held or the connection spinlock */ |
73 | static struct rds_connection *rds_conn_lookup(struct net *net, | 82 | static struct rds_connection *rds_conn_lookup(struct net *net, |
74 | struct hlist_head *head, | 83 | struct hlist_head *head, |
75 | __be32 laddr, __be32 faddr, | 84 | const struct in6_addr *laddr, |
76 | struct rds_transport *trans) | 85 | const struct in6_addr *faddr, |
86 | struct rds_transport *trans, | ||
87 | int dev_if) | ||
77 | { | 88 | { |
78 | struct rds_connection *conn, *ret = NULL; | 89 | struct rds_connection *conn, *ret = NULL; |
79 | 90 | ||
80 | hlist_for_each_entry_rcu(conn, head, c_hash_node) { | 91 | hlist_for_each_entry_rcu(conn, head, c_hash_node) { |
81 | if (conn->c_faddr == faddr && conn->c_laddr == laddr && | 92 | if (ipv6_addr_equal(&conn->c_faddr, faddr) && |
82 | conn->c_trans == trans && net == rds_conn_net(conn)) { | 93 | ipv6_addr_equal(&conn->c_laddr, laddr) && |
94 | conn->c_trans == trans && | ||
95 | net == rds_conn_net(conn) && | ||
96 | conn->c_dev_if == dev_if) { | ||
83 | ret = conn; | 97 | ret = conn; |
84 | break; | 98 | break; |
85 | } | 99 | } |
86 | } | 100 | } |
87 | rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret, | 101 | rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, |
88 | &laddr, &faddr); | 102 | laddr, faddr); |
89 | return ret; | 103 | return ret; |
90 | } | 104 | } |
91 | 105 | ||
@@ -99,8 +113,8 @@ static void rds_conn_path_reset(struct rds_conn_path *cp) | |||
99 | { | 113 | { |
100 | struct rds_connection *conn = cp->cp_conn; | 114 | struct rds_connection *conn = cp->cp_conn; |
101 | 115 | ||
102 | rdsdebug("connection %pI4 to %pI4 reset\n", | 116 | rdsdebug("connection %pI6c to %pI6c reset\n", |
103 | &conn->c_laddr, &conn->c_faddr); | 117 | &conn->c_laddr, &conn->c_faddr); |
104 | 118 | ||
105 | rds_stats_inc(s_conn_reset); | 119 | rds_stats_inc(s_conn_reset); |
106 | rds_send_path_reset(cp); | 120 | rds_send_path_reset(cp); |
@@ -142,9 +156,12 @@ static void __rds_conn_path_init(struct rds_connection *conn, | |||
142 | * are torn down as the module is removed, if ever. | 156 | * are torn down as the module is removed, if ever. |
143 | */ | 157 | */ |
144 | static struct rds_connection *__rds_conn_create(struct net *net, | 158 | static struct rds_connection *__rds_conn_create(struct net *net, |
145 | __be32 laddr, __be32 faddr, | 159 | const struct in6_addr *laddr, |
146 | struct rds_transport *trans, gfp_t gfp, | 160 | const struct in6_addr *faddr, |
147 | int is_outgoing) | 161 | struct rds_transport *trans, |
162 | gfp_t gfp, | ||
163 | int is_outgoing, | ||
164 | int dev_if) | ||
148 | { | 165 | { |
149 | struct rds_connection *conn, *parent = NULL; | 166 | struct rds_connection *conn, *parent = NULL; |
150 | struct hlist_head *head = rds_conn_bucket(laddr, faddr); | 167 | struct hlist_head *head = rds_conn_bucket(laddr, faddr); |
@@ -154,9 +171,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, | |||
154 | int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); | 171 | int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); |
155 | 172 | ||
156 | rcu_read_lock(); | 173 | rcu_read_lock(); |
157 | conn = rds_conn_lookup(net, head, laddr, faddr, trans); | 174 | conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); |
158 | if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && | 175 | if (conn && |
159 | laddr == faddr && !is_outgoing) { | 176 | conn->c_loopback && |
177 | conn->c_trans != &rds_loop_transport && | ||
178 | ipv6_addr_equal(laddr, faddr) && | ||
179 | !is_outgoing) { | ||
160 | /* This is a looped back IB connection, and we're | 180 | /* This is a looped back IB connection, and we're |
161 | * called by the code handling the incoming connect. | 181 | * called by the code handling the incoming connect. |
162 | * We need a second connection object into which we | 182 | * We need a second connection object into which we |
@@ -181,8 +201,22 @@ static struct rds_connection *__rds_conn_create(struct net *net, | |||
181 | } | 201 | } |
182 | 202 | ||
183 | INIT_HLIST_NODE(&conn->c_hash_node); | 203 | INIT_HLIST_NODE(&conn->c_hash_node); |
184 | conn->c_laddr = laddr; | 204 | conn->c_laddr = *laddr; |
185 | conn->c_faddr = faddr; | 205 | conn->c_isv6 = !ipv6_addr_v4mapped(laddr); |
206 | conn->c_faddr = *faddr; | ||
207 | conn->c_dev_if = dev_if; | ||
208 | |||
209 | #if IS_ENABLED(CONFIG_IPV6) | ||
210 | /* If the local address is link local, set c_bound_if to be the | ||
211 | * index used for this connection. Otherwise, set it to 0 as | ||
212 | * the socket is not bound to an interface. c_bound_if is used | ||
213 | * to look up a socket when a packet is received | ||
214 | */ | ||
215 | if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL) | ||
216 | conn->c_bound_if = dev_if; | ||
217 | else | ||
218 | #endif | ||
219 | conn->c_bound_if = 0; | ||
186 | 220 | ||
187 | rds_conn_net_set(conn, net); | 221 | rds_conn_net_set(conn, net); |
188 | 222 | ||
@@ -199,7 +233,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, | |||
199 | * can bind to the destination address then we'd rather the messages | 233 | * can bind to the destination address then we'd rather the messages |
200 | * flow through loopback rather than either transport. | 234 | * flow through loopback rather than either transport. |
201 | */ | 235 | */ |
202 | loop_trans = rds_trans_get_preferred(net, faddr); | 236 | loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if); |
203 | if (loop_trans) { | 237 | if (loop_trans) { |
204 | rds_trans_put(loop_trans); | 238 | rds_trans_put(loop_trans); |
205 | conn->c_loopback = 1; | 239 | conn->c_loopback = 1; |
@@ -233,10 +267,10 @@ static struct rds_connection *__rds_conn_create(struct net *net, | |||
233 | goto out; | 267 | goto out; |
234 | } | 268 | } |
235 | 269 | ||
236 | rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", | 270 | rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n", |
237 | conn, &laddr, &faddr, | 271 | conn, laddr, faddr, |
238 | strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name : | 272 | strnlen(trans->t_name, sizeof(trans->t_name)) ? |
239 | "[unknown]", is_outgoing ? "(outgoing)" : ""); | 273 | trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : ""); |
240 | 274 | ||
241 | /* | 275 | /* |
242 | * Since we ran without holding the conn lock, someone could | 276 | * Since we ran without holding the conn lock, someone could |
@@ -262,7 +296,8 @@ static struct rds_connection *__rds_conn_create(struct net *net, | |||
262 | /* Creating normal conn */ | 296 | /* Creating normal conn */ |
263 | struct rds_connection *found; | 297 | struct rds_connection *found; |
264 | 298 | ||
265 | found = rds_conn_lookup(net, head, laddr, faddr, trans); | 299 | found = rds_conn_lookup(net, head, laddr, faddr, trans, |
300 | dev_if); | ||
266 | if (found) { | 301 | if (found) { |
267 | struct rds_conn_path *cp; | 302 | struct rds_conn_path *cp; |
268 | int i; | 303 | int i; |
@@ -295,18 +330,22 @@ out: | |||
295 | } | 330 | } |
296 | 331 | ||
297 | struct rds_connection *rds_conn_create(struct net *net, | 332 | struct rds_connection *rds_conn_create(struct net *net, |
298 | __be32 laddr, __be32 faddr, | 333 | const struct in6_addr *laddr, |
299 | struct rds_transport *trans, gfp_t gfp) | 334 | const struct in6_addr *faddr, |
335 | struct rds_transport *trans, gfp_t gfp, | ||
336 | int dev_if) | ||
300 | { | 337 | { |
301 | return __rds_conn_create(net, laddr, faddr, trans, gfp, 0); | 338 | return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); |
302 | } | 339 | } |
303 | EXPORT_SYMBOL_GPL(rds_conn_create); | 340 | EXPORT_SYMBOL_GPL(rds_conn_create); |
304 | 341 | ||
305 | struct rds_connection *rds_conn_create_outgoing(struct net *net, | 342 | struct rds_connection *rds_conn_create_outgoing(struct net *net, |
306 | __be32 laddr, __be32 faddr, | 343 | const struct in6_addr *laddr, |
307 | struct rds_transport *trans, gfp_t gfp) | 344 | const struct in6_addr *faddr, |
345 | struct rds_transport *trans, | ||
346 | gfp_t gfp, int dev_if) | ||
308 | { | 347 | { |
309 | return __rds_conn_create(net, laddr, faddr, trans, gfp, 1); | 348 | return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); |
310 | } | 349 | } |
311 | EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); | 350 | EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); |
312 | 351 | ||
@@ -464,10 +503,23 @@ void rds_conn_destroy(struct rds_connection *conn) | |||
464 | } | 503 | } |
465 | EXPORT_SYMBOL_GPL(rds_conn_destroy); | 504 | EXPORT_SYMBOL_GPL(rds_conn_destroy); |
466 | 505 | ||
467 | static void rds_conn_message_info(struct socket *sock, unsigned int len, | 506 | static void __rds_inc_msg_cp(struct rds_incoming *inc, |
468 | struct rds_info_iterator *iter, | 507 | struct rds_info_iterator *iter, |
469 | struct rds_info_lengths *lens, | 508 | void *saddr, void *daddr, int flip, bool isv6) |
470 | int want_send) | 509 | { |
510 | #if IS_ENABLED(CONFIG_IPV6) | ||
511 | if (isv6) | ||
512 | rds6_inc_info_copy(inc, iter, saddr, daddr, flip); | ||
513 | else | ||
514 | #endif | ||
515 | rds_inc_info_copy(inc, iter, *(__be32 *)saddr, | ||
516 | *(__be32 *)daddr, flip); | ||
517 | } | ||
518 | |||
519 | static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, | ||
520 | struct rds_info_iterator *iter, | ||
521 | struct rds_info_lengths *lens, | ||
522 | int want_send, bool isv6) | ||
471 | { | 523 | { |
472 | struct hlist_head *head; | 524 | struct hlist_head *head; |
473 | struct list_head *list; | 525 | struct list_head *list; |
@@ -478,7 +530,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
478 | size_t i; | 530 | size_t i; |
479 | int j; | 531 | int j; |
480 | 532 | ||
481 | len /= sizeof(struct rds_info_message); | 533 | if (isv6) |
534 | len /= sizeof(struct rds6_info_message); | ||
535 | else | ||
536 | len /= sizeof(struct rds_info_message); | ||
482 | 537 | ||
483 | rcu_read_lock(); | 538 | rcu_read_lock(); |
484 | 539 | ||
@@ -488,6 +543,9 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
488 | struct rds_conn_path *cp; | 543 | struct rds_conn_path *cp; |
489 | int npaths; | 544 | int npaths; |
490 | 545 | ||
546 | if (!isv6 && conn->c_isv6) | ||
547 | continue; | ||
548 | |||
491 | npaths = (conn->c_trans->t_mp_capable ? | 549 | npaths = (conn->c_trans->t_mp_capable ? |
492 | RDS_MPATH_WORKERS : 1); | 550 | RDS_MPATH_WORKERS : 1); |
493 | 551 | ||
@@ -504,11 +562,11 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
504 | list_for_each_entry(rm, list, m_conn_item) { | 562 | list_for_each_entry(rm, list, m_conn_item) { |
505 | total++; | 563 | total++; |
506 | if (total <= len) | 564 | if (total <= len) |
507 | rds_inc_info_copy(&rm->m_inc, | 565 | __rds_inc_msg_cp(&rm->m_inc, |
508 | iter, | 566 | iter, |
509 | conn->c_laddr, | 567 | &conn->c_laddr, |
510 | conn->c_faddr, | 568 | &conn->c_faddr, |
511 | 0); | 569 | 0, isv6); |
512 | } | 570 | } |
513 | 571 | ||
514 | spin_unlock_irqrestore(&cp->cp_lock, flags); | 572 | spin_unlock_irqrestore(&cp->cp_lock, flags); |
@@ -518,9 +576,30 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, | |||
518 | rcu_read_unlock(); | 576 | rcu_read_unlock(); |
519 | 577 | ||
520 | lens->nr = total; | 578 | lens->nr = total; |
521 | lens->each = sizeof(struct rds_info_message); | 579 | if (isv6) |
580 | lens->each = sizeof(struct rds6_info_message); | ||
581 | else | ||
582 | lens->each = sizeof(struct rds_info_message); | ||
522 | } | 583 | } |
523 | 584 | ||
585 | static void rds_conn_message_info(struct socket *sock, unsigned int len, | ||
586 | struct rds_info_iterator *iter, | ||
587 | struct rds_info_lengths *lens, | ||
588 | int want_send) | ||
589 | { | ||
590 | rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false); | ||
591 | } | ||
592 | |||
593 | #if IS_ENABLED(CONFIG_IPV6) | ||
594 | static void rds6_conn_message_info(struct socket *sock, unsigned int len, | ||
595 | struct rds_info_iterator *iter, | ||
596 | struct rds_info_lengths *lens, | ||
597 | int want_send) | ||
598 | { | ||
599 | rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true); | ||
600 | } | ||
601 | #endif | ||
602 | |||
524 | static void rds_conn_message_info_send(struct socket *sock, unsigned int len, | 603 | static void rds_conn_message_info_send(struct socket *sock, unsigned int len, |
525 | struct rds_info_iterator *iter, | 604 | struct rds_info_iterator *iter, |
526 | struct rds_info_lengths *lens) | 605 | struct rds_info_lengths *lens) |
@@ -528,6 +607,15 @@ static void rds_conn_message_info_send(struct socket *sock, unsigned int len, | |||
528 | rds_conn_message_info(sock, len, iter, lens, 1); | 607 | rds_conn_message_info(sock, len, iter, lens, 1); |
529 | } | 608 | } |
530 | 609 | ||
610 | #if IS_ENABLED(CONFIG_IPV6) | ||
611 | static void rds6_conn_message_info_send(struct socket *sock, unsigned int len, | ||
612 | struct rds_info_iterator *iter, | ||
613 | struct rds_info_lengths *lens) | ||
614 | { | ||
615 | rds6_conn_message_info(sock, len, iter, lens, 1); | ||
616 | } | ||
617 | #endif | ||
618 | |||
531 | static void rds_conn_message_info_retrans(struct socket *sock, | 619 | static void rds_conn_message_info_retrans(struct socket *sock, |
532 | unsigned int len, | 620 | unsigned int len, |
533 | struct rds_info_iterator *iter, | 621 | struct rds_info_iterator *iter, |
@@ -536,6 +624,16 @@ static void rds_conn_message_info_retrans(struct socket *sock, | |||
536 | rds_conn_message_info(sock, len, iter, lens, 0); | 624 | rds_conn_message_info(sock, len, iter, lens, 0); |
537 | } | 625 | } |
538 | 626 | ||
627 | #if IS_ENABLED(CONFIG_IPV6) | ||
628 | static void rds6_conn_message_info_retrans(struct socket *sock, | ||
629 | unsigned int len, | ||
630 | struct rds_info_iterator *iter, | ||
631 | struct rds_info_lengths *lens) | ||
632 | { | ||
633 | rds6_conn_message_info(sock, len, iter, lens, 0); | ||
634 | } | ||
635 | #endif | ||
636 | |||
539 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, | 637 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, |
540 | struct rds_info_iterator *iter, | 638 | struct rds_info_iterator *iter, |
541 | struct rds_info_lengths *lens, | 639 | struct rds_info_lengths *lens, |
@@ -584,7 +682,6 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, | |||
584 | struct hlist_head *head; | 682 | struct hlist_head *head; |
585 | struct rds_connection *conn; | 683 | struct rds_connection *conn; |
586 | size_t i; | 684 | size_t i; |
587 | int j; | ||
588 | 685 | ||
589 | rcu_read_lock(); | 686 | rcu_read_lock(); |
590 | 687 | ||
@@ -595,17 +692,20 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, | |||
595 | i++, head++) { | 692 | i++, head++) { |
596 | hlist_for_each_entry_rcu(conn, head, c_hash_node) { | 693 | hlist_for_each_entry_rcu(conn, head, c_hash_node) { |
597 | struct rds_conn_path *cp; | 694 | struct rds_conn_path *cp; |
598 | int npaths; | ||
599 | 695 | ||
600 | npaths = (conn->c_trans->t_mp_capable ? | 696 | /* XXX We only copy the information from the first |
601 | RDS_MPATH_WORKERS : 1); | 697 | * path for now. The problem is that if there are |
602 | for (j = 0; j < npaths; j++) { | 698 | * more than one underlying paths, we cannot report |
603 | cp = &conn->c_path[j]; | 699 | * information of all of them using the existing |
700 | * API. For example, there is only one next_tx_seq, | ||
701 | * which path's next_tx_seq should we report? It is | ||
702 | * a bug in the design of MPRDS. | ||
703 | */ | ||
704 | cp = conn->c_path; | ||
604 | 705 | ||
605 | /* XXX no cp_lock usage.. */ | 706 | /* XXX no cp_lock usage.. */ |
606 | if (!visitor(cp, buffer)) | 707 | if (!visitor(cp, buffer)) |
607 | continue; | 708 | continue; |
608 | } | ||
609 | 709 | ||
610 | /* We copy as much as we can fit in the buffer, | 710 | /* We copy as much as we can fit in the buffer, |
611 | * but we count all items so that the caller | 711 | * but we count all items so that the caller |
@@ -624,12 +724,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, | |||
624 | static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) | 724 | static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) |
625 | { | 725 | { |
626 | struct rds_info_connection *cinfo = buffer; | 726 | struct rds_info_connection *cinfo = buffer; |
727 | struct rds_connection *conn = cp->cp_conn; | ||
728 | |||
729 | if (conn->c_isv6) | ||
730 | return 0; | ||
627 | 731 | ||
628 | cinfo->next_tx_seq = cp->cp_next_tx_seq; | 732 | cinfo->next_tx_seq = cp->cp_next_tx_seq; |
629 | cinfo->next_rx_seq = cp->cp_next_rx_seq; | 733 | cinfo->next_rx_seq = cp->cp_next_rx_seq; |
630 | cinfo->laddr = cp->cp_conn->c_laddr; | 734 | cinfo->laddr = conn->c_laddr.s6_addr32[3]; |
631 | cinfo->faddr = cp->cp_conn->c_faddr; | 735 | cinfo->faddr = conn->c_faddr.s6_addr32[3]; |
632 | strncpy(cinfo->transport, cp->cp_conn->c_trans->t_name, | 736 | strncpy(cinfo->transport, conn->c_trans->t_name, |
633 | sizeof(cinfo->transport)); | 737 | sizeof(cinfo->transport)); |
634 | cinfo->flags = 0; | 738 | cinfo->flags = 0; |
635 | 739 | ||
@@ -645,6 +749,36 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) | |||
645 | return 1; | 749 | return 1; |
646 | } | 750 | } |
647 | 751 | ||
752 | #if IS_ENABLED(CONFIG_IPV6) | ||
753 | static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer) | ||
754 | { | ||
755 | struct rds6_info_connection *cinfo6 = buffer; | ||
756 | struct rds_connection *conn = cp->cp_conn; | ||
757 | |||
758 | cinfo6->next_tx_seq = cp->cp_next_tx_seq; | ||
759 | cinfo6->next_rx_seq = cp->cp_next_rx_seq; | ||
760 | cinfo6->laddr = conn->c_laddr; | ||
761 | cinfo6->faddr = conn->c_faddr; | ||
762 | strncpy(cinfo6->transport, conn->c_trans->t_name, | ||
763 | sizeof(cinfo6->transport)); | ||
764 | cinfo6->flags = 0; | ||
765 | |||
766 | rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags), | ||
767 | SENDING); | ||
768 | /* XXX Future: return the state rather than these funky bits */ | ||
769 | rds_conn_info_set(cinfo6->flags, | ||
770 | atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING, | ||
771 | CONNECTING); | ||
772 | rds_conn_info_set(cinfo6->flags, | ||
773 | atomic_read(&cp->cp_state) == RDS_CONN_UP, | ||
774 | CONNECTED); | ||
775 | /* Just return 1 as there is no error case. This is a helper function | ||
776 | * for rds_walk_conn_path_info() and it wants a return value. | ||
777 | */ | ||
778 | return 1; | ||
779 | } | ||
780 | #endif | ||
781 | |||
648 | static void rds_conn_info(struct socket *sock, unsigned int len, | 782 | static void rds_conn_info(struct socket *sock, unsigned int len, |
649 | struct rds_info_iterator *iter, | 783 | struct rds_info_iterator *iter, |
650 | struct rds_info_lengths *lens) | 784 | struct rds_info_lengths *lens) |
@@ -657,6 +791,20 @@ static void rds_conn_info(struct socket *sock, unsigned int len, | |||
657 | sizeof(struct rds_info_connection)); | 791 | sizeof(struct rds_info_connection)); |
658 | } | 792 | } |
659 | 793 | ||
794 | #if IS_ENABLED(CONFIG_IPV6) | ||
795 | static void rds6_conn_info(struct socket *sock, unsigned int len, | ||
796 | struct rds_info_iterator *iter, | ||
797 | struct rds_info_lengths *lens) | ||
798 | { | ||
799 | u64 buffer[(sizeof(struct rds6_info_connection) + 7) / 8]; | ||
800 | |||
801 | rds_walk_conn_path_info(sock, len, iter, lens, | ||
802 | rds6_conn_info_visitor, | ||
803 | buffer, | ||
804 | sizeof(struct rds6_info_connection)); | ||
805 | } | ||
806 | #endif | ||
807 | |||
660 | int rds_conn_init(void) | 808 | int rds_conn_init(void) |
661 | { | 809 | { |
662 | int ret; | 810 | int ret; |
@@ -678,7 +826,13 @@ int rds_conn_init(void) | |||
678 | rds_conn_message_info_send); | 826 | rds_conn_message_info_send); |
679 | rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, | 827 | rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, |
680 | rds_conn_message_info_retrans); | 828 | rds_conn_message_info_retrans); |
681 | 829 | #if IS_ENABLED(CONFIG_IPV6) | |
830 | rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); | ||
831 | rds_info_register_func(RDS6_INFO_SEND_MESSAGES, | ||
832 | rds6_conn_message_info_send); | ||
833 | rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES, | ||
834 | rds6_conn_message_info_retrans); | ||
835 | #endif | ||
682 | return 0; | 836 | return 0; |
683 | } | 837 | } |
684 | 838 | ||
@@ -696,6 +850,13 @@ void rds_conn_exit(void) | |||
696 | rds_conn_message_info_send); | 850 | rds_conn_message_info_send); |
697 | rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, | 851 | rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, |
698 | rds_conn_message_info_retrans); | 852 | rds_conn_message_info_retrans); |
853 | #if IS_ENABLED(CONFIG_IPV6) | ||
854 | rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); | ||
855 | rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES, | ||
856 | rds6_conn_message_info_send); | ||
857 | rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES, | ||
858 | rds6_conn_message_info_retrans); | ||
859 | #endif | ||
699 | } | 860 | } |
700 | 861 | ||
701 | /* | 862 | /* |
diff --git a/net/rds/ib.c b/net/rds/ib.c index 683b55d4e2b0..c1d97640c0be 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/delay.h> | 39 | #include <linux/delay.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <net/addrconf.h> | ||
42 | 43 | ||
43 | #include "rds_single_path.h" | 44 | #include "rds_single_path.h" |
44 | #include "rds.h" | 45 | #include "rds.h" |
@@ -295,9 +296,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, | |||
295 | /* We will only ever look at IB transports */ | 296 | /* We will only ever look at IB transports */ |
296 | if (conn->c_trans != &rds_ib_transport) | 297 | if (conn->c_trans != &rds_ib_transport) |
297 | return 0; | 298 | return 0; |
299 | if (conn->c_isv6) | ||
300 | return 0; | ||
298 | 301 | ||
299 | iinfo->src_addr = conn->c_laddr; | 302 | iinfo->src_addr = conn->c_laddr.s6_addr32[3]; |
300 | iinfo->dst_addr = conn->c_faddr; | 303 | iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; |
301 | 304 | ||
302 | memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); | 305 | memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); |
303 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); | 306 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); |
@@ -318,6 +321,45 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, | |||
318 | return 1; | 321 | return 1; |
319 | } | 322 | } |
320 | 323 | ||
324 | #if IS_ENABLED(CONFIG_IPV6) | ||
325 | /* IPv6 version of rds_ib_conn_info_visitor(). */ | ||
326 | static int rds6_ib_conn_info_visitor(struct rds_connection *conn, | ||
327 | void *buffer) | ||
328 | { | ||
329 | struct rds6_info_rdma_connection *iinfo6 = buffer; | ||
330 | struct rds_ib_connection *ic; | ||
331 | |||
332 | /* We will only ever look at IB transports */ | ||
333 | if (conn->c_trans != &rds_ib_transport) | ||
334 | return 0; | ||
335 | |||
336 | iinfo6->src_addr = conn->c_laddr; | ||
337 | iinfo6->dst_addr = conn->c_faddr; | ||
338 | |||
339 | memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); | ||
340 | memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); | ||
341 | |||
342 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
343 | struct rds_ib_device *rds_ibdev; | ||
344 | struct rdma_dev_addr *dev_addr; | ||
345 | |||
346 | ic = conn->c_transport_data; | ||
347 | dev_addr = &ic->i_cm_id->route.addr.dev_addr; | ||
348 | rdma_addr_get_sgid(dev_addr, | ||
349 | (union ib_gid *)&iinfo6->src_gid); | ||
350 | rdma_addr_get_dgid(dev_addr, | ||
351 | (union ib_gid *)&iinfo6->dst_gid); | ||
352 | |||
353 | rds_ibdev = ic->rds_ibdev; | ||
354 | iinfo6->max_send_wr = ic->i_send_ring.w_nr; | ||
355 | iinfo6->max_recv_wr = ic->i_recv_ring.w_nr; | ||
356 | iinfo6->max_send_sge = rds_ibdev->max_sge; | ||
357 | rds6_ib_get_mr_info(rds_ibdev, iinfo6); | ||
358 | } | ||
359 | return 1; | ||
360 | } | ||
361 | #endif | ||
362 | |||
321 | static void rds_ib_ic_info(struct socket *sock, unsigned int len, | 363 | static void rds_ib_ic_info(struct socket *sock, unsigned int len, |
322 | struct rds_info_iterator *iter, | 364 | struct rds_info_iterator *iter, |
323 | struct rds_info_lengths *lens) | 365 | struct rds_info_lengths *lens) |
@@ -330,6 +372,20 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, | |||
330 | sizeof(struct rds_info_rdma_connection)); | 372 | sizeof(struct rds_info_rdma_connection)); |
331 | } | 373 | } |
332 | 374 | ||
375 | #if IS_ENABLED(CONFIG_IPV6) | ||
376 | /* IPv6 version of rds_ib_ic_info(). */ | ||
377 | static void rds6_ib_ic_info(struct socket *sock, unsigned int len, | ||
378 | struct rds_info_iterator *iter, | ||
379 | struct rds_info_lengths *lens) | ||
380 | { | ||
381 | u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8]; | ||
382 | |||
383 | rds_for_each_conn_info(sock, len, iter, lens, | ||
384 | rds6_ib_conn_info_visitor, | ||
385 | buffer, | ||
386 | sizeof(struct rds6_info_rdma_connection)); | ||
387 | } | ||
388 | #endif | ||
333 | 389 | ||
334 | /* | 390 | /* |
335 | * Early RDS/IB was built to only bind to an address if there is an IPoIB | 391 | * Early RDS/IB was built to only bind to an address if there is an IPoIB |
@@ -341,12 +397,19 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, | |||
341 | * allowed to influence which paths have priority. We could call userspace | 397 | * allowed to influence which paths have priority. We could call userspace |
342 | * asserting this policy "routing". | 398 | * asserting this policy "routing". |
343 | */ | 399 | */ |
344 | static int rds_ib_laddr_check(struct net *net, __be32 addr) | 400 | static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, |
401 | __u32 scope_id) | ||
345 | { | 402 | { |
346 | int ret; | 403 | int ret; |
347 | struct rdma_cm_id *cm_id; | 404 | struct rdma_cm_id *cm_id; |
405 | #if IS_ENABLED(CONFIG_IPV6) | ||
406 | struct sockaddr_in6 sin6; | ||
407 | #endif | ||
348 | struct sockaddr_in sin; | 408 | struct sockaddr_in sin; |
409 | struct sockaddr *sa; | ||
410 | bool isv4; | ||
349 | 411 | ||
412 | isv4 = ipv6_addr_v4mapped(addr); | ||
350 | /* Create a CMA ID and try to bind it. This catches both | 413 | /* Create a CMA ID and try to bind it. This catches both |
351 | * IB and iWARP capable NICs. | 414 | * IB and iWARP capable NICs. |
352 | */ | 415 | */ |
@@ -355,22 +418,66 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr) | |||
355 | if (IS_ERR(cm_id)) | 418 | if (IS_ERR(cm_id)) |
356 | return PTR_ERR(cm_id); | 419 | return PTR_ERR(cm_id); |
357 | 420 | ||
358 | memset(&sin, 0, sizeof(sin)); | 421 | if (isv4) { |
359 | sin.sin_family = AF_INET; | 422 | memset(&sin, 0, sizeof(sin)); |
360 | sin.sin_addr.s_addr = addr; | 423 | sin.sin_family = AF_INET; |
424 | sin.sin_addr.s_addr = addr->s6_addr32[3]; | ||
425 | sa = (struct sockaddr *)&sin; | ||
426 | } else { | ||
427 | #if IS_ENABLED(CONFIG_IPV6) | ||
428 | memset(&sin6, 0, sizeof(sin6)); | ||
429 | sin6.sin6_family = AF_INET6; | ||
430 | sin6.sin6_addr = *addr; | ||
431 | sin6.sin6_scope_id = scope_id; | ||
432 | sa = (struct sockaddr *)&sin6; | ||
433 | |||
434 | /* XXX Do a special IPv6 link local address check here. The | ||
435 | * reason is that rdma_bind_addr() always succeeds with IPv6 | ||
436 | * link local address regardless it is indeed configured in a | ||
437 | * system. | ||
438 | */ | ||
439 | if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) { | ||
440 | struct net_device *dev; | ||
441 | |||
442 | if (scope_id == 0) { | ||
443 | ret = -EADDRNOTAVAIL; | ||
444 | goto out; | ||
445 | } | ||
446 | |||
447 | /* Use init_net for now as RDS is not network | ||
448 | * name space aware. | ||
449 | */ | ||
450 | dev = dev_get_by_index(&init_net, scope_id); | ||
451 | if (!dev) { | ||
452 | ret = -EADDRNOTAVAIL; | ||
453 | goto out; | ||
454 | } | ||
455 | if (!ipv6_chk_addr(&init_net, addr, dev, 1)) { | ||
456 | dev_put(dev); | ||
457 | ret = -EADDRNOTAVAIL; | ||
458 | goto out; | ||
459 | } | ||
460 | dev_put(dev); | ||
461 | } | ||
462 | #else | ||
463 | ret = -EADDRNOTAVAIL; | ||
464 | goto out; | ||
465 | #endif | ||
466 | } | ||
361 | 467 | ||
362 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ | 468 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ |
363 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | 469 | ret = rdma_bind_addr(cm_id, sa); |
364 | /* due to this, we will claim to support iWARP devices unless we | 470 | /* due to this, we will claim to support iWARP devices unless we |
365 | check node_type. */ | 471 | check node_type. */ |
366 | if (ret || !cm_id->device || | 472 | if (ret || !cm_id->device || |
367 | cm_id->device->node_type != RDMA_NODE_IB_CA) | 473 | cm_id->device->node_type != RDMA_NODE_IB_CA) |
368 | ret = -EADDRNOTAVAIL; | 474 | ret = -EADDRNOTAVAIL; |
369 | 475 | ||
370 | rdsdebug("addr %pI4 ret %d node type %d\n", | 476 | rdsdebug("addr %pI6c%%%u ret %d node type %d\n", |
371 | &addr, ret, | 477 | addr, scope_id, ret, |
372 | cm_id->device ? cm_id->device->node_type : -1); | 478 | cm_id->device ? cm_id->device->node_type : -1); |
373 | 479 | ||
480 | out: | ||
374 | rdma_destroy_id(cm_id); | 481 | rdma_destroy_id(cm_id); |
375 | 482 | ||
376 | return ret; | 483 | return ret; |
@@ -401,6 +508,9 @@ void rds_ib_exit(void) | |||
401 | rds_ib_set_unloading(); | 508 | rds_ib_set_unloading(); |
402 | synchronize_rcu(); | 509 | synchronize_rcu(); |
403 | rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); | 510 | rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); |
511 | #if IS_ENABLED(CONFIG_IPV6) | ||
512 | rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); | ||
513 | #endif | ||
404 | rds_ib_unregister_client(); | 514 | rds_ib_unregister_client(); |
405 | rds_ib_destroy_nodev_conns(); | 515 | rds_ib_destroy_nodev_conns(); |
406 | rds_ib_sysctl_exit(); | 516 | rds_ib_sysctl_exit(); |
@@ -462,6 +572,9 @@ int rds_ib_init(void) | |||
462 | rds_trans_register(&rds_ib_transport); | 572 | rds_trans_register(&rds_ib_transport); |
463 | 573 | ||
464 | rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); | 574 | rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); |
575 | #if IS_ENABLED(CONFIG_IPV6) | ||
576 | rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info); | ||
577 | #endif | ||
465 | 578 | ||
466 | goto out; | 579 | goto out; |
467 | 580 | ||
@@ -476,4 +589,3 @@ out: | |||
476 | } | 589 | } |
477 | 590 | ||
478 | MODULE_LICENSE("GPL"); | 591 | MODULE_LICENSE("GPL"); |
479 | |||
diff --git a/net/rds/ib.h b/net/rds/ib.h index a6f4d7d68e95..73427ff439f9 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h | |||
@@ -57,16 +57,44 @@ struct rds_ib_refill_cache { | |||
57 | struct list_head *ready; | 57 | struct list_head *ready; |
58 | }; | 58 | }; |
59 | 59 | ||
60 | /* This is the common structure for the IB private data exchange in setting up | ||
61 | * an RDS connection. The exchange is different for IPv4 and IPv6 connections. | ||
62 | * The reason is that the address size is different and the addresses | ||
63 | * exchanged are in the beginning of the structure. Hence it is not possible | ||
64 | * for interoperability if same structure is used. | ||
65 | */ | ||
66 | struct rds_ib_conn_priv_cmn { | ||
67 | u8 ricpc_protocol_major; | ||
68 | u8 ricpc_protocol_minor; | ||
69 | __be16 ricpc_protocol_minor_mask; /* bitmask */ | ||
70 | __be32 ricpc_reserved1; | ||
71 | __be64 ricpc_ack_seq; | ||
72 | __be32 ricpc_credit; /* non-zero enables flow ctl */ | ||
73 | }; | ||
74 | |||
60 | struct rds_ib_connect_private { | 75 | struct rds_ib_connect_private { |
61 | /* Add new fields at the end, and don't permute existing fields. */ | 76 | /* Add new fields at the end, and don't permute existing fields. */ |
62 | __be32 dp_saddr; | 77 | __be32 dp_saddr; |
63 | __be32 dp_daddr; | 78 | __be32 dp_daddr; |
64 | u8 dp_protocol_major; | 79 | struct rds_ib_conn_priv_cmn dp_cmn; |
65 | u8 dp_protocol_minor; | 80 | }; |
66 | __be16 dp_protocol_minor_mask; /* bitmask */ | 81 | |
67 | __be32 dp_reserved1; | 82 | struct rds6_ib_connect_private { |
68 | __be64 dp_ack_seq; | 83 | /* Add new fields at the end, and don't permute existing fields. */ |
69 | __be32 dp_credit; /* non-zero enables flow ctl */ | 84 | struct in6_addr dp_saddr; |
85 | struct in6_addr dp_daddr; | ||
86 | struct rds_ib_conn_priv_cmn dp_cmn; | ||
87 | }; | ||
88 | |||
89 | #define dp_protocol_major dp_cmn.ricpc_protocol_major | ||
90 | #define dp_protocol_minor dp_cmn.ricpc_protocol_minor | ||
91 | #define dp_protocol_minor_mask dp_cmn.ricpc_protocol_minor_mask | ||
92 | #define dp_ack_seq dp_cmn.ricpc_ack_seq | ||
93 | #define dp_credit dp_cmn.ricpc_credit | ||
94 | |||
95 | union rds_ib_conn_priv { | ||
96 | struct rds_ib_connect_private ricp_v4; | ||
97 | struct rds6_ib_connect_private ricp_v6; | ||
70 | }; | 98 | }; |
71 | 99 | ||
72 | struct rds_ib_send_work { | 100 | struct rds_ib_send_work { |
@@ -351,8 +379,8 @@ void rds_ib_listen_stop(void); | |||
351 | __printf(2, 3) | 379 | __printf(2, 3) |
352 | void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); | 380 | void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); |
353 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | 381 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, |
354 | struct rdma_cm_event *event); | 382 | struct rdma_cm_event *event, bool isv6); |
355 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id); | 383 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); |
356 | void rds_ib_cm_connect_complete(struct rds_connection *conn, | 384 | void rds_ib_cm_connect_complete(struct rds_connection *conn, |
357 | struct rdma_cm_event *event); | 385 | struct rdma_cm_event *event); |
358 | 386 | ||
@@ -361,7 +389,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, | |||
361 | __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) | 389 | __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) |
362 | 390 | ||
363 | /* ib_rdma.c */ | 391 | /* ib_rdma.c */ |
364 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); | 392 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, |
393 | struct in6_addr *ipaddr); | ||
365 | void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); | 394 | void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); |
366 | void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); | 395 | void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); |
367 | void rds_ib_destroy_nodev_conns(void); | 396 | void rds_ib_destroy_nodev_conns(void); |
@@ -371,7 +400,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); | |||
371 | int rds_ib_recv_init(void); | 400 | int rds_ib_recv_init(void); |
372 | void rds_ib_recv_exit(void); | 401 | void rds_ib_recv_exit(void); |
373 | int rds_ib_recv_path(struct rds_conn_path *conn); | 402 | int rds_ib_recv_path(struct rds_conn_path *conn); |
374 | int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); | 403 | int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp); |
375 | void rds_ib_recv_free_caches(struct rds_ib_connection *ic); | 404 | void rds_ib_recv_free_caches(struct rds_ib_connection *ic); |
376 | void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); | 405 | void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); |
377 | void rds_ib_inc_free(struct rds_incoming *inc); | 406 | void rds_ib_inc_free(struct rds_incoming *inc); |
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index f1684ae6abfd..bfbb31f0c7fd 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/vmalloc.h> | 36 | #include <linux/vmalloc.h> |
37 | #include <linux/ratelimit.h> | 37 | #include <linux/ratelimit.h> |
38 | #include <net/addrconf.h> | ||
38 | 39 | ||
39 | #include "rds_single_path.h" | 40 | #include "rds_single_path.h" |
40 | #include "rds.h" | 41 | #include "rds.h" |
@@ -95,25 +96,45 @@ rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) | |||
95 | */ | 96 | */ |
96 | void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) | 97 | void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) |
97 | { | 98 | { |
98 | const struct rds_ib_connect_private *dp = NULL; | ||
99 | struct rds_ib_connection *ic = conn->c_transport_data; | 99 | struct rds_ib_connection *ic = conn->c_transport_data; |
100 | const union rds_ib_conn_priv *dp = NULL; | ||
100 | struct ib_qp_attr qp_attr; | 101 | struct ib_qp_attr qp_attr; |
102 | __be64 ack_seq = 0; | ||
103 | __be32 credit = 0; | ||
104 | u8 major = 0; | ||
105 | u8 minor = 0; | ||
101 | int err; | 106 | int err; |
102 | 107 | ||
103 | if (event->param.conn.private_data_len >= sizeof(*dp)) { | 108 | dp = event->param.conn.private_data; |
104 | dp = event->param.conn.private_data; | 109 | if (conn->c_isv6) { |
105 | 110 | if (event->param.conn.private_data_len >= | |
106 | /* make sure it isn't empty data */ | 111 | sizeof(struct rds6_ib_connect_private)) { |
107 | if (dp->dp_protocol_major) { | 112 | major = dp->ricp_v6.dp_protocol_major; |
108 | rds_ib_set_protocol(conn, | 113 | minor = dp->ricp_v6.dp_protocol_minor; |
109 | RDS_PROTOCOL(dp->dp_protocol_major, | 114 | credit = dp->ricp_v6.dp_credit; |
110 | dp->dp_protocol_minor)); | 115 | /* dp structure start is not guaranteed to be 8 bytes |
111 | rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | 116 | * aligned. Since dp_ack_seq is 64-bit extended load |
117 | * operations can be used so go through get_unaligned | ||
118 | * to avoid unaligned errors. | ||
119 | */ | ||
120 | ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq); | ||
112 | } | 121 | } |
122 | } else if (event->param.conn.private_data_len >= | ||
123 | sizeof(struct rds_ib_connect_private)) { | ||
124 | major = dp->ricp_v4.dp_protocol_major; | ||
125 | minor = dp->ricp_v4.dp_protocol_minor; | ||
126 | credit = dp->ricp_v4.dp_credit; | ||
127 | ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq); | ||
128 | } | ||
129 | |||
130 | /* make sure it isn't empty data */ | ||
131 | if (major) { | ||
132 | rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor)); | ||
133 | rds_ib_set_flow_control(conn, be32_to_cpu(credit)); | ||
113 | } | 134 | } |
114 | 135 | ||
115 | if (conn->c_version < RDS_PROTOCOL(3, 1)) { | 136 | if (conn->c_version < RDS_PROTOCOL(3, 1)) { |
116 | pr_notice("RDS/IB: Connection <%pI4,%pI4> version %u.%u no longer supported\n", | 137 | pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", |
117 | &conn->c_laddr, &conn->c_faddr, | 138 | &conn->c_laddr, &conn->c_faddr, |
118 | RDS_PROTOCOL_MAJOR(conn->c_version), | 139 | RDS_PROTOCOL_MAJOR(conn->c_version), |
119 | RDS_PROTOCOL_MINOR(conn->c_version)); | 140 | RDS_PROTOCOL_MINOR(conn->c_version)); |
@@ -121,7 +142,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
121 | rds_conn_destroy(conn); | 142 | rds_conn_destroy(conn); |
122 | return; | 143 | return; |
123 | } else { | 144 | } else { |
124 | pr_notice("RDS/IB: %s conn connected <%pI4,%pI4> version %u.%u%s\n", | 145 | pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", |
125 | ic->i_active_side ? "Active" : "Passive", | 146 | ic->i_active_side ? "Active" : "Passive", |
126 | &conn->c_laddr, &conn->c_faddr, | 147 | &conn->c_laddr, &conn->c_faddr, |
127 | RDS_PROTOCOL_MAJOR(conn->c_version), | 148 | RDS_PROTOCOL_MAJOR(conn->c_version), |
@@ -150,7 +171,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
150 | printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); | 171 | printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); |
151 | 172 | ||
152 | /* update ib_device with this local ipaddr */ | 173 | /* update ib_device with this local ipaddr */ |
153 | err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr); | 174 | err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr); |
154 | if (err) | 175 | if (err) |
155 | printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", | 176 | printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", |
156 | err); | 177 | err); |
@@ -158,14 +179,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
158 | /* If the peer gave us the last packet it saw, process this as if | 179 | /* If the peer gave us the last packet it saw, process this as if |
159 | * we had received a regular ACK. */ | 180 | * we had received a regular ACK. */ |
160 | if (dp) { | 181 | if (dp) { |
161 | /* dp structure start is not guaranteed to be 8 bytes aligned. | 182 | if (ack_seq) |
162 | * Since dp_ack_seq is 64-bit extended load operations can be | 183 | rds_send_drop_acked(conn, be64_to_cpu(ack_seq), |
163 | * used so go through get_unaligned to avoid unaligned errors. | ||
164 | */ | ||
165 | __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq); | ||
166 | |||
167 | if (dp_ack_seq) | ||
168 | rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq), | ||
169 | NULL); | 184 | NULL); |
170 | } | 185 | } |
171 | 186 | ||
@@ -173,11 +188,12 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
173 | } | 188 | } |
174 | 189 | ||
175 | static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, | 190 | static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, |
176 | struct rdma_conn_param *conn_param, | 191 | struct rdma_conn_param *conn_param, |
177 | struct rds_ib_connect_private *dp, | 192 | union rds_ib_conn_priv *dp, |
178 | u32 protocol_version, | 193 | u32 protocol_version, |
179 | u32 max_responder_resources, | 194 | u32 max_responder_resources, |
180 | u32 max_initiator_depth) | 195 | u32 max_initiator_depth, |
196 | bool isv6) | ||
181 | { | 197 | { |
182 | struct rds_ib_connection *ic = conn->c_transport_data; | 198 | struct rds_ib_connection *ic = conn->c_transport_data; |
183 | struct rds_ib_device *rds_ibdev = ic->rds_ibdev; | 199 | struct rds_ib_device *rds_ibdev = ic->rds_ibdev; |
@@ -193,24 +209,49 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, | |||
193 | 209 | ||
194 | if (dp) { | 210 | if (dp) { |
195 | memset(dp, 0, sizeof(*dp)); | 211 | memset(dp, 0, sizeof(*dp)); |
196 | dp->dp_saddr = conn->c_laddr; | 212 | if (isv6) { |
197 | dp->dp_daddr = conn->c_faddr; | 213 | dp->ricp_v6.dp_saddr = conn->c_laddr; |
198 | dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); | 214 | dp->ricp_v6.dp_daddr = conn->c_faddr; |
199 | dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); | 215 | dp->ricp_v6.dp_protocol_major = |
200 | dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); | 216 | RDS_PROTOCOL_MAJOR(protocol_version); |
201 | dp->dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); | 217 | dp->ricp_v6.dp_protocol_minor = |
218 | RDS_PROTOCOL_MINOR(protocol_version); | ||
219 | dp->ricp_v6.dp_protocol_minor_mask = | ||
220 | cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); | ||
221 | dp->ricp_v6.dp_ack_seq = | ||
222 | cpu_to_be64(rds_ib_piggyb_ack(ic)); | ||
223 | |||
224 | conn_param->private_data = &dp->ricp_v6; | ||
225 | conn_param->private_data_len = sizeof(dp->ricp_v6); | ||
226 | } else { | ||
227 | dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3]; | ||
228 | dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3]; | ||
229 | dp->ricp_v4.dp_protocol_major = | ||
230 | RDS_PROTOCOL_MAJOR(protocol_version); | ||
231 | dp->ricp_v4.dp_protocol_minor = | ||
232 | RDS_PROTOCOL_MINOR(protocol_version); | ||
233 | dp->ricp_v4.dp_protocol_minor_mask = | ||
234 | cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); | ||
235 | dp->ricp_v4.dp_ack_seq = | ||
236 | cpu_to_be64(rds_ib_piggyb_ack(ic)); | ||
237 | |||
238 | conn_param->private_data = &dp->ricp_v4; | ||
239 | conn_param->private_data_len = sizeof(dp->ricp_v4); | ||
240 | } | ||
202 | 241 | ||
203 | /* Advertise flow control */ | 242 | /* Advertise flow control */ |
204 | if (ic->i_flowctl) { | 243 | if (ic->i_flowctl) { |
205 | unsigned int credits; | 244 | unsigned int credits; |
206 | 245 | ||
207 | credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)); | 246 | credits = IB_GET_POST_CREDITS |
208 | dp->dp_credit = cpu_to_be32(credits); | 247 | (atomic_read(&ic->i_credits)); |
209 | atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); | 248 | if (isv6) |
249 | dp->ricp_v6.dp_credit = cpu_to_be32(credits); | ||
250 | else | ||
251 | dp->ricp_v4.dp_credit = cpu_to_be32(credits); | ||
252 | atomic_sub(IB_SET_POST_CREDITS(credits), | ||
253 | &ic->i_credits); | ||
210 | } | 254 | } |
211 | |||
212 | conn_param->private_data = dp; | ||
213 | conn_param->private_data_len = sizeof(*dp); | ||
214 | } | 255 | } |
215 | } | 256 | } |
216 | 257 | ||
@@ -349,7 +390,7 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) | |||
349 | break; | 390 | break; |
350 | default: | 391 | default: |
351 | rdsdebug("Fatal QP Event %u (%s) " | 392 | rdsdebug("Fatal QP Event %u (%s) " |
352 | "- connection %pI4->%pI4, reconnecting\n", | 393 | "- connection %pI6c->%pI6c, reconnecting\n", |
353 | event->event, ib_event_msg(event->event), | 394 | event->event, ib_event_msg(event->event), |
354 | &conn->c_laddr, &conn->c_faddr); | 395 | &conn->c_laddr, &conn->c_faddr); |
355 | rds_conn_drop(conn); | 396 | rds_conn_drop(conn); |
@@ -580,11 +621,13 @@ out: | |||
580 | return ret; | 621 | return ret; |
581 | } | 622 | } |
582 | 623 | ||
583 | static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) | 624 | static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) |
584 | { | 625 | { |
585 | const struct rds_ib_connect_private *dp = event->param.conn.private_data; | 626 | const union rds_ib_conn_priv *dp = event->param.conn.private_data; |
586 | u16 common; | 627 | u8 data_len, major, minor; |
587 | u32 version = 0; | 628 | u32 version = 0; |
629 | __be16 mask; | ||
630 | u16 common; | ||
588 | 631 | ||
589 | /* | 632 | /* |
590 | * rdma_cm private data is odd - when there is any private data in the | 633 | * rdma_cm private data is odd - when there is any private data in the |
@@ -603,51 +646,140 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event) | |||
603 | return 0; | 646 | return 0; |
604 | } | 647 | } |
605 | 648 | ||
649 | if (isv6) { | ||
650 | data_len = sizeof(struct rds6_ib_connect_private); | ||
651 | major = dp->ricp_v6.dp_protocol_major; | ||
652 | minor = dp->ricp_v6.dp_protocol_minor; | ||
653 | mask = dp->ricp_v6.dp_protocol_minor_mask; | ||
654 | } else { | ||
655 | data_len = sizeof(struct rds_ib_connect_private); | ||
656 | major = dp->ricp_v4.dp_protocol_major; | ||
657 | minor = dp->ricp_v4.dp_protocol_minor; | ||
658 | mask = dp->ricp_v4.dp_protocol_minor_mask; | ||
659 | } | ||
660 | |||
606 | /* Even if len is crap *now* I still want to check it. -ASG */ | 661 | /* Even if len is crap *now* I still want to check it. -ASG */ |
607 | if (event->param.conn.private_data_len < sizeof (*dp) || | 662 | if (event->param.conn.private_data_len < data_len || major == 0) |
608 | dp->dp_protocol_major == 0) | ||
609 | return RDS_PROTOCOL_3_0; | 663 | return RDS_PROTOCOL_3_0; |
610 | 664 | ||
611 | common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS; | 665 | common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; |
612 | if (dp->dp_protocol_major == 3 && common) { | 666 | if (major == 3 && common) { |
613 | version = RDS_PROTOCOL_3_0; | 667 | version = RDS_PROTOCOL_3_0; |
614 | while ((common >>= 1) != 0) | 668 | while ((common >>= 1) != 0) |
615 | version++; | 669 | version++; |
616 | } else | 670 | } else { |
617 | printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", | 671 | if (isv6) |
618 | &dp->dp_saddr, | 672 | printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", |
619 | dp->dp_protocol_major, | 673 | &dp->ricp_v6.dp_saddr, major, minor); |
620 | dp->dp_protocol_minor); | 674 | else |
675 | printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", | ||
676 | &dp->ricp_v4.dp_saddr, major, minor); | ||
677 | } | ||
621 | return version; | 678 | return version; |
622 | } | 679 | } |
623 | 680 | ||
681 | #if IS_ENABLED(CONFIG_IPV6) | ||
682 | /* Given an IPv6 address, find the net_device which hosts that address and | ||
683 | * return its index. This is used by the rds_ib_cm_handle_connect() code to | ||
684 | * find the interface index of where an incoming request comes from when | ||
685 | * the request is using a link local address. | ||
686 | * | ||
687 | * Note one problem in this search. It is possible that two interfaces have | ||
688 | * the same link local address. Unfortunately, this cannot be solved unless | ||
689 | * the underlying layer gives us the interface which an incoming RDMA connect | ||
690 | * request comes from. | ||
691 | */ | ||
692 | static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr) | ||
693 | { | ||
694 | struct net_device *dev; | ||
695 | int idx = 0; | ||
696 | |||
697 | rcu_read_lock(); | ||
698 | for_each_netdev_rcu(net, dev) { | ||
699 | if (ipv6_chk_addr(net, addr, dev, 1)) { | ||
700 | idx = dev->ifindex; | ||
701 | break; | ||
702 | } | ||
703 | } | ||
704 | rcu_read_unlock(); | ||
705 | |||
706 | return idx; | ||
707 | } | ||
708 | #endif | ||
709 | |||
624 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | 710 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, |
625 | struct rdma_cm_event *event) | 711 | struct rdma_cm_event *event, bool isv6) |
626 | { | 712 | { |
627 | __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id; | 713 | __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id; |
628 | __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id; | 714 | __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id; |
629 | const struct rds_ib_connect_private *dp = event->param.conn.private_data; | 715 | const struct rds_ib_conn_priv_cmn *dp_cmn; |
630 | struct rds_ib_connect_private dp_rep; | ||
631 | struct rds_connection *conn = NULL; | 716 | struct rds_connection *conn = NULL; |
632 | struct rds_ib_connection *ic = NULL; | 717 | struct rds_ib_connection *ic = NULL; |
633 | struct rdma_conn_param conn_param; | 718 | struct rdma_conn_param conn_param; |
719 | const union rds_ib_conn_priv *dp; | ||
720 | union rds_ib_conn_priv dp_rep; | ||
721 | struct in6_addr s_mapped_addr; | ||
722 | struct in6_addr d_mapped_addr; | ||
723 | const struct in6_addr *saddr6; | ||
724 | const struct in6_addr *daddr6; | ||
725 | int destroy = 1; | ||
726 | u32 ifindex = 0; | ||
634 | u32 version; | 727 | u32 version; |
635 | int err = 1, destroy = 1; | 728 | int err = 1; |
636 | 729 | ||
637 | /* Check whether the remote protocol version matches ours. */ | 730 | /* Check whether the remote protocol version matches ours. */ |
638 | version = rds_ib_protocol_compatible(event); | 731 | version = rds_ib_protocol_compatible(event, isv6); |
639 | if (!version) | 732 | if (!version) |
640 | goto out; | 733 | goto out; |
641 | 734 | ||
642 | rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid " | 735 | dp = event->param.conn.private_data; |
643 | "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr, | 736 | if (isv6) { |
737 | #if IS_ENABLED(CONFIG_IPV6) | ||
738 | dp_cmn = &dp->ricp_v6.dp_cmn; | ||
739 | saddr6 = &dp->ricp_v6.dp_saddr; | ||
740 | daddr6 = &dp->ricp_v6.dp_daddr; | ||
741 | /* If either address is link local, need to find the | ||
742 | * interface index in order to create a proper RDS | ||
743 | * connection. | ||
744 | */ | ||
745 | if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) { | ||
746 | /* Using init_net for now .. */ | ||
747 | ifindex = __rds_find_ifindex(&init_net, daddr6); | ||
748 | /* No index found... Need to bail out. */ | ||
749 | if (ifindex == 0) { | ||
750 | err = -EOPNOTSUPP; | ||
751 | goto out; | ||
752 | } | ||
753 | } else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) { | ||
754 | /* Use our address to find the correct index. */ | ||
755 | ifindex = __rds_find_ifindex(&init_net, daddr6); | ||
756 | /* No index found... Need to bail out. */ | ||
757 | if (ifindex == 0) { | ||
758 | err = -EOPNOTSUPP; | ||
759 | goto out; | ||
760 | } | ||
761 | } | ||
762 | #else | ||
763 | err = -EOPNOTSUPP; | ||
764 | goto out; | ||
765 | #endif | ||
766 | } else { | ||
767 | dp_cmn = &dp->ricp_v4.dp_cmn; | ||
768 | ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr); | ||
769 | ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr); | ||
770 | saddr6 = &s_mapped_addr; | ||
771 | daddr6 = &d_mapped_addr; | ||
772 | } | ||
773 | |||
774 | rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid " | ||
775 | "0x%llx\n", saddr6, daddr6, | ||
644 | RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), | 776 | RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), |
645 | (unsigned long long)be64_to_cpu(lguid), | 777 | (unsigned long long)be64_to_cpu(lguid), |
646 | (unsigned long long)be64_to_cpu(fguid)); | 778 | (unsigned long long)be64_to_cpu(fguid)); |
647 | 779 | ||
648 | /* RDS/IB is not currently netns aware, thus init_net */ | 780 | /* RDS/IB is not currently netns aware, thus init_net */ |
649 | conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr, | 781 | conn = rds_conn_create(&init_net, daddr6, saddr6, |
650 | &rds_ib_transport, GFP_KERNEL); | 782 | &rds_ib_transport, GFP_KERNEL, ifindex); |
651 | if (IS_ERR(conn)) { | 783 | if (IS_ERR(conn)) { |
652 | rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); | 784 | rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); |
653 | conn = NULL; | 785 | conn = NULL; |
@@ -678,12 +810,13 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | |||
678 | ic = conn->c_transport_data; | 810 | ic = conn->c_transport_data; |
679 | 811 | ||
680 | rds_ib_set_protocol(conn, version); | 812 | rds_ib_set_protocol(conn, version); |
681 | rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | 813 | rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit)); |
682 | 814 | ||
683 | /* If the peer gave us the last packet it saw, process this as if | 815 | /* If the peer gave us the last packet it saw, process this as if |
684 | * we had received a regular ACK. */ | 816 | * we had received a regular ACK. */ |
685 | if (dp->dp_ack_seq) | 817 | if (dp_cmn->ricpc_ack_seq) |
686 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | 818 | rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq), |
819 | NULL); | ||
687 | 820 | ||
688 | BUG_ON(cm_id->context); | 821 | BUG_ON(cm_id->context); |
689 | BUG_ON(ic->i_cm_id); | 822 | BUG_ON(ic->i_cm_id); |
@@ -702,8 +835,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | |||
702 | } | 835 | } |
703 | 836 | ||
704 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version, | 837 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version, |
705 | event->param.conn.responder_resources, | 838 | event->param.conn.responder_resources, |
706 | event->param.conn.initiator_depth); | 839 | event->param.conn.initiator_depth, isv6); |
707 | 840 | ||
708 | /* rdma_accept() calls rdma_reject() internally if it fails */ | 841 | /* rdma_accept() calls rdma_reject() internally if it fails */ |
709 | if (rdma_accept(cm_id, &conn_param)) | 842 | if (rdma_accept(cm_id, &conn_param)) |
@@ -718,12 +851,12 @@ out: | |||
718 | } | 851 | } |
719 | 852 | ||
720 | 853 | ||
721 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) | 854 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) |
722 | { | 855 | { |
723 | struct rds_connection *conn = cm_id->context; | 856 | struct rds_connection *conn = cm_id->context; |
724 | struct rds_ib_connection *ic = conn->c_transport_data; | 857 | struct rds_ib_connection *ic = conn->c_transport_data; |
725 | struct rdma_conn_param conn_param; | 858 | struct rdma_conn_param conn_param; |
726 | struct rds_ib_connect_private dp; | 859 | union rds_ib_conn_priv dp; |
727 | int ret; | 860 | int ret; |
728 | 861 | ||
729 | /* If the peer doesn't do protocol negotiation, we must | 862 | /* If the peer doesn't do protocol negotiation, we must |
@@ -738,7 +871,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) | |||
738 | } | 871 | } |
739 | 872 | ||
740 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, | 873 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, |
741 | UINT_MAX, UINT_MAX); | 874 | UINT_MAX, UINT_MAX, isv6); |
742 | ret = rdma_connect(cm_id, &conn_param); | 875 | ret = rdma_connect(cm_id, &conn_param); |
743 | if (ret) | 876 | if (ret) |
744 | rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); | 877 | rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); |
@@ -758,13 +891,22 @@ out: | |||
758 | int rds_ib_conn_path_connect(struct rds_conn_path *cp) | 891 | int rds_ib_conn_path_connect(struct rds_conn_path *cp) |
759 | { | 892 | { |
760 | struct rds_connection *conn = cp->cp_conn; | 893 | struct rds_connection *conn = cp->cp_conn; |
761 | struct rds_ib_connection *ic = conn->c_transport_data; | 894 | struct sockaddr_storage src, dest; |
762 | struct sockaddr_in src, dest; | 895 | rdma_cm_event_handler handler; |
896 | struct rds_ib_connection *ic; | ||
763 | int ret; | 897 | int ret; |
764 | 898 | ||
899 | ic = conn->c_transport_data; | ||
900 | |||
765 | /* XXX I wonder what affect the port space has */ | 901 | /* XXX I wonder what affect the port space has */ |
766 | /* delegate cm event handler to rdma_transport */ | 902 | /* delegate cm event handler to rdma_transport */ |
767 | ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, | 903 | #if IS_ENABLED(CONFIG_IPV6) |
904 | if (conn->c_isv6) | ||
905 | handler = rds6_rdma_cm_event_handler; | ||
906 | else | ||
907 | #endif | ||
908 | handler = rds_rdma_cm_event_handler; | ||
909 | ic->i_cm_id = rdma_create_id(&init_net, handler, conn, | ||
768 | RDMA_PS_TCP, IB_QPT_RC); | 910 | RDMA_PS_TCP, IB_QPT_RC); |
769 | if (IS_ERR(ic->i_cm_id)) { | 911 | if (IS_ERR(ic->i_cm_id)) { |
770 | ret = PTR_ERR(ic->i_cm_id); | 912 | ret = PTR_ERR(ic->i_cm_id); |
@@ -775,13 +917,33 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) | |||
775 | 917 | ||
776 | rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); | 918 | rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); |
777 | 919 | ||
778 | src.sin_family = AF_INET; | 920 | if (ipv6_addr_v4mapped(&conn->c_faddr)) { |
779 | src.sin_addr.s_addr = (__force u32)conn->c_laddr; | 921 | struct sockaddr_in *sin; |
780 | src.sin_port = (__force u16)htons(0); | 922 | |
923 | sin = (struct sockaddr_in *)&src; | ||
924 | sin->sin_family = AF_INET; | ||
925 | sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; | ||
926 | sin->sin_port = 0; | ||
781 | 927 | ||
782 | dest.sin_family = AF_INET; | 928 | sin = (struct sockaddr_in *)&dest; |
783 | dest.sin_addr.s_addr = (__force u32)conn->c_faddr; | 929 | sin->sin_family = AF_INET; |
784 | dest.sin_port = (__force u16)htons(RDS_PORT); | 930 | sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; |
931 | sin->sin_port = htons(RDS_PORT); | ||
932 | } else { | ||
933 | struct sockaddr_in6 *sin6; | ||
934 | |||
935 | sin6 = (struct sockaddr_in6 *)&src; | ||
936 | sin6->sin6_family = AF_INET6; | ||
937 | sin6->sin6_addr = conn->c_laddr; | ||
938 | sin6->sin6_port = 0; | ||
939 | sin6->sin6_scope_id = conn->c_dev_if; | ||
940 | |||
941 | sin6 = (struct sockaddr_in6 *)&dest; | ||
942 | sin6->sin6_family = AF_INET6; | ||
943 | sin6->sin6_addr = conn->c_faddr; | ||
944 | sin6->sin6_port = htons(RDS_CM_PORT); | ||
945 | sin6->sin6_scope_id = conn->c_dev_if; | ||
946 | } | ||
785 | 947 | ||
786 | ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, | 948 | ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, |
787 | (struct sockaddr *)&dest, | 949 | (struct sockaddr *)&dest, |
@@ -949,7 +1111,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) | |||
949 | if (!ic) | 1111 | if (!ic) |
950 | return -ENOMEM; | 1112 | return -ENOMEM; |
951 | 1113 | ||
952 | ret = rds_ib_recv_alloc_caches(ic); | 1114 | ret = rds_ib_recv_alloc_caches(ic, gfp); |
953 | if (ret) { | 1115 | if (ret) { |
954 | kfree(ic); | 1116 | kfree(ic); |
955 | return ret; | 1117 | return ret; |
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index b371cf08b1fc..6431a023ac89 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c | |||
@@ -61,6 +61,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, | |||
61 | pool->fmr_attr.max_pages); | 61 | pool->fmr_attr.max_pages); |
62 | if (IS_ERR(frmr->mr)) { | 62 | if (IS_ERR(frmr->mr)) { |
63 | pr_warn("RDS/IB: %s failed to allocate MR", __func__); | 63 | pr_warn("RDS/IB: %s failed to allocate MR", __func__); |
64 | err = PTR_ERR(frmr->mr); | ||
64 | goto out_no_cigar; | 65 | goto out_no_cigar; |
65 | } | 66 | } |
66 | 67 | ||
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h index 655f01d427fe..5da12c248431 100644 --- a/net/rds/ib_mr.h +++ b/net/rds/ib_mr.h | |||
@@ -113,6 +113,8 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, | |||
113 | int npages); | 113 | int npages); |
114 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, | 114 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, |
115 | struct rds_info_rdma_connection *iinfo); | 115 | struct rds_info_rdma_connection *iinfo); |
116 | void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, | ||
117 | struct rds6_info_rdma_connection *iinfo6); | ||
116 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); | 118 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); |
117 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, | 119 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, |
118 | struct rds_sock *rs, u32 *key_ret, | 120 | struct rds_sock *rs, u32 *key_ret, |
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 2e49a40a5e11..63c8d107adcf 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -100,18 +100,19 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | |||
100 | kfree_rcu(to_free, rcu); | 100 | kfree_rcu(to_free, rcu); |
101 | } | 101 | } |
102 | 102 | ||
103 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | 103 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, |
104 | struct in6_addr *ipaddr) | ||
104 | { | 105 | { |
105 | struct rds_ib_device *rds_ibdev_old; | 106 | struct rds_ib_device *rds_ibdev_old; |
106 | 107 | ||
107 | rds_ibdev_old = rds_ib_get_device(ipaddr); | 108 | rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]); |
108 | if (!rds_ibdev_old) | 109 | if (!rds_ibdev_old) |
109 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr); | 110 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); |
110 | 111 | ||
111 | if (rds_ibdev_old != rds_ibdev) { | 112 | if (rds_ibdev_old != rds_ibdev) { |
112 | rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); | 113 | rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]); |
113 | rds_ib_dev_put(rds_ibdev_old); | 114 | rds_ib_dev_put(rds_ibdev_old); |
114 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr); | 115 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); |
115 | } | 116 | } |
116 | rds_ib_dev_put(rds_ibdev_old); | 117 | rds_ib_dev_put(rds_ibdev_old); |
117 | 118 | ||
@@ -179,6 +180,17 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co | |||
179 | iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; | 180 | iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; |
180 | } | 181 | } |
181 | 182 | ||
183 | #if IS_ENABLED(CONFIG_IPV6) | ||
184 | void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, | ||
185 | struct rds6_info_rdma_connection *iinfo6) | ||
186 | { | ||
187 | struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; | ||
188 | |||
189 | iinfo6->rdma_mr_max = pool_1m->max_items; | ||
190 | iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages; | ||
191 | } | ||
192 | #endif | ||
193 | |||
182 | struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) | 194 | struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) |
183 | { | 195 | { |
184 | struct rds_ib_mr *ibmr = NULL; | 196 | struct rds_ib_mr *ibmr = NULL; |
@@ -545,7 +557,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, | |||
545 | struct rds_ib_connection *ic = NULL; | 557 | struct rds_ib_connection *ic = NULL; |
546 | int ret; | 558 | int ret; |
547 | 559 | ||
548 | rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); | 560 | rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]); |
549 | if (!rds_ibdev) { | 561 | if (!rds_ibdev) { |
550 | ret = -ENODEV; | 562 | ret = -ENODEV; |
551 | goto out; | 563 | goto out; |
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 4c5a937304b2..2f16146e4ec9 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -98,12 +98,12 @@ static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache) | |||
98 | } | 98 | } |
99 | } | 99 | } |
100 | 100 | ||
101 | static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache) | 101 | static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp) |
102 | { | 102 | { |
103 | struct rds_ib_cache_head *head; | 103 | struct rds_ib_cache_head *head; |
104 | int cpu; | 104 | int cpu; |
105 | 105 | ||
106 | cache->percpu = alloc_percpu(struct rds_ib_cache_head); | 106 | cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp); |
107 | if (!cache->percpu) | 107 | if (!cache->percpu) |
108 | return -ENOMEM; | 108 | return -ENOMEM; |
109 | 109 | ||
@@ -118,13 +118,13 @@ static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache) | |||
118 | return 0; | 118 | return 0; |
119 | } | 119 | } |
120 | 120 | ||
121 | int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic) | 121 | int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp) |
122 | { | 122 | { |
123 | int ret; | 123 | int ret; |
124 | 124 | ||
125 | ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs); | 125 | ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp); |
126 | if (!ret) { | 126 | if (!ret) { |
127 | ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags); | 127 | ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp); |
128 | if (ret) | 128 | if (ret) |
129 | free_percpu(ic->i_cache_incs.percpu); | 129 | free_percpu(ic->i_cache_incs.percpu); |
130 | } | 130 | } |
@@ -266,7 +266,7 @@ static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *i | |||
266 | rds_ib_stats_inc(s_ib_rx_total_incs); | 266 | rds_ib_stats_inc(s_ib_rx_total_incs); |
267 | } | 267 | } |
268 | INIT_LIST_HEAD(&ibinc->ii_frags); | 268 | INIT_LIST_HEAD(&ibinc->ii_frags); |
269 | rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr); | 269 | rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr); |
270 | 270 | ||
271 | return ibinc; | 271 | return ibinc; |
272 | } | 272 | } |
@@ -376,8 +376,6 @@ static void release_refill(struct rds_connection *conn) | |||
376 | * This tries to allocate and post unused work requests after making sure that | 376 | * This tries to allocate and post unused work requests after making sure that |
377 | * they have all the allocations they need to queue received fragments into | 377 | * they have all the allocations they need to queue received fragments into |
378 | * sockets. | 378 | * sockets. |
379 | * | ||
380 | * -1 is returned if posting fails due to temporary resource exhaustion. | ||
381 | */ | 379 | */ |
382 | void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) | 380 | void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) |
383 | { | 381 | { |
@@ -419,7 +417,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) | |||
419 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); | 417 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); |
420 | if (ret) { | 418 | if (ret) { |
421 | rds_ib_conn_error(conn, "recv post on " | 419 | rds_ib_conn_error(conn, "recv post on " |
422 | "%pI4 returned %d, disconnecting and " | 420 | "%pI6c returned %d, disconnecting and " |
423 | "reconnecting\n", &conn->c_faddr, | 421 | "reconnecting\n", &conn->c_faddr, |
424 | ret); | 422 | ret); |
425 | break; | 423 | break; |
@@ -848,7 +846,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, | |||
848 | 846 | ||
849 | if (data_len < sizeof(struct rds_header)) { | 847 | if (data_len < sizeof(struct rds_header)) { |
850 | rds_ib_conn_error(conn, "incoming message " | 848 | rds_ib_conn_error(conn, "incoming message " |
851 | "from %pI4 didn't include a " | 849 | "from %pI6c didn't include a " |
852 | "header, disconnecting and " | 850 | "header, disconnecting and " |
853 | "reconnecting\n", | 851 | "reconnecting\n", |
854 | &conn->c_faddr); | 852 | &conn->c_faddr); |
@@ -861,7 +859,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, | |||
861 | /* Validate the checksum. */ | 859 | /* Validate the checksum. */ |
862 | if (!rds_message_verify_checksum(ihdr)) { | 860 | if (!rds_message_verify_checksum(ihdr)) { |
863 | rds_ib_conn_error(conn, "incoming message " | 861 | rds_ib_conn_error(conn, "incoming message " |
864 | "from %pI4 has corrupted header - " | 862 | "from %pI6c has corrupted header - " |
865 | "forcing a reconnect\n", | 863 | "forcing a reconnect\n", |
866 | &conn->c_faddr); | 864 | &conn->c_faddr); |
867 | rds_stats_inc(s_recv_drop_bad_checksum); | 865 | rds_stats_inc(s_recv_drop_bad_checksum); |
@@ -941,10 +939,10 @@ static void rds_ib_process_recv(struct rds_connection *conn, | |||
941 | ic->i_recv_data_rem = 0; | 939 | ic->i_recv_data_rem = 0; |
942 | ic->i_ibinc = NULL; | 940 | ic->i_ibinc = NULL; |
943 | 941 | ||
944 | if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) | 942 | if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) { |
945 | rds_ib_cong_recv(conn, ibinc); | 943 | rds_ib_cong_recv(conn, ibinc); |
946 | else { | 944 | } else { |
947 | rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, | 945 | rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr, |
948 | &ibinc->ii_inc, GFP_ATOMIC); | 946 | &ibinc->ii_inc, GFP_ATOMIC); |
949 | state->ack_next = be64_to_cpu(hdr->h_sequence); | 947 | state->ack_next = be64_to_cpu(hdr->h_sequence); |
950 | state->ack_next_valid = 1; | 948 | state->ack_next_valid = 1; |
@@ -988,7 +986,7 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, | |||
988 | } else { | 986 | } else { |
989 | /* We expect errors as the qp is drained during shutdown */ | 987 | /* We expect errors as the qp is drained during shutdown */ |
990 | if (rds_conn_up(conn) || rds_conn_connecting(conn)) | 988 | if (rds_conn_up(conn) || rds_conn_connecting(conn)) |
991 | rds_ib_conn_error(conn, "recv completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n", | 989 | rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", |
992 | &conn->c_laddr, &conn->c_faddr, | 990 | &conn->c_laddr, &conn->c_faddr, |
993 | wc->status, | 991 | wc->status, |
994 | ib_wc_status_msg(wc->status)); | 992 | ib_wc_status_msg(wc->status)); |
@@ -1023,7 +1021,6 @@ int rds_ib_recv_path(struct rds_conn_path *cp) | |||
1023 | { | 1021 | { |
1024 | struct rds_connection *conn = cp->cp_conn; | 1022 | struct rds_connection *conn = cp->cp_conn; |
1025 | struct rds_ib_connection *ic = conn->c_transport_data; | 1023 | struct rds_ib_connection *ic = conn->c_transport_data; |
1026 | int ret = 0; | ||
1027 | 1024 | ||
1028 | rdsdebug("conn %p\n", conn); | 1025 | rdsdebug("conn %p\n", conn); |
1029 | if (rds_conn_up(conn)) { | 1026 | if (rds_conn_up(conn)) { |
@@ -1032,7 +1029,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp) | |||
1032 | rds_ib_stats_inc(s_ib_rx_refill_from_thread); | 1029 | rds_ib_stats_inc(s_ib_rx_refill_from_thread); |
1033 | } | 1030 | } |
1034 | 1031 | ||
1035 | return ret; | 1032 | return 0; |
1036 | } | 1033 | } |
1037 | 1034 | ||
1038 | int rds_ib_recv_init(void) | 1035 | int rds_ib_recv_init(void) |
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 8ac80c1b051e..2dcb555e6350 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -305,7 +305,7 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) | |||
305 | 305 | ||
306 | /* We expect errors as the qp is drained during shutdown */ | 306 | /* We expect errors as the qp is drained during shutdown */ |
307 | if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { | 307 | if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { |
308 | rds_ib_conn_error(conn, "send completion on <%pI4,%pI4> had status %u (%s), disconnecting and reconnecting\n", | 308 | rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", |
309 | &conn->c_laddr, &conn->c_faddr, wc->status, | 309 | &conn->c_laddr, &conn->c_faddr, wc->status, |
310 | ib_wc_status_msg(wc->status)); | 310 | ib_wc_status_msg(wc->status)); |
311 | } | 311 | } |
@@ -730,7 +730,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
730 | first, &first->s_wr, ret, failed_wr); | 730 | first, &first->s_wr, ret, failed_wr); |
731 | BUG_ON(failed_wr != &first->s_wr); | 731 | BUG_ON(failed_wr != &first->s_wr); |
732 | if (ret) { | 732 | if (ret) { |
733 | printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " | 733 | printk(KERN_WARNING "RDS/IB: ib_post_send to %pI6c " |
734 | "returned %d\n", &conn->c_faddr, ret); | 734 | "returned %d\n", &conn->c_faddr, ret); |
735 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | 735 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); |
736 | rds_ib_sub_signaled(ic, nr_sig); | 736 | rds_ib_sub_signaled(ic, nr_sig); |
@@ -759,14 +759,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) | |||
759 | struct rds_ib_connection *ic = conn->c_transport_data; | 759 | struct rds_ib_connection *ic = conn->c_transport_data; |
760 | struct rds_ib_send_work *send = NULL; | 760 | struct rds_ib_send_work *send = NULL; |
761 | const struct ib_send_wr *failed_wr; | 761 | const struct ib_send_wr *failed_wr; |
762 | struct rds_ib_device *rds_ibdev; | ||
763 | u32 pos; | 762 | u32 pos; |
764 | u32 work_alloc; | 763 | u32 work_alloc; |
765 | int ret; | 764 | int ret; |
766 | int nr_sig = 0; | 765 | int nr_sig = 0; |
767 | 766 | ||
768 | rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); | ||
769 | |||
770 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); | 767 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos); |
771 | if (work_alloc != 1) { | 768 | if (work_alloc != 1) { |
772 | rds_ib_stats_inc(s_ib_tx_ring_full); | 769 | rds_ib_stats_inc(s_ib_tx_ring_full); |
@@ -827,7 +824,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) | |||
827 | send, &send->s_atomic_wr, ret, failed_wr); | 824 | send, &send->s_atomic_wr, ret, failed_wr); |
828 | BUG_ON(failed_wr != &send->s_atomic_wr.wr); | 825 | BUG_ON(failed_wr != &send->s_atomic_wr.wr); |
829 | if (ret) { | 826 | if (ret) { |
830 | printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " | 827 | printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI6c " |
831 | "returned %d\n", &conn->c_faddr, ret); | 828 | "returned %d\n", &conn->c_faddr, ret); |
832 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | 829 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); |
833 | rds_ib_sub_signaled(ic, nr_sig); | 830 | rds_ib_sub_signaled(ic, nr_sig); |
@@ -967,7 +964,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) | |||
967 | first, &first->s_rdma_wr.wr, ret, failed_wr); | 964 | first, &first->s_rdma_wr.wr, ret, failed_wr); |
968 | BUG_ON(failed_wr != &first->s_rdma_wr.wr); | 965 | BUG_ON(failed_wr != &first->s_rdma_wr.wr); |
969 | if (ret) { | 966 | if (ret) { |
970 | printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " | 967 | printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI6c " |
971 | "returned %d\n", &conn->c_faddr, ret); | 968 | "returned %d\n", &conn->c_faddr, ret); |
972 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | 969 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); |
973 | rds_ib_sub_signaled(ic, nr_sig); | 970 | rds_ib_sub_signaled(ic, nr_sig); |
diff --git a/net/rds/loop.c b/net/rds/loop.c index feea1f96ee2a..1d73ad79c847 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/in.h> | 35 | #include <linux/in.h> |
36 | #include <net/net_namespace.h> | 36 | #include <net/net_namespace.h> |
37 | #include <net/netns/generic.h> | 37 | #include <net/netns/generic.h> |
38 | #include <linux/ipv6.h> | ||
38 | 39 | ||
39 | #include "rds_single_path.h" | 40 | #include "rds_single_path.h" |
40 | #include "rds.h" | 41 | #include "rds.h" |
@@ -88,11 +89,11 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, | |||
88 | 89 | ||
89 | BUG_ON(hdr_off || sg || off); | 90 | BUG_ON(hdr_off || sg || off); |
90 | 91 | ||
91 | rds_inc_init(&rm->m_inc, conn, conn->c_laddr); | 92 | rds_inc_init(&rm->m_inc, conn, &conn->c_laddr); |
92 | /* For the embedded inc. Matching put is in loop_inc_free() */ | 93 | /* For the embedded inc. Matching put is in loop_inc_free() */ |
93 | rds_message_addref(rm); | 94 | rds_message_addref(rm); |
94 | 95 | ||
95 | rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, | 96 | rds_recv_incoming(conn, &conn->c_laddr, &conn->c_faddr, &rm->m_inc, |
96 | GFP_KERNEL); | 97 | GFP_KERNEL); |
97 | 98 | ||
98 | rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), | 99 | rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), |
diff --git a/net/rds/message.c b/net/rds/message.c index a35f76971984..4b00b1152a5f 100644 --- a/net/rds/message.c +++ b/net/rds/message.c | |||
@@ -514,4 +514,3 @@ void rds_message_unmapped(struct rds_message *rm) | |||
514 | wake_up_interruptible(&rm->m_flush_wait); | 514 | wake_up_interruptible(&rm->m_flush_wait); |
515 | } | 515 | } |
516 | EXPORT_SYMBOL_GPL(rds_message_unmapped); | 516 | EXPORT_SYMBOL_GPL(rds_message_unmapped); |
517 | |||
diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 80920e47f2c7..98237feb607a 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2007 Oracle. All rights reserved. | 2 | * Copyright (c) 2007, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -184,7 +184,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, | |||
184 | long i; | 184 | long i; |
185 | int ret; | 185 | int ret; |
186 | 186 | ||
187 | if (rs->rs_bound_addr == 0 || !rs->rs_transport) { | 187 | if (ipv6_addr_any(&rs->rs_bound_addr) || !rs->rs_transport) { |
188 | ret = -ENOTCONN; /* XXX not a great errno */ | 188 | ret = -ENOTCONN; /* XXX not a great errno */ |
189 | goto out; | 189 | goto out; |
190 | } | 190 | } |
@@ -576,7 +576,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | |||
576 | 576 | ||
577 | args = CMSG_DATA(cmsg); | 577 | args = CMSG_DATA(cmsg); |
578 | 578 | ||
579 | if (rs->rs_bound_addr == 0) { | 579 | if (ipv6_addr_any(&rs->rs_bound_addr)) { |
580 | ret = -ENOTCONN; /* XXX not a great errno */ | 580 | ret = -ENOTCONN; /* XXX not a great errno */ |
581 | goto out_ret; | 581 | goto out_ret; |
582 | } | 582 | } |
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index fc59821f0a27..6b0f57c83a2a 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2009 Oracle. All rights reserved. | 2 | * Copyright (c) 2009, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -37,10 +37,15 @@ | |||
37 | #include "rdma_transport.h" | 37 | #include "rdma_transport.h" |
38 | #include "ib.h" | 38 | #include "ib.h" |
39 | 39 | ||
40 | /* Global IPv4 and IPv6 RDS RDMA listener cm_id */ | ||
40 | static struct rdma_cm_id *rds_rdma_listen_id; | 41 | static struct rdma_cm_id *rds_rdma_listen_id; |
42 | #if IS_ENABLED(CONFIG_IPV6) | ||
43 | static struct rdma_cm_id *rds6_rdma_listen_id; | ||
44 | #endif | ||
41 | 45 | ||
42 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | 46 | static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, |
43 | struct rdma_cm_event *event) | 47 | struct rdma_cm_event *event, |
48 | bool isv6) | ||
44 | { | 49 | { |
45 | /* this can be null in the listening path */ | 50 | /* this can be null in the listening path */ |
46 | struct rds_connection *conn = cm_id->context; | 51 | struct rds_connection *conn = cm_id->context; |
@@ -72,7 +77,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | |||
72 | 77 | ||
73 | switch (event->event) { | 78 | switch (event->event) { |
74 | case RDMA_CM_EVENT_CONNECT_REQUEST: | 79 | case RDMA_CM_EVENT_CONNECT_REQUEST: |
75 | ret = trans->cm_handle_connect(cm_id, event); | 80 | ret = trans->cm_handle_connect(cm_id, event, isv6); |
76 | break; | 81 | break; |
77 | 82 | ||
78 | case RDMA_CM_EVENT_ADDR_RESOLVED: | 83 | case RDMA_CM_EVENT_ADDR_RESOLVED: |
@@ -90,7 +95,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | |||
90 | 95 | ||
91 | ibic = conn->c_transport_data; | 96 | ibic = conn->c_transport_data; |
92 | if (ibic && ibic->i_cm_id == cm_id) | 97 | if (ibic && ibic->i_cm_id == cm_id) |
93 | ret = trans->cm_initiate_connect(cm_id); | 98 | ret = trans->cm_initiate_connect(cm_id, isv6); |
94 | else | 99 | else |
95 | rds_conn_drop(conn); | 100 | rds_conn_drop(conn); |
96 | } | 101 | } |
@@ -116,14 +121,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | |||
116 | 121 | ||
117 | case RDMA_CM_EVENT_DISCONNECTED: | 122 | case RDMA_CM_EVENT_DISCONNECTED: |
118 | rdsdebug("DISCONNECT event - dropping connection " | 123 | rdsdebug("DISCONNECT event - dropping connection " |
119 | "%pI4->%pI4\n", &conn->c_laddr, | 124 | "%pI6c->%pI6c\n", &conn->c_laddr, |
120 | &conn->c_faddr); | 125 | &conn->c_faddr); |
121 | rds_conn_drop(conn); | 126 | rds_conn_drop(conn); |
122 | break; | 127 | break; |
123 | 128 | ||
124 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: | 129 | case RDMA_CM_EVENT_TIMEWAIT_EXIT: |
125 | if (conn) { | 130 | if (conn) { |
126 | pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI4->%pI4\n", | 131 | pr_info("RDS: RDMA_CM_EVENT_TIMEWAIT_EXIT event: dropping connection %pI6c->%pI6c\n", |
127 | &conn->c_laddr, &conn->c_faddr); | 132 | &conn->c_laddr, &conn->c_faddr); |
128 | rds_conn_drop(conn); | 133 | rds_conn_drop(conn); |
129 | } | 134 | } |
@@ -146,13 +151,28 @@ out: | |||
146 | return ret; | 151 | return ret; |
147 | } | 152 | } |
148 | 153 | ||
149 | static int rds_rdma_listen_init(void) | 154 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, |
155 | struct rdma_cm_event *event) | ||
156 | { | ||
157 | return rds_rdma_cm_event_handler_cmn(cm_id, event, false); | ||
158 | } | ||
159 | |||
160 | #if IS_ENABLED(CONFIG_IPV6) | ||
161 | int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | ||
162 | struct rdma_cm_event *event) | ||
163 | { | ||
164 | return rds_rdma_cm_event_handler_cmn(cm_id, event, true); | ||
165 | } | ||
166 | #endif | ||
167 | |||
168 | static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, | ||
169 | struct sockaddr *sa, | ||
170 | struct rdma_cm_id **ret_cm_id) | ||
150 | { | 171 | { |
151 | struct sockaddr_in sin; | ||
152 | struct rdma_cm_id *cm_id; | 172 | struct rdma_cm_id *cm_id; |
153 | int ret; | 173 | int ret; |
154 | 174 | ||
155 | cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, | 175 | cm_id = rdma_create_id(&init_net, handler, NULL, |
156 | RDMA_PS_TCP, IB_QPT_RC); | 176 | RDMA_PS_TCP, IB_QPT_RC); |
157 | if (IS_ERR(cm_id)) { | 177 | if (IS_ERR(cm_id)) { |
158 | ret = PTR_ERR(cm_id); | 178 | ret = PTR_ERR(cm_id); |
@@ -161,15 +181,11 @@ static int rds_rdma_listen_init(void) | |||
161 | return ret; | 181 | return ret; |
162 | } | 182 | } |
163 | 183 | ||
164 | sin.sin_family = AF_INET; | ||
165 | sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); | ||
166 | sin.sin_port = (__force u16)htons(RDS_PORT); | ||
167 | |||
168 | /* | 184 | /* |
169 | * XXX I bet this binds the cm_id to a device. If we want to support | 185 | * XXX I bet this binds the cm_id to a device. If we want to support |
170 | * fail-over we'll have to take this into consideration. | 186 | * fail-over we'll have to take this into consideration. |
171 | */ | 187 | */ |
172 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | 188 | ret = rdma_bind_addr(cm_id, sa); |
173 | if (ret) { | 189 | if (ret) { |
174 | printk(KERN_ERR "RDS/RDMA: failed to setup listener, " | 190 | printk(KERN_ERR "RDS/RDMA: failed to setup listener, " |
175 | "rdma_bind_addr() returned %d\n", ret); | 191 | "rdma_bind_addr() returned %d\n", ret); |
@@ -185,7 +201,7 @@ static int rds_rdma_listen_init(void) | |||
185 | 201 | ||
186 | rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); | 202 | rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); |
187 | 203 | ||
188 | rds_rdma_listen_id = cm_id; | 204 | *ret_cm_id = cm_id; |
189 | cm_id = NULL; | 205 | cm_id = NULL; |
190 | out: | 206 | out: |
191 | if (cm_id) | 207 | if (cm_id) |
@@ -193,6 +209,45 @@ out: | |||
193 | return ret; | 209 | return ret; |
194 | } | 210 | } |
195 | 211 | ||
212 | /* Initialize the RDS RDMA listeners. We create two listeners for | ||
213 | * compatibility reason. The one on RDS_PORT is used for IPv4 | ||
214 | * requests only. The one on RDS_CM_PORT is used for IPv6 requests | ||
215 | * only. So only IPv6 enabled RDS module will communicate using this | ||
216 | * port. | ||
217 | */ | ||
218 | static int rds_rdma_listen_init(void) | ||
219 | { | ||
220 | int ret; | ||
221 | #if IS_ENABLED(CONFIG_IPV6) | ||
222 | struct sockaddr_in6 sin6; | ||
223 | #endif | ||
224 | struct sockaddr_in sin; | ||
225 | |||
226 | sin.sin_family = PF_INET; | ||
227 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | ||
228 | sin.sin_port = htons(RDS_PORT); | ||
229 | ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler, | ||
230 | (struct sockaddr *)&sin, | ||
231 | &rds_rdma_listen_id); | ||
232 | if (ret != 0) | ||
233 | return ret; | ||
234 | |||
235 | #if IS_ENABLED(CONFIG_IPV6) | ||
236 | sin6.sin6_family = PF_INET6; | ||
237 | sin6.sin6_addr = in6addr_any; | ||
238 | sin6.sin6_port = htons(RDS_CM_PORT); | ||
239 | sin6.sin6_scope_id = 0; | ||
240 | sin6.sin6_flowinfo = 0; | ||
241 | ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler, | ||
242 | (struct sockaddr *)&sin6, | ||
243 | &rds6_rdma_listen_id); | ||
244 | /* Keep going even when IPv6 is not enabled in the system. */ | ||
245 | if (ret != 0) | ||
246 | rdsdebug("Cannot set up IPv6 RDMA listener\n"); | ||
247 | #endif | ||
248 | return 0; | ||
249 | } | ||
250 | |||
196 | static void rds_rdma_listen_stop(void) | 251 | static void rds_rdma_listen_stop(void) |
197 | { | 252 | { |
198 | if (rds_rdma_listen_id) { | 253 | if (rds_rdma_listen_id) { |
@@ -200,6 +255,13 @@ static void rds_rdma_listen_stop(void) | |||
200 | rdma_destroy_id(rds_rdma_listen_id); | 255 | rdma_destroy_id(rds_rdma_listen_id); |
201 | rds_rdma_listen_id = NULL; | 256 | rds_rdma_listen_id = NULL; |
202 | } | 257 | } |
258 | #if IS_ENABLED(CONFIG_IPV6) | ||
259 | if (rds6_rdma_listen_id) { | ||
260 | rdsdebug("cm %p\n", rds6_rdma_listen_id); | ||
261 | rdma_destroy_id(rds6_rdma_listen_id); | ||
262 | rds6_rdma_listen_id = NULL; | ||
263 | } | ||
264 | #endif | ||
203 | } | 265 | } |
204 | 266 | ||
205 | static int rds_rdma_init(void) | 267 | static int rds_rdma_init(void) |
@@ -229,4 +291,3 @@ module_exit(rds_rdma_exit); | |||
229 | MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); | 291 | MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); |
230 | MODULE_DESCRIPTION("RDS: IB transport"); | 292 | MODULE_DESCRIPTION("RDS: IB transport"); |
231 | MODULE_LICENSE("Dual BSD/GPL"); | 293 | MODULE_LICENSE("Dual BSD/GPL"); |
232 | |||
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index d309c4430124..200d3134aaae 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h | |||
@@ -6,11 +6,16 @@ | |||
6 | #include <rdma/rdma_cm.h> | 6 | #include <rdma/rdma_cm.h> |
7 | #include "rds.h" | 7 | #include "rds.h" |
8 | 8 | ||
9 | /* RDMA_CM also uses 16385 as the listener port. */ | ||
10 | #define RDS_CM_PORT 16385 | ||
11 | |||
9 | #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 | 12 | #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 |
10 | 13 | ||
11 | int rds_rdma_conn_connect(struct rds_connection *conn); | 14 | int rds_rdma_conn_connect(struct rds_connection *conn); |
12 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | 15 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, |
13 | struct rdma_cm_event *event); | 16 | struct rdma_cm_event *event); |
17 | int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | ||
18 | struct rdma_cm_event *event); | ||
14 | 19 | ||
15 | /* from ib.c */ | 20 | /* from ib.c */ |
16 | extern struct rds_transport rds_ib_transport; | 21 | extern struct rds_transport rds_ib_transport; |
diff --git a/net/rds/rds.h b/net/rds/rds.h index 60b3b787fbdb..c4dcf654d8fe 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/rds.h> | 10 | #include <linux/rds.h> |
11 | #include <linux/rhashtable.h> | 11 | #include <linux/rhashtable.h> |
12 | #include <linux/refcount.h> | 12 | #include <linux/refcount.h> |
13 | #include <linux/in6.h> | ||
13 | 14 | ||
14 | #include "info.h" | 15 | #include "info.h" |
15 | 16 | ||
@@ -23,11 +24,13 @@ | |||
23 | #define RDS_PROTOCOL_MINOR(v) ((v) & 255) | 24 | #define RDS_PROTOCOL_MINOR(v) ((v) & 255) |
24 | #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) | 25 | #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) |
25 | 26 | ||
26 | /* | 27 | /* The following ports, 16385, 18634, 18635, are registered with IANA as |
27 | * XXX randomly chosen, but at least seems to be unused: | 28 | * the ports to be used for RDS over TCP and UDP. Currently, only RDS over |
28 | * # 18464-18768 Unassigned | 29 | * TCP and RDS over IB/RDMA are implemented. 18634 is the historical value |
29 | * We should do better. We want a reserved port to discourage unpriv'ed | 30 | * used for the RDMA_CM listener port. RDS/TCP uses port 16385. After |
30 | * userspace from listening. | 31 | * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept |
32 | * to ensure compatibility with older RDS modules. Those ports are defined | ||
33 | * in each transport's header file. | ||
31 | */ | 34 | */ |
32 | #define RDS_PORT 18634 | 35 | #define RDS_PORT 18634 |
33 | 36 | ||
@@ -61,7 +64,7 @@ void rdsdebug(char *fmt, ...) | |||
61 | 64 | ||
62 | struct rds_cong_map { | 65 | struct rds_cong_map { |
63 | struct rb_node m_rb_node; | 66 | struct rb_node m_rb_node; |
64 | __be32 m_addr; | 67 | struct in6_addr m_addr; |
65 | wait_queue_head_t m_waitq; | 68 | wait_queue_head_t m_waitq; |
66 | struct list_head m_conn_list; | 69 | struct list_head m_conn_list; |
67 | unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; | 70 | unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; |
@@ -136,11 +139,14 @@ struct rds_conn_path { | |||
136 | /* One rds_connection per RDS address pair */ | 139 | /* One rds_connection per RDS address pair */ |
137 | struct rds_connection { | 140 | struct rds_connection { |
138 | struct hlist_node c_hash_node; | 141 | struct hlist_node c_hash_node; |
139 | __be32 c_laddr; | 142 | struct in6_addr c_laddr; |
140 | __be32 c_faddr; | 143 | struct in6_addr c_faddr; |
144 | int c_dev_if; /* ifindex used for this conn */ | ||
145 | int c_bound_if; /* ifindex of c_laddr */ | ||
141 | unsigned int c_loopback:1, | 146 | unsigned int c_loopback:1, |
147 | c_isv6:1, | ||
142 | c_ping_triggered:1, | 148 | c_ping_triggered:1, |
143 | c_pad_to_32:30; | 149 | c_pad_to_32:29; |
144 | int c_npaths; | 150 | int c_npaths; |
145 | struct rds_connection *c_passive; | 151 | struct rds_connection *c_passive; |
146 | struct rds_transport *c_trans; | 152 | struct rds_transport *c_trans; |
@@ -269,7 +275,7 @@ struct rds_incoming { | |||
269 | struct rds_conn_path *i_conn_path; | 275 | struct rds_conn_path *i_conn_path; |
270 | struct rds_header i_hdr; | 276 | struct rds_header i_hdr; |
271 | unsigned long i_rx_jiffies; | 277 | unsigned long i_rx_jiffies; |
272 | __be32 i_saddr; | 278 | struct in6_addr i_saddr; |
273 | 279 | ||
274 | rds_rdma_cookie_t i_rdma_cookie; | 280 | rds_rdma_cookie_t i_rdma_cookie; |
275 | struct timeval i_rx_tstamp; | 281 | struct timeval i_rx_tstamp; |
@@ -386,7 +392,7 @@ struct rds_message { | |||
386 | struct list_head m_conn_item; | 392 | struct list_head m_conn_item; |
387 | struct rds_incoming m_inc; | 393 | struct rds_incoming m_inc; |
388 | u64 m_ack_seq; | 394 | u64 m_ack_seq; |
389 | __be32 m_daddr; | 395 | struct in6_addr m_daddr; |
390 | unsigned long m_flags; | 396 | unsigned long m_flags; |
391 | 397 | ||
392 | /* Never access m_rs without holding m_rs_lock. | 398 | /* Never access m_rs without holding m_rs_lock. |
@@ -521,7 +527,8 @@ struct rds_transport { | |||
521 | t_mp_capable:1; | 527 | t_mp_capable:1; |
522 | unsigned int t_type; | 528 | unsigned int t_type; |
523 | 529 | ||
524 | int (*laddr_check)(struct net *net, __be32 addr); | 530 | int (*laddr_check)(struct net *net, const struct in6_addr *addr, |
531 | __u32 scope_id); | ||
525 | int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); | 532 | int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); |
526 | void (*conn_free)(void *data); | 533 | void (*conn_free)(void *data); |
527 | int (*conn_path_connect)(struct rds_conn_path *cp); | 534 | int (*conn_path_connect)(struct rds_conn_path *cp); |
@@ -537,8 +544,8 @@ struct rds_transport { | |||
537 | void (*inc_free)(struct rds_incoming *inc); | 544 | void (*inc_free)(struct rds_incoming *inc); |
538 | 545 | ||
539 | int (*cm_handle_connect)(struct rdma_cm_id *cm_id, | 546 | int (*cm_handle_connect)(struct rdma_cm_id *cm_id, |
540 | struct rdma_cm_event *event); | 547 | struct rdma_cm_event *event, bool isv6); |
541 | int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); | 548 | int (*cm_initiate_connect)(struct rdma_cm_id *cm_id, bool isv6); |
542 | void (*cm_connect_complete)(struct rds_connection *conn, | 549 | void (*cm_connect_complete)(struct rds_connection *conn, |
543 | struct rdma_cm_event *event); | 550 | struct rdma_cm_event *event); |
544 | 551 | ||
@@ -554,6 +561,12 @@ struct rds_transport { | |||
554 | bool (*t_unloading)(struct rds_connection *conn); | 561 | bool (*t_unloading)(struct rds_connection *conn); |
555 | }; | 562 | }; |
556 | 563 | ||
564 | /* Bind hash table key length. It is the sum of the size of a struct | ||
565 | * in6_addr, a scope_id and a port. | ||
566 | */ | ||
567 | #define RDS_BOUND_KEY_LEN \ | ||
568 | (sizeof(struct in6_addr) + sizeof(__u32) + sizeof(__be16)) | ||
569 | |||
557 | struct rds_sock { | 570 | struct rds_sock { |
558 | struct sock rs_sk; | 571 | struct sock rs_sk; |
559 | 572 | ||
@@ -565,10 +578,14 @@ struct rds_sock { | |||
565 | * support. | 578 | * support. |
566 | */ | 579 | */ |
567 | struct rhash_head rs_bound_node; | 580 | struct rhash_head rs_bound_node; |
568 | u64 rs_bound_key; | 581 | u8 rs_bound_key[RDS_BOUND_KEY_LEN]; |
569 | __be32 rs_bound_addr; | 582 | struct sockaddr_in6 rs_bound_sin6; |
570 | __be32 rs_conn_addr; | 583 | #define rs_bound_addr rs_bound_sin6.sin6_addr |
571 | __be16 rs_bound_port; | 584 | #define rs_bound_addr_v4 rs_bound_sin6.sin6_addr.s6_addr32[3] |
585 | #define rs_bound_port rs_bound_sin6.sin6_port | ||
586 | #define rs_bound_scope_id rs_bound_sin6.sin6_scope_id | ||
587 | struct in6_addr rs_conn_addr; | ||
588 | #define rs_conn_addr_v4 rs_conn_addr.s6_addr32[3] | ||
572 | __be16 rs_conn_port; | 589 | __be16 rs_conn_port; |
573 | struct rds_transport *rs_transport; | 590 | struct rds_transport *rs_transport; |
574 | 591 | ||
@@ -704,7 +721,8 @@ extern wait_queue_head_t rds_poll_waitq; | |||
704 | /* bind.c */ | 721 | /* bind.c */ |
705 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); | 722 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); |
706 | void rds_remove_bound(struct rds_sock *rs); | 723 | void rds_remove_bound(struct rds_sock *rs); |
707 | struct rds_sock *rds_find_bound(__be32 addr, __be16 port); | 724 | struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, |
725 | __u32 scope_id); | ||
708 | int rds_bind_lock_init(void); | 726 | int rds_bind_lock_init(void); |
709 | void rds_bind_lock_destroy(void); | 727 | void rds_bind_lock_destroy(void); |
710 | 728 | ||
@@ -723,16 +741,20 @@ void rds_cong_remove_socket(struct rds_sock *); | |||
723 | void rds_cong_exit(void); | 741 | void rds_cong_exit(void); |
724 | struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); | 742 | struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); |
725 | 743 | ||
726 | /* conn.c */ | 744 | /* connection.c */ |
727 | extern u32 rds_gen_num; | 745 | extern u32 rds_gen_num; |
728 | int rds_conn_init(void); | 746 | int rds_conn_init(void); |
729 | void rds_conn_exit(void); | 747 | void rds_conn_exit(void); |
730 | struct rds_connection *rds_conn_create(struct net *net, | 748 | struct rds_connection *rds_conn_create(struct net *net, |
731 | __be32 laddr, __be32 faddr, | 749 | const struct in6_addr *laddr, |
732 | struct rds_transport *trans, gfp_t gfp); | 750 | const struct in6_addr *faddr, |
751 | struct rds_transport *trans, gfp_t gfp, | ||
752 | int dev_if); | ||
733 | struct rds_connection *rds_conn_create_outgoing(struct net *net, | 753 | struct rds_connection *rds_conn_create_outgoing(struct net *net, |
734 | __be32 laddr, __be32 faddr, | 754 | const struct in6_addr *laddr, |
735 | struct rds_transport *trans, gfp_t gfp); | 755 | const struct in6_addr *faddr, |
756 | struct rds_transport *trans, | ||
757 | gfp_t gfp, int dev_if); | ||
736 | void rds_conn_shutdown(struct rds_conn_path *cpath); | 758 | void rds_conn_shutdown(struct rds_conn_path *cpath); |
737 | void rds_conn_destroy(struct rds_connection *conn); | 759 | void rds_conn_destroy(struct rds_connection *conn); |
738 | void rds_conn_drop(struct rds_connection *conn); | 760 | void rds_conn_drop(struct rds_connection *conn); |
@@ -843,11 +865,12 @@ void rds_page_exit(void); | |||
843 | 865 | ||
844 | /* recv.c */ | 866 | /* recv.c */ |
845 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | 867 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, |
846 | __be32 saddr); | 868 | struct in6_addr *saddr); |
847 | void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn, | 869 | void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *conn, |
848 | __be32 saddr); | 870 | struct in6_addr *saddr); |
849 | void rds_inc_put(struct rds_incoming *inc); | 871 | void rds_inc_put(struct rds_incoming *inc); |
850 | void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | 872 | void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, |
873 | struct in6_addr *daddr, | ||
851 | struct rds_incoming *inc, gfp_t gfp); | 874 | struct rds_incoming *inc, gfp_t gfp); |
852 | int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | 875 | int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, |
853 | int msg_flags); | 876 | int msg_flags); |
@@ -856,13 +879,17 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); | |||
856 | void rds_inc_info_copy(struct rds_incoming *inc, | 879 | void rds_inc_info_copy(struct rds_incoming *inc, |
857 | struct rds_info_iterator *iter, | 880 | struct rds_info_iterator *iter, |
858 | __be32 saddr, __be32 daddr, int flip); | 881 | __be32 saddr, __be32 daddr, int flip); |
882 | void rds6_inc_info_copy(struct rds_incoming *inc, | ||
883 | struct rds_info_iterator *iter, | ||
884 | struct in6_addr *saddr, struct in6_addr *daddr, | ||
885 | int flip); | ||
859 | 886 | ||
860 | /* send.c */ | 887 | /* send.c */ |
861 | int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); | 888 | int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); |
862 | void rds_send_path_reset(struct rds_conn_path *conn); | 889 | void rds_send_path_reset(struct rds_conn_path *conn); |
863 | int rds_send_xmit(struct rds_conn_path *cp); | 890 | int rds_send_xmit(struct rds_conn_path *cp); |
864 | struct sockaddr_in; | 891 | struct sockaddr_in; |
865 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest); | 892 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest); |
866 | typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); | 893 | typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); |
867 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, | 894 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, |
868 | is_acked_func is_acked); | 895 | is_acked_func is_acked); |
@@ -949,11 +976,14 @@ void rds_send_worker(struct work_struct *); | |||
949 | void rds_recv_worker(struct work_struct *); | 976 | void rds_recv_worker(struct work_struct *); |
950 | void rds_connect_path_complete(struct rds_conn_path *conn, int curr); | 977 | void rds_connect_path_complete(struct rds_conn_path *conn, int curr); |
951 | void rds_connect_complete(struct rds_connection *conn); | 978 | void rds_connect_complete(struct rds_connection *conn); |
979 | int rds_addr_cmp(const struct in6_addr *a1, const struct in6_addr *a2); | ||
952 | 980 | ||
953 | /* transport.c */ | 981 | /* transport.c */ |
954 | void rds_trans_register(struct rds_transport *trans); | 982 | void rds_trans_register(struct rds_transport *trans); |
955 | void rds_trans_unregister(struct rds_transport *trans); | 983 | void rds_trans_unregister(struct rds_transport *trans); |
956 | struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr); | 984 | struct rds_transport *rds_trans_get_preferred(struct net *net, |
985 | const struct in6_addr *addr, | ||
986 | __u32 scope_id); | ||
957 | void rds_trans_put(struct rds_transport *trans); | 987 | void rds_trans_put(struct rds_transport *trans); |
958 | unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, | 988 | unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, |
959 | unsigned int avail); | 989 | unsigned int avail); |
diff --git a/net/rds/recv.c b/net/rds/recv.c index 192ac6f78ded..504cd6bcc54c 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -41,14 +41,14 @@ | |||
41 | #include "rds.h" | 41 | #include "rds.h" |
42 | 42 | ||
43 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | 43 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, |
44 | __be32 saddr) | 44 | struct in6_addr *saddr) |
45 | { | 45 | { |
46 | int i; | 46 | int i; |
47 | 47 | ||
48 | refcount_set(&inc->i_refcount, 1); | 48 | refcount_set(&inc->i_refcount, 1); |
49 | INIT_LIST_HEAD(&inc->i_item); | 49 | INIT_LIST_HEAD(&inc->i_item); |
50 | inc->i_conn = conn; | 50 | inc->i_conn = conn; |
51 | inc->i_saddr = saddr; | 51 | inc->i_saddr = *saddr; |
52 | inc->i_rdma_cookie = 0; | 52 | inc->i_rdma_cookie = 0; |
53 | inc->i_rx_tstamp.tv_sec = 0; | 53 | inc->i_rx_tstamp.tv_sec = 0; |
54 | inc->i_rx_tstamp.tv_usec = 0; | 54 | inc->i_rx_tstamp.tv_usec = 0; |
@@ -59,13 +59,13 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | |||
59 | EXPORT_SYMBOL_GPL(rds_inc_init); | 59 | EXPORT_SYMBOL_GPL(rds_inc_init); |
60 | 60 | ||
61 | void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, | 61 | void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, |
62 | __be32 saddr) | 62 | struct in6_addr *saddr) |
63 | { | 63 | { |
64 | refcount_set(&inc->i_refcount, 1); | 64 | refcount_set(&inc->i_refcount, 1); |
65 | INIT_LIST_HEAD(&inc->i_item); | 65 | INIT_LIST_HEAD(&inc->i_item); |
66 | inc->i_conn = cp->cp_conn; | 66 | inc->i_conn = cp->cp_conn; |
67 | inc->i_conn_path = cp; | 67 | inc->i_conn_path = cp; |
68 | inc->i_saddr = saddr; | 68 | inc->i_saddr = *saddr; |
69 | inc->i_rdma_cookie = 0; | 69 | inc->i_rdma_cookie = 0; |
70 | inc->i_rx_tstamp.tv_sec = 0; | 70 | inc->i_rx_tstamp.tv_sec = 0; |
71 | inc->i_rx_tstamp.tv_usec = 0; | 71 | inc->i_rx_tstamp.tv_usec = 0; |
@@ -110,7 +110,7 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, | |||
110 | 110 | ||
111 | now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); | 111 | now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); |
112 | 112 | ||
113 | rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d " | 113 | rdsdebug("rs %p (%pI6c:%u) recv bytes %d buf %d " |
114 | "now_cong %d delta %d\n", | 114 | "now_cong %d delta %d\n", |
115 | rs, &rs->rs_bound_addr, | 115 | rs, &rs->rs_bound_addr, |
116 | ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, | 116 | ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, |
@@ -260,7 +260,7 @@ static void rds_start_mprds(struct rds_connection *conn) | |||
260 | struct rds_conn_path *cp; | 260 | struct rds_conn_path *cp; |
261 | 261 | ||
262 | if (conn->c_npaths > 1 && | 262 | if (conn->c_npaths > 1 && |
263 | IS_CANONICAL(conn->c_laddr, conn->c_faddr)) { | 263 | rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) < 0) { |
264 | for (i = 0; i < conn->c_npaths; i++) { | 264 | for (i = 0; i < conn->c_npaths; i++) { |
265 | cp = &conn->c_path[i]; | 265 | cp = &conn->c_path[i]; |
266 | rds_conn_path_connect_if_down(cp); | 266 | rds_conn_path_connect_if_down(cp); |
@@ -284,7 +284,8 @@ static void rds_start_mprds(struct rds_connection *conn) | |||
284 | * conn. This lets loopback, who only has one conn for both directions, | 284 | * conn. This lets loopback, who only has one conn for both directions, |
285 | * tell us which roles the addrs in the conn are playing for this message. | 285 | * tell us which roles the addrs in the conn are playing for this message. |
286 | */ | 286 | */ |
287 | void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | 287 | void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, |
288 | struct in6_addr *daddr, | ||
288 | struct rds_incoming *inc, gfp_t gfp) | 289 | struct rds_incoming *inc, gfp_t gfp) |
289 | { | 290 | { |
290 | struct rds_sock *rs = NULL; | 291 | struct rds_sock *rs = NULL; |
@@ -339,7 +340,8 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | |||
339 | 340 | ||
340 | if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { | 341 | if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { |
341 | if (inc->i_hdr.h_sport == 0) { | 342 | if (inc->i_hdr.h_sport == 0) { |
342 | rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr); | 343 | rdsdebug("ignore ping with 0 sport from %pI6c\n", |
344 | saddr); | ||
343 | goto out; | 345 | goto out; |
344 | } | 346 | } |
345 | rds_stats_inc(s_recv_ping); | 347 | rds_stats_inc(s_recv_ping); |
@@ -362,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | |||
362 | goto out; | 364 | goto out; |
363 | } | 365 | } |
364 | 366 | ||
365 | rs = rds_find_bound(daddr, inc->i_hdr.h_dport); | 367 | rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if); |
366 | if (!rs) { | 368 | if (!rs) { |
367 | rds_stats_inc(s_recv_drop_no_sock); | 369 | rds_stats_inc(s_recv_drop_no_sock); |
368 | goto out; | 370 | goto out; |
@@ -625,6 +627,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | |||
625 | struct rds_sock *rs = rds_sk_to_rs(sk); | 627 | struct rds_sock *rs = rds_sk_to_rs(sk); |
626 | long timeo; | 628 | long timeo; |
627 | int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; | 629 | int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; |
630 | DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); | ||
628 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); | 631 | DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); |
629 | struct rds_incoming *inc = NULL; | 632 | struct rds_incoming *inc = NULL; |
630 | 633 | ||
@@ -673,7 +676,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | |||
673 | break; | 676 | break; |
674 | } | 677 | } |
675 | 678 | ||
676 | rdsdebug("copying inc %p from %pI4:%u to user\n", inc, | 679 | rdsdebug("copying inc %p from %pI6c:%u to user\n", inc, |
677 | &inc->i_conn->c_faddr, | 680 | &inc->i_conn->c_faddr, |
678 | ntohs(inc->i_hdr.h_sport)); | 681 | ntohs(inc->i_hdr.h_sport)); |
679 | ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); | 682 | ret = inc->i_conn->c_trans->inc_copy_to_user(inc, &msg->msg_iter); |
@@ -707,12 +710,26 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, | |||
707 | 710 | ||
708 | rds_stats_inc(s_recv_delivered); | 711 | rds_stats_inc(s_recv_delivered); |
709 | 712 | ||
710 | if (sin) { | 713 | if (msg->msg_name) { |
711 | sin->sin_family = AF_INET; | 714 | if (ipv6_addr_v4mapped(&inc->i_saddr)) { |
712 | sin->sin_port = inc->i_hdr.h_sport; | 715 | sin = (struct sockaddr_in *)msg->msg_name; |
713 | sin->sin_addr.s_addr = inc->i_saddr; | 716 | |
714 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | 717 | sin->sin_family = AF_INET; |
715 | msg->msg_namelen = sizeof(*sin); | 718 | sin->sin_port = inc->i_hdr.h_sport; |
719 | sin->sin_addr.s_addr = | ||
720 | inc->i_saddr.s6_addr32[3]; | ||
721 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
722 | msg->msg_namelen = sizeof(*sin); | ||
723 | } else { | ||
724 | sin6 = (struct sockaddr_in6 *)msg->msg_name; | ||
725 | |||
726 | sin6->sin6_family = AF_INET6; | ||
727 | sin6->sin6_port = inc->i_hdr.h_sport; | ||
728 | sin6->sin6_addr = inc->i_saddr; | ||
729 | sin6->sin6_flowinfo = 0; | ||
730 | sin6->sin6_scope_id = rs->rs_bound_scope_id; | ||
731 | msg->msg_namelen = sizeof(*sin6); | ||
732 | } | ||
716 | } | 733 | } |
717 | break; | 734 | break; |
718 | } | 735 | } |
@@ -775,3 +792,30 @@ void rds_inc_info_copy(struct rds_incoming *inc, | |||
775 | 792 | ||
776 | rds_info_copy(iter, &minfo, sizeof(minfo)); | 793 | rds_info_copy(iter, &minfo, sizeof(minfo)); |
777 | } | 794 | } |
795 | |||
796 | #if IS_ENABLED(CONFIG_IPV6) | ||
797 | void rds6_inc_info_copy(struct rds_incoming *inc, | ||
798 | struct rds_info_iterator *iter, | ||
799 | struct in6_addr *saddr, struct in6_addr *daddr, | ||
800 | int flip) | ||
801 | { | ||
802 | struct rds6_info_message minfo6; | ||
803 | |||
804 | minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence); | ||
805 | minfo6.len = be32_to_cpu(inc->i_hdr.h_len); | ||
806 | |||
807 | if (flip) { | ||
808 | minfo6.laddr = *daddr; | ||
809 | minfo6.faddr = *saddr; | ||
810 | minfo6.lport = inc->i_hdr.h_dport; | ||
811 | minfo6.fport = inc->i_hdr.h_sport; | ||
812 | } else { | ||
813 | minfo6.laddr = *saddr; | ||
814 | minfo6.faddr = *daddr; | ||
815 | minfo6.lport = inc->i_hdr.h_sport; | ||
816 | minfo6.fport = inc->i_hdr.h_dport; | ||
817 | } | ||
818 | |||
819 | rds_info_copy(iter, &minfo6, sizeof(minfo6)); | ||
820 | } | ||
821 | #endif | ||
diff --git a/net/rds/send.c b/net/rds/send.c index 59f17a2335f4..57b3d5a8b2db 100644 --- a/net/rds/send.c +++ b/net/rds/send.c | |||
@@ -709,7 +709,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, | |||
709 | } | 709 | } |
710 | EXPORT_SYMBOL_GPL(rds_send_drop_acked); | 710 | EXPORT_SYMBOL_GPL(rds_send_drop_acked); |
711 | 711 | ||
712 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | 712 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in6 *dest) |
713 | { | 713 | { |
714 | struct rds_message *rm, *tmp; | 714 | struct rds_message *rm, *tmp; |
715 | struct rds_connection *conn; | 715 | struct rds_connection *conn; |
@@ -721,8 +721,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
721 | spin_lock_irqsave(&rs->rs_lock, flags); | 721 | spin_lock_irqsave(&rs->rs_lock, flags); |
722 | 722 | ||
723 | list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { | 723 | list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { |
724 | if (dest && (dest->sin_addr.s_addr != rm->m_daddr || | 724 | if (dest && |
725 | dest->sin_port != rm->m_inc.i_hdr.h_dport)) | 725 | (!ipv6_addr_equal(&dest->sin6_addr, &rm->m_daddr) || |
726 | dest->sin6_port != rm->m_inc.i_hdr.h_dport)) | ||
726 | continue; | 727 | continue; |
727 | 728 | ||
728 | list_move(&rm->m_sock_item, &list); | 729 | list_move(&rm->m_sock_item, &list); |
@@ -1059,8 +1060,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) | |||
1059 | { | 1060 | { |
1060 | struct sock *sk = sock->sk; | 1061 | struct sock *sk = sock->sk; |
1061 | struct rds_sock *rs = rds_sk_to_rs(sk); | 1062 | struct rds_sock *rs = rds_sk_to_rs(sk); |
1063 | DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); | ||
1062 | DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); | 1064 | DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); |
1063 | __be32 daddr; | ||
1064 | __be16 dport; | 1065 | __be16 dport; |
1065 | struct rds_message *rm = NULL; | 1066 | struct rds_message *rm = NULL; |
1066 | struct rds_connection *conn; | 1067 | struct rds_connection *conn; |
@@ -1069,10 +1070,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) | |||
1069 | int nonblock = msg->msg_flags & MSG_DONTWAIT; | 1070 | int nonblock = msg->msg_flags & MSG_DONTWAIT; |
1070 | long timeo = sock_sndtimeo(sk, nonblock); | 1071 | long timeo = sock_sndtimeo(sk, nonblock); |
1071 | struct rds_conn_path *cpath; | 1072 | struct rds_conn_path *cpath; |
1073 | struct in6_addr daddr; | ||
1074 | __u32 scope_id = 0; | ||
1072 | size_t total_payload_len = payload_len, rdma_payload_len = 0; | 1075 | size_t total_payload_len = payload_len, rdma_payload_len = 0; |
1073 | bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && | 1076 | bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) && |
1074 | sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); | 1077 | sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY)); |
1075 | int num_sgs = ceil(payload_len, PAGE_SIZE); | 1078 | int num_sgs = ceil(payload_len, PAGE_SIZE); |
1079 | int namelen; | ||
1076 | 1080 | ||
1077 | /* Mirror Linux UDP mirror of BSD error message compatibility */ | 1081 | /* Mirror Linux UDP mirror of BSD error message compatibility */ |
1078 | /* XXX: Perhaps MSG_MORE someday */ | 1082 | /* XXX: Perhaps MSG_MORE someday */ |
@@ -1081,27 +1085,108 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) | |||
1081 | goto out; | 1085 | goto out; |
1082 | } | 1086 | } |
1083 | 1087 | ||
1084 | if (msg->msg_namelen) { | 1088 | namelen = msg->msg_namelen; |
1085 | /* XXX fail non-unicast destination IPs? */ | 1089 | if (namelen != 0) { |
1086 | if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) { | 1090 | if (namelen < sizeof(*usin)) { |
1091 | ret = -EINVAL; | ||
1092 | goto out; | ||
1093 | } | ||
1094 | switch (usin->sin_family) { | ||
1095 | case AF_INET: | ||
1096 | if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || | ||
1097 | usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || | ||
1098 | IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { | ||
1099 | ret = -EINVAL; | ||
1100 | goto out; | ||
1101 | } | ||
1102 | ipv6_addr_set_v4mapped(usin->sin_addr.s_addr, &daddr); | ||
1103 | dport = usin->sin_port; | ||
1104 | break; | ||
1105 | |||
1106 | #if IS_ENABLED(CONFIG_IPV6) | ||
1107 | case AF_INET6: { | ||
1108 | int addr_type; | ||
1109 | |||
1110 | if (namelen < sizeof(*sin6)) { | ||
1111 | ret = -EINVAL; | ||
1112 | goto out; | ||
1113 | } | ||
1114 | addr_type = ipv6_addr_type(&sin6->sin6_addr); | ||
1115 | if (!(addr_type & IPV6_ADDR_UNICAST)) { | ||
1116 | __be32 addr4; | ||
1117 | |||
1118 | if (!(addr_type & IPV6_ADDR_MAPPED)) { | ||
1119 | ret = -EINVAL; | ||
1120 | goto out; | ||
1121 | } | ||
1122 | |||
1123 | /* It is a mapped address. Need to do some | ||
1124 | * sanity checks. | ||
1125 | */ | ||
1126 | addr4 = sin6->sin6_addr.s6_addr32[3]; | ||
1127 | if (addr4 == htonl(INADDR_ANY) || | ||
1128 | addr4 == htonl(INADDR_BROADCAST) || | ||
1129 | IN_MULTICAST(ntohl(addr4))) { | ||
1130 | ret = -EINVAL; | ||
1131 | goto out; | ||
1132 | } | ||
1133 | } | ||
1134 | if (addr_type & IPV6_ADDR_LINKLOCAL) { | ||
1135 | if (sin6->sin6_scope_id == 0) { | ||
1136 | ret = -EINVAL; | ||
1137 | goto out; | ||
1138 | } | ||
1139 | scope_id = sin6->sin6_scope_id; | ||
1140 | } | ||
1141 | |||
1142 | daddr = sin6->sin6_addr; | ||
1143 | dport = sin6->sin6_port; | ||
1144 | break; | ||
1145 | } | ||
1146 | #endif | ||
1147 | |||
1148 | default: | ||
1087 | ret = -EINVAL; | 1149 | ret = -EINVAL; |
1088 | goto out; | 1150 | goto out; |
1089 | } | 1151 | } |
1090 | daddr = usin->sin_addr.s_addr; | ||
1091 | dport = usin->sin_port; | ||
1092 | } else { | 1152 | } else { |
1093 | /* We only care about consistency with ->connect() */ | 1153 | /* We only care about consistency with ->connect() */ |
1094 | lock_sock(sk); | 1154 | lock_sock(sk); |
1095 | daddr = rs->rs_conn_addr; | 1155 | daddr = rs->rs_conn_addr; |
1096 | dport = rs->rs_conn_port; | 1156 | dport = rs->rs_conn_port; |
1157 | scope_id = rs->rs_bound_scope_id; | ||
1097 | release_sock(sk); | 1158 | release_sock(sk); |
1098 | } | 1159 | } |
1099 | 1160 | ||
1100 | lock_sock(sk); | 1161 | lock_sock(sk); |
1101 | if (daddr == 0 || rs->rs_bound_addr == 0) { | 1162 | if (ipv6_addr_any(&rs->rs_bound_addr) || ipv6_addr_any(&daddr)) { |
1102 | release_sock(sk); | 1163 | release_sock(sk); |
1103 | ret = -ENOTCONN; /* XXX not a great errno */ | 1164 | ret = -ENOTCONN; |
1104 | goto out; | 1165 | goto out; |
1166 | } else if (namelen != 0) { | ||
1167 | /* Cannot send to an IPv4 address using an IPv6 source | ||
1168 | * address and cannot send to an IPv6 address using an | ||
1169 | * IPv4 source address. | ||
1170 | */ | ||
1171 | if (ipv6_addr_v4mapped(&daddr) ^ | ||
1172 | ipv6_addr_v4mapped(&rs->rs_bound_addr)) { | ||
1173 | release_sock(sk); | ||
1174 | ret = -EOPNOTSUPP; | ||
1175 | goto out; | ||
1176 | } | ||
1177 | /* If the socket is already bound to a link local address, | ||
1178 | * it can only send to peers on the same link. But allow | ||
1179 | * communicating beween link local and non-link local address. | ||
1180 | */ | ||
1181 | if (scope_id != rs->rs_bound_scope_id) { | ||
1182 | if (!scope_id) { | ||
1183 | scope_id = rs->rs_bound_scope_id; | ||
1184 | } else if (rs->rs_bound_scope_id) { | ||
1185 | release_sock(sk); | ||
1186 | ret = -EINVAL; | ||
1187 | goto out; | ||
1188 | } | ||
1189 | } | ||
1105 | } | 1190 | } |
1106 | release_sock(sk); | 1191 | release_sock(sk); |
1107 | 1192 | ||
@@ -1155,13 +1240,14 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) | |||
1155 | 1240 | ||
1156 | /* rds_conn_create has a spinlock that runs with IRQ off. | 1241 | /* rds_conn_create has a spinlock that runs with IRQ off. |
1157 | * Caching the conn in the socket helps a lot. */ | 1242 | * Caching the conn in the socket helps a lot. */ |
1158 | if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) | 1243 | if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) |
1159 | conn = rs->rs_conn; | 1244 | conn = rs->rs_conn; |
1160 | else { | 1245 | else { |
1161 | conn = rds_conn_create_outgoing(sock_net(sock->sk), | 1246 | conn = rds_conn_create_outgoing(sock_net(sock->sk), |
1162 | rs->rs_bound_addr, daddr, | 1247 | &rs->rs_bound_addr, &daddr, |
1163 | rs->rs_transport, | 1248 | rs->rs_transport, |
1164 | sock->sk->sk_allocation); | 1249 | sock->sk->sk_allocation, |
1250 | scope_id); | ||
1165 | if (IS_ERR(conn)) { | 1251 | if (IS_ERR(conn)) { |
1166 | ret = PTR_ERR(conn); | 1252 | ret = PTR_ERR(conn); |
1167 | goto out; | 1253 | goto out; |
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 351a28474667..2c7b7c352d3e 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -37,6 +37,8 @@ | |||
37 | #include <net/tcp.h> | 37 | #include <net/tcp.h> |
38 | #include <net/net_namespace.h> | 38 | #include <net/net_namespace.h> |
39 | #include <net/netns/generic.h> | 39 | #include <net/netns/generic.h> |
40 | #include <net/tcp.h> | ||
41 | #include <net/addrconf.h> | ||
40 | 42 | ||
41 | #include "rds.h" | 43 | #include "rds.h" |
42 | #include "tcp.h" | 44 | #include "tcp.h" |
@@ -44,7 +46,14 @@ | |||
44 | /* only for info exporting */ | 46 | /* only for info exporting */ |
45 | static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); | 47 | static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); |
46 | static LIST_HEAD(rds_tcp_tc_list); | 48 | static LIST_HEAD(rds_tcp_tc_list); |
49 | |||
50 | /* rds_tcp_tc_count counts only IPv4 connections. | ||
51 | * rds6_tcp_tc_count counts both IPv4 and IPv6 connections. | ||
52 | */ | ||
47 | static unsigned int rds_tcp_tc_count; | 53 | static unsigned int rds_tcp_tc_count; |
54 | #if IS_ENABLED(CONFIG_IPV6) | ||
55 | static unsigned int rds6_tcp_tc_count; | ||
56 | #endif | ||
48 | 57 | ||
49 | /* Track rds_tcp_connection structs so they can be cleaned up */ | 58 | /* Track rds_tcp_connection structs so they can be cleaned up */ |
50 | static DEFINE_SPINLOCK(rds_tcp_conn_lock); | 59 | static DEFINE_SPINLOCK(rds_tcp_conn_lock); |
@@ -111,7 +120,11 @@ void rds_tcp_restore_callbacks(struct socket *sock, | |||
111 | /* done under the callback_lock to serialize with write_space */ | 120 | /* done under the callback_lock to serialize with write_space */ |
112 | spin_lock(&rds_tcp_tc_list_lock); | 121 | spin_lock(&rds_tcp_tc_list_lock); |
113 | list_del_init(&tc->t_list_item); | 122 | list_del_init(&tc->t_list_item); |
114 | rds_tcp_tc_count--; | 123 | #if IS_ENABLED(CONFIG_IPV6) |
124 | rds6_tcp_tc_count--; | ||
125 | #endif | ||
126 | if (!tc->t_cpath->cp_conn->c_isv6) | ||
127 | rds_tcp_tc_count--; | ||
115 | spin_unlock(&rds_tcp_tc_list_lock); | 128 | spin_unlock(&rds_tcp_tc_list_lock); |
116 | 129 | ||
117 | tc->t_sock = NULL; | 130 | tc->t_sock = NULL; |
@@ -198,7 +211,11 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) | |||
198 | /* done under the callback_lock to serialize with write_space */ | 211 | /* done under the callback_lock to serialize with write_space */ |
199 | spin_lock(&rds_tcp_tc_list_lock); | 212 | spin_lock(&rds_tcp_tc_list_lock); |
200 | list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); | 213 | list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); |
201 | rds_tcp_tc_count++; | 214 | #if IS_ENABLED(CONFIG_IPV6) |
215 | rds6_tcp_tc_count++; | ||
216 | #endif | ||
217 | if (!tc->t_cpath->cp_conn->c_isv6) | ||
218 | rds_tcp_tc_count++; | ||
202 | spin_unlock(&rds_tcp_tc_list_lock); | 219 | spin_unlock(&rds_tcp_tc_list_lock); |
203 | 220 | ||
204 | /* accepted sockets need our listen data ready undone */ | 221 | /* accepted sockets need our listen data ready undone */ |
@@ -219,6 +236,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) | |||
219 | write_unlock_bh(&sock->sk->sk_callback_lock); | 236 | write_unlock_bh(&sock->sk->sk_callback_lock); |
220 | } | 237 | } |
221 | 238 | ||
239 | /* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4 | ||
240 | * connections for backward compatibility. | ||
241 | */ | ||
222 | static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, | 242 | static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, |
223 | struct rds_info_iterator *iter, | 243 | struct rds_info_iterator *iter, |
224 | struct rds_info_lengths *lens) | 244 | struct rds_info_lengths *lens) |
@@ -226,8 +246,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, | |||
226 | struct rds_info_tcp_socket tsinfo; | 246 | struct rds_info_tcp_socket tsinfo; |
227 | struct rds_tcp_connection *tc; | 247 | struct rds_tcp_connection *tc; |
228 | unsigned long flags; | 248 | unsigned long flags; |
229 | struct sockaddr_in sin; | ||
230 | struct socket *sock; | ||
231 | 249 | ||
232 | spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); | 250 | spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); |
233 | 251 | ||
@@ -235,16 +253,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, | |||
235 | goto out; | 253 | goto out; |
236 | 254 | ||
237 | list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { | 255 | list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { |
256 | struct inet_sock *inet = inet_sk(tc->t_sock->sk); | ||
238 | 257 | ||
239 | sock = tc->t_sock; | 258 | if (tc->t_cpath->cp_conn->c_isv6) |
240 | if (sock) { | 259 | continue; |
241 | sock->ops->getname(sock, (struct sockaddr *)&sin, 0); | 260 | |
242 | tsinfo.local_addr = sin.sin_addr.s_addr; | 261 | tsinfo.local_addr = inet->inet_saddr; |
243 | tsinfo.local_port = sin.sin_port; | 262 | tsinfo.local_port = inet->inet_sport; |
244 | sock->ops->getname(sock, (struct sockaddr *)&sin, 1); | 263 | tsinfo.peer_addr = inet->inet_daddr; |
245 | tsinfo.peer_addr = sin.sin_addr.s_addr; | 264 | tsinfo.peer_port = inet->inet_dport; |
246 | tsinfo.peer_port = sin.sin_port; | ||
247 | } | ||
248 | 265 | ||
249 | tsinfo.hdr_rem = tc->t_tinc_hdr_rem; | 266 | tsinfo.hdr_rem = tc->t_tinc_hdr_rem; |
250 | tsinfo.data_rem = tc->t_tinc_data_rem; | 267 | tsinfo.data_rem = tc->t_tinc_data_rem; |
@@ -262,10 +279,82 @@ out: | |||
262 | spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); | 279 | spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); |
263 | } | 280 | } |
264 | 281 | ||
265 | static int rds_tcp_laddr_check(struct net *net, __be32 addr) | 282 | #if IS_ENABLED(CONFIG_IPV6) |
283 | /* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and | ||
284 | * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped | ||
285 | * address. | ||
286 | */ | ||
287 | static void rds6_tcp_tc_info(struct socket *sock, unsigned int len, | ||
288 | struct rds_info_iterator *iter, | ||
289 | struct rds_info_lengths *lens) | ||
266 | { | 290 | { |
267 | if (inet_addr_type(net, addr) == RTN_LOCAL) | 291 | struct rds6_info_tcp_socket tsinfo6; |
292 | struct rds_tcp_connection *tc; | ||
293 | unsigned long flags; | ||
294 | |||
295 | spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); | ||
296 | |||
297 | if (len / sizeof(tsinfo6) < rds6_tcp_tc_count) | ||
298 | goto out; | ||
299 | |||
300 | list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { | ||
301 | struct sock *sk = tc->t_sock->sk; | ||
302 | struct inet_sock *inet = inet_sk(sk); | ||
303 | |||
304 | tsinfo6.local_addr = sk->sk_v6_rcv_saddr; | ||
305 | tsinfo6.local_port = inet->inet_sport; | ||
306 | tsinfo6.peer_addr = sk->sk_v6_daddr; | ||
307 | tsinfo6.peer_port = inet->inet_dport; | ||
308 | |||
309 | tsinfo6.hdr_rem = tc->t_tinc_hdr_rem; | ||
310 | tsinfo6.data_rem = tc->t_tinc_data_rem; | ||
311 | tsinfo6.last_sent_nxt = tc->t_last_sent_nxt; | ||
312 | tsinfo6.last_expected_una = tc->t_last_expected_una; | ||
313 | tsinfo6.last_seen_una = tc->t_last_seen_una; | ||
314 | |||
315 | rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6)); | ||
316 | } | ||
317 | |||
318 | out: | ||
319 | lens->nr = rds6_tcp_tc_count; | ||
320 | lens->each = sizeof(tsinfo6); | ||
321 | |||
322 | spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); | ||
323 | } | ||
324 | #endif | ||
325 | |||
326 | static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr, | ||
327 | __u32 scope_id) | ||
328 | { | ||
329 | struct net_device *dev = NULL; | ||
330 | #if IS_ENABLED(CONFIG_IPV6) | ||
331 | int ret; | ||
332 | #endif | ||
333 | |||
334 | if (ipv6_addr_v4mapped(addr)) { | ||
335 | if (inet_addr_type(net, addr->s6_addr32[3]) == RTN_LOCAL) | ||
336 | return 0; | ||
337 | return -EADDRNOTAVAIL; | ||
338 | } | ||
339 | |||
340 | /* If the scope_id is specified, check only those addresses | ||
341 | * hosted on the specified interface. | ||
342 | */ | ||
343 | if (scope_id != 0) { | ||
344 | rcu_read_lock(); | ||
345 | dev = dev_get_by_index_rcu(net, scope_id); | ||
346 | /* scope_id is not valid... */ | ||
347 | if (!dev) { | ||
348 | rcu_read_unlock(); | ||
349 | return -EADDRNOTAVAIL; | ||
350 | } | ||
351 | rcu_read_unlock(); | ||
352 | } | ||
353 | #if IS_ENABLED(CONFIG_IPV6) | ||
354 | ret = ipv6_chk_addr(net, addr, dev, 0); | ||
355 | if (ret) | ||
268 | return 0; | 356 | return 0; |
357 | #endif | ||
269 | return -EADDRNOTAVAIL; | 358 | return -EADDRNOTAVAIL; |
270 | } | 359 | } |
271 | 360 | ||
@@ -468,13 +557,27 @@ static __net_init int rds_tcp_init_net(struct net *net) | |||
468 | err = -ENOMEM; | 557 | err = -ENOMEM; |
469 | goto fail; | 558 | goto fail; |
470 | } | 559 | } |
471 | rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); | 560 | |
561 | #if IS_ENABLED(CONFIG_IPV6) | ||
562 | rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true); | ||
563 | #else | ||
564 | rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); | ||
565 | #endif | ||
472 | if (!rtn->rds_tcp_listen_sock) { | 566 | if (!rtn->rds_tcp_listen_sock) { |
473 | pr_warn("could not set up listen sock\n"); | 567 | pr_warn("could not set up IPv6 listen sock\n"); |
474 | unregister_net_sysctl_table(rtn->rds_tcp_sysctl); | 568 | |
475 | rtn->rds_tcp_sysctl = NULL; | 569 | #if IS_ENABLED(CONFIG_IPV6) |
476 | err = -EAFNOSUPPORT; | 570 | /* Try IPv4 as some systems disable IPv6 */ |
477 | goto fail; | 571 | rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false); |
572 | if (!rtn->rds_tcp_listen_sock) { | ||
573 | #endif | ||
574 | unregister_net_sysctl_table(rtn->rds_tcp_sysctl); | ||
575 | rtn->rds_tcp_sysctl = NULL; | ||
576 | err = -EAFNOSUPPORT; | ||
577 | goto fail; | ||
578 | #if IS_ENABLED(CONFIG_IPV6) | ||
579 | } | ||
580 | #endif | ||
478 | } | 581 | } |
479 | INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); | 582 | INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); |
480 | return 0; | 583 | return 0; |
@@ -588,6 +691,9 @@ static void rds_tcp_exit(void) | |||
588 | rds_tcp_set_unloading(); | 691 | rds_tcp_set_unloading(); |
589 | synchronize_rcu(); | 692 | synchronize_rcu(); |
590 | rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); | 693 | rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); |
694 | #if IS_ENABLED(CONFIG_IPV6) | ||
695 | rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); | ||
696 | #endif | ||
591 | unregister_pernet_device(&rds_tcp_net_ops); | 697 | unregister_pernet_device(&rds_tcp_net_ops); |
592 | rds_tcp_destroy_conns(); | 698 | rds_tcp_destroy_conns(); |
593 | rds_trans_unregister(&rds_tcp_transport); | 699 | rds_trans_unregister(&rds_tcp_transport); |
@@ -619,6 +725,9 @@ static int rds_tcp_init(void) | |||
619 | rds_trans_register(&rds_tcp_transport); | 725 | rds_trans_register(&rds_tcp_transport); |
620 | 726 | ||
621 | rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); | 727 | rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); |
728 | #if IS_ENABLED(CONFIG_IPV6) | ||
729 | rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info); | ||
730 | #endif | ||
622 | 731 | ||
623 | goto out; | 732 | goto out; |
624 | out_recv: | 733 | out_recv: |
@@ -633,4 +742,3 @@ module_init(rds_tcp_init); | |||
633 | MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); | 742 | MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); |
634 | MODULE_DESCRIPTION("RDS: TCP transport"); | 743 | MODULE_DESCRIPTION("RDS: TCP transport"); |
635 | MODULE_LICENSE("Dual BSD/GPL"); | 744 | MODULE_LICENSE("Dual BSD/GPL"); |
636 | |||
diff --git a/net/rds/tcp.h b/net/rds/tcp.h index c6fa080e9b6d..3c69361d21c7 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h | |||
@@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); | |||
67 | void rds_tcp_state_change(struct sock *sk); | 67 | void rds_tcp_state_change(struct sock *sk); |
68 | 68 | ||
69 | /* tcp_listen.c */ | 69 | /* tcp_listen.c */ |
70 | struct socket *rds_tcp_listen_init(struct net *); | 70 | struct socket *rds_tcp_listen_init(struct net *net, bool isv6); |
71 | void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); | 71 | void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); |
72 | void rds_tcp_listen_data_ready(struct sock *sk); | 72 | void rds_tcp_listen_data_ready(struct sock *sk); |
73 | int rds_tcp_accept_one(struct socket *sock); | 73 | int rds_tcp_accept_one(struct socket *sock); |
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index d999e7075645..008f50fb25dd 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -66,7 +66,8 @@ void rds_tcp_state_change(struct sock *sk) | |||
66 | * RDS connection as RDS_CONN_UP until the reconnect, | 66 | * RDS connection as RDS_CONN_UP until the reconnect, |
67 | * to avoid RDS datagram loss. | 67 | * to avoid RDS datagram loss. |
68 | */ | 68 | */ |
69 | if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && | 69 | if (rds_addr_cmp(&cp->cp_conn->c_laddr, |
70 | &cp->cp_conn->c_faddr) >= 0 && | ||
70 | rds_conn_path_transition(cp, RDS_CONN_CONNECTING, | 71 | rds_conn_path_transition(cp, RDS_CONN_CONNECTING, |
71 | RDS_CONN_ERROR)) { | 72 | RDS_CONN_ERROR)) { |
72 | rds_conn_path_drop(cp, false); | 73 | rds_conn_path_drop(cp, false); |
@@ -88,7 +89,11 @@ out: | |||
88 | int rds_tcp_conn_path_connect(struct rds_conn_path *cp) | 89 | int rds_tcp_conn_path_connect(struct rds_conn_path *cp) |
89 | { | 90 | { |
90 | struct socket *sock = NULL; | 91 | struct socket *sock = NULL; |
91 | struct sockaddr_in src, dest; | 92 | struct sockaddr_in6 sin6; |
93 | struct sockaddr_in sin; | ||
94 | struct sockaddr *addr; | ||
95 | int addrlen; | ||
96 | bool isv6; | ||
92 | int ret; | 97 | int ret; |
93 | struct rds_connection *conn = cp->cp_conn; | 98 | struct rds_connection *conn = cp->cp_conn; |
94 | struct rds_tcp_connection *tc = cp->cp_transport_data; | 99 | struct rds_tcp_connection *tc = cp->cp_transport_data; |
@@ -105,37 +110,68 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) | |||
105 | mutex_unlock(&tc->t_conn_path_lock); | 110 | mutex_unlock(&tc->t_conn_path_lock); |
106 | return 0; | 111 | return 0; |
107 | } | 112 | } |
108 | ret = sock_create_kern(rds_conn_net(conn), PF_INET, | 113 | if (ipv6_addr_v4mapped(&conn->c_laddr)) { |
109 | SOCK_STREAM, IPPROTO_TCP, &sock); | 114 | ret = sock_create_kern(rds_conn_net(conn), PF_INET, |
115 | SOCK_STREAM, IPPROTO_TCP, &sock); | ||
116 | isv6 = false; | ||
117 | } else { | ||
118 | ret = sock_create_kern(rds_conn_net(conn), PF_INET6, | ||
119 | SOCK_STREAM, IPPROTO_TCP, &sock); | ||
120 | isv6 = true; | ||
121 | } | ||
122 | |||
110 | if (ret < 0) | 123 | if (ret < 0) |
111 | goto out; | 124 | goto out; |
112 | 125 | ||
113 | rds_tcp_tune(sock); | 126 | rds_tcp_tune(sock); |
114 | 127 | ||
115 | src.sin_family = AF_INET; | 128 | if (isv6) { |
116 | src.sin_addr.s_addr = (__force u32)conn->c_laddr; | 129 | sin6.sin6_family = AF_INET6; |
117 | src.sin_port = (__force u16)htons(0); | 130 | sin6.sin6_addr = conn->c_laddr; |
131 | sin6.sin6_port = 0; | ||
132 | sin6.sin6_flowinfo = 0; | ||
133 | sin6.sin6_scope_id = conn->c_dev_if; | ||
134 | addr = (struct sockaddr *)&sin6; | ||
135 | addrlen = sizeof(sin6); | ||
136 | } else { | ||
137 | sin.sin_family = AF_INET; | ||
138 | sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; | ||
139 | sin.sin_port = 0; | ||
140 | addr = (struct sockaddr *)&sin; | ||
141 | addrlen = sizeof(sin); | ||
142 | } | ||
118 | 143 | ||
119 | ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); | 144 | ret = sock->ops->bind(sock, addr, addrlen); |
120 | if (ret) { | 145 | if (ret) { |
121 | rdsdebug("bind failed with %d at address %pI4\n", | 146 | rdsdebug("bind failed with %d at address %pI6c\n", |
122 | ret, &conn->c_laddr); | 147 | ret, &conn->c_laddr); |
123 | goto out; | 148 | goto out; |
124 | } | 149 | } |
125 | 150 | ||
126 | dest.sin_family = AF_INET; | 151 | if (isv6) { |
127 | dest.sin_addr.s_addr = (__force u32)conn->c_faddr; | 152 | sin6.sin6_family = AF_INET6; |
128 | dest.sin_port = (__force u16)htons(RDS_TCP_PORT); | 153 | sin6.sin6_addr = conn->c_faddr; |
154 | sin6.sin6_port = htons(RDS_TCP_PORT); | ||
155 | sin6.sin6_flowinfo = 0; | ||
156 | sin6.sin6_scope_id = conn->c_dev_if; | ||
157 | addr = (struct sockaddr *)&sin6; | ||
158 | addrlen = sizeof(sin6); | ||
159 | } else { | ||
160 | sin.sin_family = AF_INET; | ||
161 | sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; | ||
162 | sin.sin_port = htons(RDS_TCP_PORT); | ||
163 | addr = (struct sockaddr *)&sin; | ||
164 | addrlen = sizeof(sin); | ||
165 | } | ||
129 | 166 | ||
130 | /* | 167 | /* |
131 | * once we call connect() we can start getting callbacks and they | 168 | * once we call connect() we can start getting callbacks and they |
132 | * own the socket | 169 | * own the socket |
133 | */ | 170 | */ |
134 | rds_tcp_set_callbacks(sock, cp); | 171 | rds_tcp_set_callbacks(sock, cp); |
135 | ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), | 172 | ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK); |
136 | O_NONBLOCK); | ||
137 | 173 | ||
138 | rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); | 174 | rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret); |
139 | if (ret == -EINPROGRESS) | 175 | if (ret == -EINPROGRESS) |
140 | ret = 0; | 176 | ret = 0; |
141 | if (ret == 0) { | 177 | if (ret == 0) { |
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 22571189f21e..c12203f646da 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006, 2018 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -83,13 +83,12 @@ static | |||
83 | struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) | 83 | struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) |
84 | { | 84 | { |
85 | int i; | 85 | int i; |
86 | bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr); | ||
87 | int npaths = max_t(int, 1, conn->c_npaths); | 86 | int npaths = max_t(int, 1, conn->c_npaths); |
88 | 87 | ||
89 | /* for mprds, all paths MUST be initiated by the peer | 88 | /* for mprds, all paths MUST be initiated by the peer |
90 | * with the smaller address. | 89 | * with the smaller address. |
91 | */ | 90 | */ |
92 | if (!peer_is_smaller) { | 91 | if (rds_addr_cmp(&conn->c_faddr, &conn->c_laddr) >= 0) { |
93 | /* Make sure we initiate at least one path if this | 92 | /* Make sure we initiate at least one path if this |
94 | * has not already been done; rds_start_mprds() will | 93 | * has not already been done; rds_start_mprds() will |
95 | * take care of additional paths, if necessary. | 94 | * take care of additional paths, if necessary. |
@@ -132,6 +131,11 @@ int rds_tcp_accept_one(struct socket *sock) | |||
132 | struct rds_tcp_connection *rs_tcp = NULL; | 131 | struct rds_tcp_connection *rs_tcp = NULL; |
133 | int conn_state; | 132 | int conn_state; |
134 | struct rds_conn_path *cp; | 133 | struct rds_conn_path *cp; |
134 | struct in6_addr *my_addr, *peer_addr; | ||
135 | #if !IS_ENABLED(CONFIG_IPV6) | ||
136 | struct in6_addr saddr, daddr; | ||
137 | #endif | ||
138 | int dev_if = 0; | ||
135 | 139 | ||
136 | if (!sock) /* module unload or netns delete in progress */ | 140 | if (!sock) /* module unload or netns delete in progress */ |
137 | return -ENETUNREACH; | 141 | return -ENETUNREACH; |
@@ -164,13 +168,40 @@ int rds_tcp_accept_one(struct socket *sock) | |||
164 | 168 | ||
165 | inet = inet_sk(new_sock->sk); | 169 | inet = inet_sk(new_sock->sk); |
166 | 170 | ||
167 | rdsdebug("accepted tcp %pI4:%u -> %pI4:%u\n", | 171 | #if IS_ENABLED(CONFIG_IPV6) |
168 | &inet->inet_saddr, ntohs(inet->inet_sport), | 172 | my_addr = &new_sock->sk->sk_v6_rcv_saddr; |
169 | &inet->inet_daddr, ntohs(inet->inet_dport)); | 173 | peer_addr = &new_sock->sk->sk_v6_daddr; |
174 | #else | ||
175 | ipv6_addr_set_v4mapped(inet->inet_saddr, &saddr); | ||
176 | ipv6_addr_set_v4mapped(inet->inet_daddr, &daddr); | ||
177 | my_addr = &saddr; | ||
178 | peer_addr = &daddr; | ||
179 | #endif | ||
180 | rdsdebug("accepted family %d tcp %pI6c:%u -> %pI6c:%u\n", | ||
181 | sock->sk->sk_family, | ||
182 | my_addr, ntohs(inet->inet_sport), | ||
183 | peer_addr, ntohs(inet->inet_dport)); | ||
184 | |||
185 | #if IS_ENABLED(CONFIG_IPV6) | ||
186 | /* sk_bound_dev_if is not set if the peer address is not link local | ||
187 | * address. In this case, it happens that mcast_oif is set. So | ||
188 | * just use it. | ||
189 | */ | ||
190 | if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) && | ||
191 | !(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) { | ||
192 | struct ipv6_pinfo *inet6; | ||
193 | |||
194 | inet6 = inet6_sk(new_sock->sk); | ||
195 | dev_if = inet6->mcast_oif; | ||
196 | } else { | ||
197 | dev_if = new_sock->sk->sk_bound_dev_if; | ||
198 | } | ||
199 | #endif | ||
170 | 200 | ||
171 | conn = rds_conn_create(sock_net(sock->sk), | 201 | conn = rds_conn_create(sock_net(sock->sk), |
172 | inet->inet_saddr, inet->inet_daddr, | 202 | my_addr, peer_addr, |
173 | &rds_tcp_transport, GFP_KERNEL); | 203 | &rds_tcp_transport, GFP_KERNEL, dev_if); |
204 | |||
174 | if (IS_ERR(conn)) { | 205 | if (IS_ERR(conn)) { |
175 | ret = PTR_ERR(conn); | 206 | ret = PTR_ERR(conn); |
176 | goto out; | 207 | goto out; |
@@ -254,15 +285,22 @@ out: | |||
254 | ready(sk); | 285 | ready(sk); |
255 | } | 286 | } |
256 | 287 | ||
257 | struct socket *rds_tcp_listen_init(struct net *net) | 288 | struct socket *rds_tcp_listen_init(struct net *net, bool isv6) |
258 | { | 289 | { |
259 | struct sockaddr_in sin; | ||
260 | struct socket *sock = NULL; | 290 | struct socket *sock = NULL; |
291 | struct sockaddr_storage ss; | ||
292 | struct sockaddr_in6 *sin6; | ||
293 | struct sockaddr_in *sin; | ||
294 | int addr_len; | ||
261 | int ret; | 295 | int ret; |
262 | 296 | ||
263 | ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); | 297 | ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM, |
264 | if (ret < 0) | 298 | IPPROTO_TCP, &sock); |
299 | if (ret < 0) { | ||
300 | rdsdebug("could not create %s listener socket: %d\n", | ||
301 | isv6 ? "IPv6" : "IPv4", ret); | ||
265 | goto out; | 302 | goto out; |
303 | } | ||
266 | 304 | ||
267 | sock->sk->sk_reuse = SK_CAN_REUSE; | 305 | sock->sk->sk_reuse = SK_CAN_REUSE; |
268 | rds_tcp_nonagle(sock); | 306 | rds_tcp_nonagle(sock); |
@@ -272,13 +310,28 @@ struct socket *rds_tcp_listen_init(struct net *net) | |||
272 | sock->sk->sk_data_ready = rds_tcp_listen_data_ready; | 310 | sock->sk->sk_data_ready = rds_tcp_listen_data_ready; |
273 | write_unlock_bh(&sock->sk->sk_callback_lock); | 311 | write_unlock_bh(&sock->sk->sk_callback_lock); |
274 | 312 | ||
275 | sin.sin_family = PF_INET; | 313 | if (isv6) { |
276 | sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); | 314 | sin6 = (struct sockaddr_in6 *)&ss; |
277 | sin.sin_port = (__force u16)htons(RDS_TCP_PORT); | 315 | sin6->sin6_family = PF_INET6; |
316 | sin6->sin6_addr = in6addr_any; | ||
317 | sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT); | ||
318 | sin6->sin6_scope_id = 0; | ||
319 | sin6->sin6_flowinfo = 0; | ||
320 | addr_len = sizeof(*sin6); | ||
321 | } else { | ||
322 | sin = (struct sockaddr_in *)&ss; | ||
323 | sin->sin_family = PF_INET; | ||
324 | sin->sin_addr.s_addr = INADDR_ANY; | ||
325 | sin->sin_port = (__force u16)htons(RDS_TCP_PORT); | ||
326 | addr_len = sizeof(*sin); | ||
327 | } | ||
278 | 328 | ||
279 | ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); | 329 | ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len); |
280 | if (ret < 0) | 330 | if (ret < 0) { |
331 | rdsdebug("could not bind %s listener socket: %d\n", | ||
332 | isv6 ? "IPv6" : "IPv4", ret); | ||
281 | goto out; | 333 | goto out; |
334 | } | ||
282 | 335 | ||
283 | ret = sock->ops->listen(sock, 64); | 336 | ret = sock->ops->listen(sock, 64); |
284 | if (ret < 0) | 337 | if (ret < 0) |
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index b9fbd2ee74ef..42c5ff1eda95 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -179,7 +179,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | |||
179 | tc->t_tinc = tinc; | 179 | tc->t_tinc = tinc; |
180 | rdsdebug("alloced tinc %p\n", tinc); | 180 | rdsdebug("alloced tinc %p\n", tinc); |
181 | rds_inc_path_init(&tinc->ti_inc, cp, | 181 | rds_inc_path_init(&tinc->ti_inc, cp, |
182 | cp->cp_conn->c_faddr); | 182 | &cp->cp_conn->c_faddr); |
183 | tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = | 183 | tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = |
184 | local_clock(); | 184 | local_clock(); |
185 | 185 | ||
@@ -239,8 +239,9 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, | |||
239 | if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) | 239 | if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) |
240 | rds_tcp_cong_recv(conn, tinc); | 240 | rds_tcp_cong_recv(conn, tinc); |
241 | else | 241 | else |
242 | rds_recv_incoming(conn, conn->c_faddr, | 242 | rds_recv_incoming(conn, &conn->c_faddr, |
243 | conn->c_laddr, &tinc->ti_inc, | 243 | &conn->c_laddr, |
244 | &tinc->ti_inc, | ||
244 | arg->gfp); | 245 | arg->gfp); |
245 | 246 | ||
246 | tc->t_tinc_hdr_rem = sizeof(struct rds_header); | 247 | tc->t_tinc_hdr_rem = sizeof(struct rds_header); |
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 7df869d37afd..78a2554a4497 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -153,7 +153,7 @@ out: | |||
153 | * an incoming RST. | 153 | * an incoming RST. |
154 | */ | 154 | */ |
155 | if (rds_conn_path_up(cp)) { | 155 | if (rds_conn_path_up(cp)) { |
156 | pr_warn("RDS/tcp: send to %pI4 on cp [%d]" | 156 | pr_warn("RDS/tcp: send to %pI6c on cp [%d]" |
157 | "returned %d, " | 157 | "returned %d, " |
158 | "disconnecting and reconnecting\n", | 158 | "disconnecting and reconnecting\n", |
159 | &conn->c_faddr, cp->cp_index, ret); | 159 | &conn->c_faddr, cp->cp_index, ret); |
diff --git a/net/rds/threads.c b/net/rds/threads.c index c52861d77a59..e64f9e4c3cda 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -82,8 +82,8 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) | |||
82 | return; | 82 | return; |
83 | } | 83 | } |
84 | 84 | ||
85 | rdsdebug("conn %p for %pI4 to %pI4 complete\n", | 85 | rdsdebug("conn %p for %pI6c to %pI6c complete\n", |
86 | cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr); | 86 | cp->cp_conn, &cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr); |
87 | 87 | ||
88 | cp->cp_reconnect_jiffies = 0; | 88 | cp->cp_reconnect_jiffies = 0; |
89 | set_bit(0, &cp->cp_conn->c_map_queued); | 89 | set_bit(0, &cp->cp_conn->c_map_queued); |
@@ -125,13 +125,13 @@ void rds_queue_reconnect(struct rds_conn_path *cp) | |||
125 | unsigned long rand; | 125 | unsigned long rand; |
126 | struct rds_connection *conn = cp->cp_conn; | 126 | struct rds_connection *conn = cp->cp_conn; |
127 | 127 | ||
128 | rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n", | 128 | rdsdebug("conn %p for %pI6c to %pI6c reconnect jiffies %lu\n", |
129 | conn, &conn->c_laddr, &conn->c_faddr, | 129 | conn, &conn->c_laddr, &conn->c_faddr, |
130 | cp->cp_reconnect_jiffies); | 130 | cp->cp_reconnect_jiffies); |
131 | 131 | ||
132 | /* let peer with smaller addr initiate reconnect, to avoid duels */ | 132 | /* let peer with smaller addr initiate reconnect, to avoid duels */ |
133 | if (conn->c_trans->t_type == RDS_TRANS_TCP && | 133 | if (conn->c_trans->t_type == RDS_TRANS_TCP && |
134 | !IS_CANONICAL(conn->c_laddr, conn->c_faddr)) | 134 | rds_addr_cmp(&conn->c_laddr, &conn->c_faddr) >= 0) |
135 | return; | 135 | return; |
136 | 136 | ||
137 | set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); | 137 | set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); |
@@ -145,7 +145,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) | |||
145 | } | 145 | } |
146 | 146 | ||
147 | get_random_bytes(&rand, sizeof(rand)); | 147 | get_random_bytes(&rand, sizeof(rand)); |
148 | rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", | 148 | rdsdebug("%lu delay %lu ceil conn %p for %pI6c -> %pI6c\n", |
149 | rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, | 149 | rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies, |
150 | conn, &conn->c_laddr, &conn->c_faddr); | 150 | conn, &conn->c_laddr, &conn->c_faddr); |
151 | rcu_read_lock(); | 151 | rcu_read_lock(); |
@@ -167,14 +167,14 @@ void rds_connect_worker(struct work_struct *work) | |||
167 | int ret; | 167 | int ret; |
168 | 168 | ||
169 | if (cp->cp_index > 0 && | 169 | if (cp->cp_index > 0 && |
170 | !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr)) | 170 | rds_addr_cmp(&cp->cp_conn->c_laddr, &cp->cp_conn->c_faddr) >= 0) |
171 | return; | 171 | return; |
172 | clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); | 172 | clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); |
173 | ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); | 173 | ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); |
174 | if (ret) { | 174 | if (ret) { |
175 | ret = conn->c_trans->conn_path_connect(cp); | 175 | ret = conn->c_trans->conn_path_connect(cp); |
176 | rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", | 176 | rdsdebug("conn %p for %pI6c to %pI6c dispatched, ret %d\n", |
177 | conn, &conn->c_laddr, &conn->c_faddr, ret); | 177 | conn, &conn->c_laddr, &conn->c_faddr, ret); |
178 | 178 | ||
179 | if (ret) { | 179 | if (ret) { |
180 | if (rds_conn_path_transition(cp, | 180 | if (rds_conn_path_transition(cp, |
@@ -259,3 +259,50 @@ int rds_threads_init(void) | |||
259 | 259 | ||
260 | return 0; | 260 | return 0; |
261 | } | 261 | } |
262 | |||
263 | /* Compare two IPv6 addresses. Return 0 if the two addresses are equal. | ||
264 | * Return 1 if the first is greater. Return -1 if the second is greater. | ||
265 | */ | ||
266 | int rds_addr_cmp(const struct in6_addr *addr1, | ||
267 | const struct in6_addr *addr2) | ||
268 | { | ||
269 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 | ||
270 | const __be64 *a1, *a2; | ||
271 | u64 x, y; | ||
272 | |||
273 | a1 = (__be64 *)addr1; | ||
274 | a2 = (__be64 *)addr2; | ||
275 | |||
276 | if (*a1 != *a2) { | ||
277 | if (be64_to_cpu(*a1) < be64_to_cpu(*a2)) | ||
278 | return -1; | ||
279 | else | ||
280 | return 1; | ||
281 | } else { | ||
282 | x = be64_to_cpu(*++a1); | ||
283 | y = be64_to_cpu(*++a2); | ||
284 | if (x < y) | ||
285 | return -1; | ||
286 | else if (x > y) | ||
287 | return 1; | ||
288 | else | ||
289 | return 0; | ||
290 | } | ||
291 | #else | ||
292 | u32 a, b; | ||
293 | int i; | ||
294 | |||
295 | for (i = 0; i < 4; i++) { | ||
296 | if (addr1->s6_addr32[i] != addr2->s6_addr32[i]) { | ||
297 | a = ntohl(addr1->s6_addr32[i]); | ||
298 | b = ntohl(addr2->s6_addr32[i]); | ||
299 | if (a < b) | ||
300 | return -1; | ||
301 | else if (a > b) | ||
302 | return 1; | ||
303 | } | ||
304 | } | ||
305 | return 0; | ||
306 | #endif | ||
307 | } | ||
308 | EXPORT_SYMBOL_GPL(rds_addr_cmp); | ||
diff --git a/net/rds/transport.c b/net/rds/transport.c index 0b188dd0a344..46f709a4b577 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | 2 | * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. |
3 | * | 3 | * |
4 | * This software is available to you under a choice of one of two | 4 | * This software is available to you under a choice of one of two |
5 | * licenses. You may choose to be licensed under the terms of the GNU | 5 | * licenses. You may choose to be licensed under the terms of the GNU |
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/kernel.h> | 33 | #include <linux/kernel.h> |
34 | #include <linux/module.h> | 34 | #include <linux/module.h> |
35 | #include <linux/in.h> | 35 | #include <linux/in.h> |
36 | #include <linux/ipv6.h> | ||
36 | 37 | ||
37 | #include "rds.h" | 38 | #include "rds.h" |
38 | #include "loop.h" | 39 | #include "loop.h" |
@@ -75,20 +76,26 @@ void rds_trans_put(struct rds_transport *trans) | |||
75 | module_put(trans->t_owner); | 76 | module_put(trans->t_owner); |
76 | } | 77 | } |
77 | 78 | ||
78 | struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr) | 79 | struct rds_transport *rds_trans_get_preferred(struct net *net, |
80 | const struct in6_addr *addr, | ||
81 | __u32 scope_id) | ||
79 | { | 82 | { |
80 | struct rds_transport *ret = NULL; | 83 | struct rds_transport *ret = NULL; |
81 | struct rds_transport *trans; | 84 | struct rds_transport *trans; |
82 | unsigned int i; | 85 | unsigned int i; |
83 | 86 | ||
84 | if (IN_LOOPBACK(ntohl(addr))) | 87 | if (ipv6_addr_v4mapped(addr)) { |
88 | if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET) | ||
89 | return &rds_loop_transport; | ||
90 | } else if (ipv6_addr_loopback(addr)) { | ||
85 | return &rds_loop_transport; | 91 | return &rds_loop_transport; |
92 | } | ||
86 | 93 | ||
87 | down_read(&rds_trans_sem); | 94 | down_read(&rds_trans_sem); |
88 | for (i = 0; i < RDS_TRANS_COUNT; i++) { | 95 | for (i = 0; i < RDS_TRANS_COUNT; i++) { |
89 | trans = transports[i]; | 96 | trans = transports[i]; |
90 | 97 | ||
91 | if (trans && (trans->laddr_check(net, addr) == 0) && | 98 | if (trans && (trans->laddr_check(net, addr, scope_id) == 0) && |
92 | (!trans->t_owner || try_module_get(trans->t_owner))) { | 99 | (!trans->t_owner || try_module_get(trans->t_owner))) { |
93 | ret = trans; | 100 | ret = trans; |
94 | break; | 101 | break; |
@@ -152,4 +159,3 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, | |||
152 | 159 | ||
153 | return total; | 160 | return total; |
154 | } | 161 | } |
155 | |||