diff options
author | Trond Myklebust <trond.myklebust@primarydata.com> | 2015-02-08 15:00:06 -0500 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2015-02-08 18:52:11 -0500 |
commit | 4dda9c8a5e34773b290c6b5938ccb36e7fcdf35c (patch) | |
tree | cbbed61fb38c38f281f7a5775d4a793e4ebe401d | |
parent | bc3203cdca962bcfaf2d59a5bc59b9f0171c7df4 (diff) |
SUNRPC: Set SO_REUSEPORT socket option for TCP connections
When using TCP, we need the ability to reuse port numbers after
a disconnection, so that the NFSv3 server knows that we're the same
client. Currently we use a hack to work around the TCP socket's
TIME_WAIT: we send an RST instead of closing, which doesn't
always work...
The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple
TCP connections to the same source address+port combination, and thus
to use ordinary TCP close() instead of the current hack.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- | net/sunrpc/xprtsock.c | 53 |
1 files changed, 49 insertions, 4 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 87ce7e8bb8dc..484c5040436a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -1667,6 +1667,39 @@ static unsigned short xs_get_random_port(void) | |||
1667 | } | 1667 | } |
1668 | 1668 | ||
1669 | /** | 1669 | /** |
1670 | * xs_set_reuseaddr_port - set the socket's port and address reuse options | ||
1671 | * @sock: socket | ||
1672 | * | ||
1673 | * Note that this function has to be called on all sockets that share the | ||
1674 | * same port, and it must be called before binding. | ||
1675 | */ | ||
1676 | static void xs_sock_set_reuseport(struct socket *sock) | ||
1677 | { | ||
1678 | char opt = 1; | ||
1679 | |||
1680 | kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); | ||
1681 | } | ||
1682 | |||
/**
 * xs_sock_getport - look up the local port number of a socket
 * @sock: socket
 *
 * Queries the socket's own address with kernel_getsockname() and extracts
 * the port from it for AF_INET and AF_INET6 addresses.
 *
 * Returns the port in host byte order, or 0 if the address could not be
 * retrieved or belongs to any other address family.
 */
static unsigned short xs_sock_getport(struct socket *sock)
{
	struct sockaddr_storage addr;
	int addrlen;

	if (kernel_getsockname(sock, (struct sockaddr *)&addr, &addrlen) < 0)
		return 0;

	if (addr.ss_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)&addr)->sin6_port);
	if (addr.ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)&addr)->sin_port);

	/* Unrecognised address family: report "no port". */
	return 0;
}
1701 | |||
1702 | /** | ||
1670 | * xs_set_port - reset the port number in the remote endpoint address | 1703 | * xs_set_port - reset the port number in the remote endpoint address |
1671 | * @xprt: generic transport | 1704 | * @xprt: generic transport |
1672 | * @port: new port number | 1705 | * @port: new port number |
@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) | |||
1680 | xs_update_peer_port(xprt); | 1713 | xs_update_peer_port(xprt); |
1681 | } | 1714 | } |
1682 | 1715 | ||
1716 | static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) | ||
1717 | { | ||
1718 | if (transport->srcport == 0) | ||
1719 | transport->srcport = xs_sock_getport(sock); | ||
1720 | } | ||
1721 | |||
1683 | static unsigned short xs_get_srcport(struct sock_xprt *transport) | 1722 | static unsigned short xs_get_srcport(struct sock_xprt *transport) |
1684 | { | 1723 | { |
1685 | unsigned short port = transport->srcport; | 1724 | unsigned short port = transport->srcport; |
@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work) | |||
1833 | } | 1872 | } |
1834 | 1873 | ||
1835 | static struct socket *xs_create_sock(struct rpc_xprt *xprt, | 1874 | static struct socket *xs_create_sock(struct rpc_xprt *xprt, |
1836 | struct sock_xprt *transport, int family, int type, int protocol) | 1875 | struct sock_xprt *transport, int family, int type, |
1876 | int protocol, bool reuseport) | ||
1837 | { | 1877 | { |
1838 | struct socket *sock; | 1878 | struct socket *sock; |
1839 | int err; | 1879 | int err; |
@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, | |||
1846 | } | 1886 | } |
1847 | xs_reclassify_socket(family, sock); | 1887 | xs_reclassify_socket(family, sock); |
1848 | 1888 | ||
1889 | if (reuseport) | ||
1890 | xs_sock_set_reuseport(sock); | ||
1891 | |||
1849 | err = xs_bind(transport, sock); | 1892 | err = xs_bind(transport, sock); |
1850 | if (err) { | 1893 | if (err) { |
1851 | sock_release(sock); | 1894 | sock_release(sock); |
@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work) | |||
2047 | /* Start by resetting any existing state */ | 2090 | /* Start by resetting any existing state */ |
2048 | xs_reset_transport(transport); | 2091 | xs_reset_transport(transport); |
2049 | sock = xs_create_sock(xprt, transport, | 2092 | sock = xs_create_sock(xprt, transport, |
2050 | xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); | 2093 | xs_addr(xprt)->sa_family, SOCK_DGRAM, |
2094 | IPPROTO_UDP, false); | ||
2051 | if (IS_ERR(sock)) | 2095 | if (IS_ERR(sock)) |
2052 | goto out; | 2096 | goto out; |
2053 | 2097 | ||
@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
2149 | sk->sk_allocation = GFP_ATOMIC; | 2193 | sk->sk_allocation = GFP_ATOMIC; |
2150 | 2194 | ||
2151 | /* socket options */ | 2195 | /* socket options */ |
2152 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
2153 | sock_reset_flag(sk, SOCK_LINGER); | 2196 | sock_reset_flag(sk, SOCK_LINGER); |
2154 | tcp_sk(sk)->linger2 = 0; | 2197 | tcp_sk(sk)->linger2 = 0; |
2155 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | 2198 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; |
@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
2174 | ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); | 2217 | ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); |
2175 | switch (ret) { | 2218 | switch (ret) { |
2176 | case 0: | 2219 | case 0: |
2220 | xs_set_srcport(transport, sock); | ||
2177 | case -EINPROGRESS: | 2221 | case -EINPROGRESS: |
2178 | /* SYN_SENT! */ | 2222 | /* SYN_SENT! */ |
2179 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) | 2223 | if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) |
@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
2202 | if (!sock) { | 2246 | if (!sock) { |
2203 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | 2247 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); |
2204 | sock = xs_create_sock(xprt, transport, | 2248 | sock = xs_create_sock(xprt, transport, |
2205 | xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); | 2249 | xs_addr(xprt)->sa_family, SOCK_STREAM, |
2250 | IPPROTO_TCP, true); | ||
2206 | if (IS_ERR(sock)) { | 2251 | if (IS_ERR(sock)) { |
2207 | status = PTR_ERR(sock); | 2252 | status = PTR_ERR(sock); |
2208 | goto out; | 2253 | goto out; |