aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTrond Myklebust <trond.myklebust@primarydata.com>2015-02-08 15:00:06 -0500
committerTrond Myklebust <trond.myklebust@primarydata.com>2015-02-08 18:52:11 -0500
commit4dda9c8a5e34773b290c6b5938ccb36e7fcdf35c (patch)
treecbbed61fb38c38f281f7a5775d4a793e4ebe401d
parentbc3203cdca962bcfaf2d59a5bc59b9f0171c7df4 (diff)
SUNRPC: Set SO_REUSEPORT socket option for TCP connections
When using TCP, we need the ability to reuse port numbers after a disconnection, so that the NFSv3 server knows that we're the same client. Currently we use a hack to work around the TCP socket's TIME_WAIT: we send an RST instead of closing, which doesn't always work... The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple TCP connections to the same source address+port combination, and thus to use ordinary TCP close() instead of the current hack. Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r--net/sunrpc/xprtsock.c53
1 files changed, 49 insertions, 4 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 87ce7e8bb8dc..484c5040436a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1667,6 +1667,39 @@ static unsigned short xs_get_random_port(void)
1667} 1667}
1668 1668
1669/** 1669/**
1670 * xs_set_reuseaddr_port - set the socket's port and address reuse options
1671 * @sock: socket
1672 *
1673 * Note that this function has to be called on all sockets that share the
1674 * same port, and it must be called before binding.
1675 */
1676static void xs_sock_set_reuseport(struct socket *sock)
1677{
1678 char opt = 1;
1679
1680 kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
1681}
1682
1683static unsigned short xs_sock_getport(struct socket *sock)
1684{
1685 struct sockaddr_storage buf;
1686 int buflen;
1687 unsigned short port = 0;
1688
1689 if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
1690 goto out;
1691 switch (buf.ss_family) {
1692 case AF_INET6:
1693 port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port);
1694 break;
1695 case AF_INET:
1696 port = ntohs(((struct sockaddr_in *)&buf)->sin_port);
1697 }
1698out:
1699 return port;
1700}
1701
1702/**
1670 * xs_set_port - reset the port number in the remote endpoint address 1703 * xs_set_port - reset the port number in the remote endpoint address
1671 * @xprt: generic transport 1704 * @xprt: generic transport
1672 * @port: new port number 1705 * @port: new port number
@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
1680 xs_update_peer_port(xprt); 1713 xs_update_peer_port(xprt);
1681} 1714}
1682 1715
1716static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
1717{
1718 if (transport->srcport == 0)
1719 transport->srcport = xs_sock_getport(sock);
1720}
1721
1683static unsigned short xs_get_srcport(struct sock_xprt *transport) 1722static unsigned short xs_get_srcport(struct sock_xprt *transport)
1684{ 1723{
1685 unsigned short port = transport->srcport; 1724 unsigned short port = transport->srcport;
@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work)
1833} 1872}
1834 1873
1835static struct socket *xs_create_sock(struct rpc_xprt *xprt, 1874static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1836 struct sock_xprt *transport, int family, int type, int protocol) 1875 struct sock_xprt *transport, int family, int type,
1876 int protocol, bool reuseport)
1837{ 1877{
1838 struct socket *sock; 1878 struct socket *sock;
1839 int err; 1879 int err;
@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
1846 } 1886 }
1847 xs_reclassify_socket(family, sock); 1887 xs_reclassify_socket(family, sock);
1848 1888
1889 if (reuseport)
1890 xs_sock_set_reuseport(sock);
1891
1849 err = xs_bind(transport, sock); 1892 err = xs_bind(transport, sock);
1850 if (err) { 1893 if (err) {
1851 sock_release(sock); 1894 sock_release(sock);
@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
2047 /* Start by resetting any existing state */ 2090 /* Start by resetting any existing state */
2048 xs_reset_transport(transport); 2091 xs_reset_transport(transport);
2049 sock = xs_create_sock(xprt, transport, 2092 sock = xs_create_sock(xprt, transport,
2050 xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); 2093 xs_addr(xprt)->sa_family, SOCK_DGRAM,
2094 IPPROTO_UDP, false);
2051 if (IS_ERR(sock)) 2095 if (IS_ERR(sock))
2052 goto out; 2096 goto out;
2053 2097
@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2149 sk->sk_allocation = GFP_ATOMIC; 2193 sk->sk_allocation = GFP_ATOMIC;
2150 2194
2151 /* socket options */ 2195 /* socket options */
2152 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
2153 sock_reset_flag(sk, SOCK_LINGER); 2196 sock_reset_flag(sk, SOCK_LINGER);
2154 tcp_sk(sk)->linger2 = 0; 2197 tcp_sk(sk)->linger2 = 0;
2155 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 2198 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
2174 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); 2217 ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
2175 switch (ret) { 2218 switch (ret) {
2176 case 0: 2219 case 0:
2220 xs_set_srcport(transport, sock);
2177 case -EINPROGRESS: 2221 case -EINPROGRESS:
2178 /* SYN_SENT! */ 2222 /* SYN_SENT! */
2179 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2223 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2202 if (!sock) { 2246 if (!sock) {
2203 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 2247 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
2204 sock = xs_create_sock(xprt, transport, 2248 sock = xs_create_sock(xprt, transport,
2205 xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); 2249 xs_addr(xprt)->sa_family, SOCK_STREAM,
2250 IPPROTO_TCP, true);
2206 if (IS_ERR(sock)) { 2251 if (IS_ERR(sock)) {
2207 status = PTR_ERR(sock); 2252 status = PTR_ERR(sock);
2208 goto out; 2253 goto out;