aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorNeil Brown <neilb@suse.de>2009-09-23 14:36:37 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2009-09-23 14:36:37 -0400
commit61d0a8e6a8049cea246ee7ec19b042d4ff1f6ef6 (patch)
treeee84a7e5260be07af494a70076bb70202ac0428d /net
parent8a6e5deb8a8caa810fef2c525f5dbea2cfe04a47 (diff)
NFS/RPC: fix problems with reestablish_timeout and related code.
[[resending with correct cc: - "vfs.kernel.org" just isn't right!]] xprt->reestablish_timeout is used to cause TCP connection attempts to back off if the connection fails so as not to hammer the network, but to still allow immediate connections when there is no reason to believe there is a problem. It is not used for the first connection (when transport->sock is NULL) but only on reconnects. It is currently set: a/ to 0 when xs_tcp_state_change finds a state of TCP_FIN_WAIT1 on the assumption that the client has closed the connection so the reconnect should be immediate when needed. b/ to at least XS_TCP_INIT_REEST_TO when xs_tcp_state_change detects TCP_CLOSING or TCP_CLOSE_WAIT on the assumption that the server closed the connection so a small delay at least is required. c/ as above when xs_tcp_state_change detects TCP_SYN_SENT, so that it is never 0 while a connection has been attempted, else the doubling will produce 0 and there will be no backoff. d/ to double its value (up to a limit) when delaying a connection, thus providing exponential backoff and e/ to XS_TCP_INIT_REEST_TO in xs_setup_tcp as simple initialisation. So you can see it is highly dependent on xs_tcp_state_change being called as expected. However experimental evidence shows that xs_tcp_state_change does not see all state changes. ("rpcdebug -m rpc trans" can help show what actually happens). Results show: TCP_ESTABLISHED is reported when a connection is made. TCP_SYN_SENT is never reported, so rule 'c' above is never effective. When the server closes the connection, TCP_CLOSE_WAIT and TCP_LAST_ACK *might* be reported, and TCP_CLOSE is always reported. Thus rule 'b' above will sometimes be effective, but not reliably. When the client closes the connection, it used to result in TCP_FIN_WAIT1, TCP_FIN_WAIT2, TCP_CLOSE. However since commit f75e674 (SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect) we don't see *any* events on client-close.
I think this is because xs_restore_old_callbacks is called to disconnect xs_tcp_state_change before the socket is closed. In any case, rule 'a' no longer applies. So all that is left are rule d, which successfully doubles the timeout which is never reset, and rule e which initialises the timeout. Even if the rules worked as expected, there would be a problem because a successful connection does not reset the timeout, so a sequence of events where the server closes the connection (e.g. during failover testing) will cause longer and longer timeouts with no good reason. This patch: - sets reestablish_timeout to 0 in xs_close thus effecting rule 'a' - sets it to 0 in xs_tcp_data_ready to ensure that a successful connection resets the timeout - sets it to at least XS_TCP_INIT_REEST_TO after it is doubled, thus effecting rule c I have not reimplemented rule b and the new version of rule c seems sufficient. I suspect other code in xs_tcp_data_ready needs to be revised as well. For example I don't think connect_cookie is being incremented as often as it should be. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/xprtsock.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bee415465754..37c5475ba258 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -773,6 +773,7 @@ static void xs_close(struct rpc_xprt *xprt)
773 dprintk("RPC: xs_close xprt %p\n", xprt); 773 dprintk("RPC: xs_close xprt %p\n", xprt);
774 774
775 xs_reset_transport(transport); 775 xs_reset_transport(transport);
776 xprt->reestablish_timeout = 0;
776 777
777 smp_mb__before_clear_bit(); 778 smp_mb__before_clear_bit();
778 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 779 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -1264,6 +1265,12 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
1264 if (xprt->shutdown) 1265 if (xprt->shutdown)
1265 goto out; 1266 goto out;
1266 1267
1268 /* Any data means we had a useful conversation, so
1269 * we don't need to delay the next reconnect
1270 */
1271 if (xprt->reestablish_timeout)
1272 xprt->reestablish_timeout = 0;
1273
1267 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ 1274 /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
1268 rd_desc.arg.data = xprt; 1275 rd_desc.arg.data = xprt;
1269 do { 1276 do {
@@ -2034,6 +2041,8 @@ static void xs_connect(struct rpc_task *task)
2034 &transport->connect_worker, 2041 &transport->connect_worker,
2035 xprt->reestablish_timeout); 2042 xprt->reestablish_timeout);
2036 xprt->reestablish_timeout <<= 1; 2043 xprt->reestablish_timeout <<= 1;
2044 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
2045 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2037 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) 2046 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
2038 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; 2047 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
2039 } else { 2048 } else {