aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chuck Lever <cel@netapp.com> 2005-08-25 19:25:55 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2005-09-23 12:38:53 -0400
commit 03bf4b707eee06706c9db343dd5c905b7ee47ed2 (patch)
tree 54f89b578758e2bf2650b647ae1c7100c882a757
parent 3167e12c0c424f3c323944701615343022d86418 (diff)
[PATCH] RPC: parametrize various transport connect timeouts
Each transport implementation can now set unique bind, connect, reestablishment, and idle timeout values. These are variables, allowing the values to be modified dynamically. This permits exponential backoff of any of these values, for instance.

As an example, we implement exponential backoff for the connection reestablishment timeout.

Test-plan:
Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- fs/nfs/inode.c 10
-rw-r--r-- include/linux/nfs_fs.h 4
-rw-r--r-- include/linux/sunrpc/xprt.h 29
-rw-r--r-- net/sunrpc/clnt.c 2
-rw-r--r-- net/sunrpc/xprt.c 5
-rw-r--r-- net/sunrpc/xprtsock.c 68
6 files changed, 84 insertions, 34 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b6a1ca508e6..062911e7ceb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -369,8 +369,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
369 case IPPROTO_TCP: 369 case IPPROTO_TCP:
370 if (!to->to_initval) 370 if (!to->to_initval)
371 to->to_initval = 60 * HZ; 371 to->to_initval = 60 * HZ;
372 if (to->to_initval > RPC_MAX_TCP_TIMEOUT) 372 if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
373 to->to_initval = RPC_MAX_TCP_TIMEOUT; 373 to->to_initval = NFS_MAX_TCP_TIMEOUT;
374 to->to_increment = to->to_initval; 374 to->to_increment = to->to_initval;
375 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); 375 to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
376 to->to_exponential = 0; 376 to->to_exponential = 0;
@@ -379,9 +379,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
379 default: 379 default:
380 if (!to->to_initval) 380 if (!to->to_initval)
381 to->to_initval = 11 * HZ / 10; 381 to->to_initval = 11 * HZ / 10;
382 if (to->to_initval > RPC_MAX_UDP_TIMEOUT) 382 if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
383 to->to_initval = RPC_MAX_UDP_TIMEOUT; 383 to->to_initval = NFS_MAX_UDP_TIMEOUT;
384 to->to_maxval = RPC_MAX_UDP_TIMEOUT; 384 to->to_maxval = NFS_MAX_UDP_TIMEOUT;
385 to->to_exponential = 1; 385 to->to_exponential = 1;
386 break; 386 break;
387 } 387 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9a6047ff1b2..7bac2785c6e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -41,6 +41,10 @@
41#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 41#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768
42#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 42#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096
43 43
44/* Default timeout values */
45#define NFS_MAX_UDP_TIMEOUT (60*HZ)
46#define NFS_MAX_TCP_TIMEOUT (600*HZ)
47
44/* 48/*
45 * superblock magic number for NFS 49 * superblock magic number for NFS
46 */ 50 */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 9d9266cf8a3..2543adf1855 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -22,28 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries;
22#define RPC_DEF_SLOT_TABLE (16U) 22#define RPC_DEF_SLOT_TABLE (16U)
23#define RPC_MAX_SLOT_TABLE (128U) 23#define RPC_MAX_SLOT_TABLE (128U)
24 24
25/* Default timeout values */
26#define RPC_MAX_UDP_TIMEOUT (60*HZ)
27#define RPC_MAX_TCP_TIMEOUT (600*HZ)
28
29/*
30 * Wait duration for an RPC TCP connection to be established. Solaris
31 * NFS over TCP uses 60 seconds, for example, which is in line with how
32 * long a server takes to reboot.
33 */
34#define RPC_CONNECT_TIMEOUT (60*HZ)
35
36/*
37 * Delay an arbitrary number of seconds before attempting to reconnect
38 * after an error.
39 */
40#define RPC_REESTABLISH_TIMEOUT (15*HZ)
41
42/*
43 * RPC transport idle timeout.
44 */
45#define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ)
46
47/* 25/*
48 * RPC call and reply header size as number of 32bit words (verifier 26 * RPC call and reply header size as number of 32bit words (verifier
49 * size computed separately) 27 * size computed separately)
@@ -182,14 +160,19 @@ struct rpc_xprt {
182 /* 160 /*
183 * Connection of transports 161 * Connection of transports
184 */ 162 */
163 unsigned long connect_timeout,
164 bind_timeout,
165 reestablish_timeout;
185 struct work_struct connect_worker; 166 struct work_struct connect_worker;
186 unsigned short port; 167 unsigned short port;
168
187 /* 169 /*
188 * Disconnection of idle transports 170 * Disconnection of idle transports
189 */ 171 */
190 struct work_struct task_cleanup; 172 struct work_struct task_cleanup;
191 struct timer_list timer; 173 struct timer_list timer;
192 unsigned long last_used; 174 unsigned long last_used,
175 idle_timeout;
193 176
194 /* 177 /*
195 * Send stuff 178 * Send stuff
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cc1b773a79d..24b44e73f39 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -740,7 +740,7 @@ call_bind(struct rpc_task *task)
740 task->tk_action = call_connect; 740 task->tk_action = call_connect;
741 if (!clnt->cl_port) { 741 if (!clnt->cl_port) {
742 task->tk_action = call_bind_status; 742 task->tk_action = call_bind_status;
743 task->tk_timeout = RPC_CONNECT_TIMEOUT; 743 task->tk_timeout = task->tk_xprt->bind_timeout;
744 rpc_getport(task, clnt); 744 rpc_getport(task, clnt);
745 } 745 }
746} 746}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 0458319a1bd..215be0d0ef6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -551,7 +551,7 @@ void xprt_connect(struct rpc_task *task)
551 if (task->tk_rqstp) 551 if (task->tk_rqstp)
552 task->tk_rqstp->rq_bytes_sent = 0; 552 task->tk_rqstp->rq_bytes_sent = 0;
553 553
554 task->tk_timeout = RPC_CONNECT_TIMEOUT; 554 task->tk_timeout = xprt->connect_timeout;
555 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); 555 rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
556 xprt->ops->connect(task); 556 xprt->ops->connect(task);
557 } 557 }
@@ -763,7 +763,6 @@ void xprt_transmit(struct rpc_task *task)
763 763
764 switch (status) { 764 switch (status) {
765 case -ECONNREFUSED: 765 case -ECONNREFUSED:
766 task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
767 rpc_sleep_on(&xprt->sending, task, NULL, NULL); 766 rpc_sleep_on(&xprt->sending, task, NULL, NULL);
768 case -EAGAIN: 767 case -EAGAIN:
769 case -ENOTCONN: 768 case -ENOTCONN:
@@ -857,7 +856,7 @@ void xprt_release(struct rpc_task *task)
857 xprt->last_used = jiffies; 856 xprt->last_used = jiffies;
858 if (list_empty(&xprt->recv) && !xprt->shutdown) 857 if (list_empty(&xprt->recv) && !xprt->shutdown)
859 mod_timer(&xprt->timer, 858 mod_timer(&xprt->timer,
860 xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); 859 xprt->last_used + xprt->idle_timeout);
861 spin_unlock_bh(&xprt->transport_lock); 860 spin_unlock_bh(&xprt->transport_lock);
862 task->tk_rqstp = NULL; 861 task->tk_rqstp = NULL;
863 memset(req, 0, sizeof(*req)); /* mark unused */ 862 memset(req, 0, sizeof(*req)); /* mark unused */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 88ac71fcd33..06c2d95484e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -41,6 +41,50 @@
41 */ 41 */
42#define XS_SENDMSG_RETRY (10U) 42#define XS_SENDMSG_RETRY (10U)
43 43
44/*
45 * Time out for an RPC UDP socket connect. UDP socket connects are
46 * synchronous, but we set a timeout anyway in case of resource
47 * exhaustion on the local host.
48 */
49#define XS_UDP_CONN_TO (5U * HZ)
50
51/*
52 * Wait duration for an RPC TCP connection to be established. Solaris
53 * NFS over TCP uses 60 seconds, for example, which is in line with how
54 * long a server takes to reboot.
55 */
56#define XS_TCP_CONN_TO (60U * HZ)
57
58/*
59 * Wait duration for a reply from the RPC portmapper.
60 */
61#define XS_BIND_TO (60U * HZ)
62
63/*
64 * Delay if a UDP socket connect error occurs. This is most likely some
65 * kind of resource problem on the local host.
66 */
67#define XS_UDP_REEST_TO (2U * HZ)
68
69/*
70 * The reestablish timeout allows clients to delay for a bit before attempting
71 * to reconnect to a server that just dropped our connection.
72 *
73 * We implement an exponential backoff when trying to reestablish a TCP
74 * transport connection with the server. Some servers like to drop a TCP
75 * connection when they are overworked, so we start with a short timeout and
76 * increase over time if the server is down or not responding.
77 */
78#define XS_TCP_INIT_REEST_TO (3U * HZ)
79#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ)
80
81/*
82 * TCP idle timeout; client drops the transport socket if it is idle
83 * for this long. Note that we also timeout UDP sockets to prevent
84 * holding port numbers when there is no RPC traffic.
85 */
86#define XS_IDLE_DISC_TO (5U * 60 * HZ)
87
44#ifdef RPC_DEBUG 88#ifdef RPC_DEBUG
45# undef RPC_DEBUG_DATA 89# undef RPC_DEBUG_DATA
46# define RPCDBG_FACILITY RPCDBG_TRANS 90# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -739,6 +783,7 @@ static void xs_tcp_state_change(struct sock *sk)
739 xprt->tcp_reclen = 0; 783 xprt->tcp_reclen = 0;
740 xprt->tcp_copied = 0; 784 xprt->tcp_copied = 0;
741 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; 785 xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
786 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
742 xprt_wake_pending_tasks(xprt, 0); 787 xprt_wake_pending_tasks(xprt, 0);
743 } 788 }
744 spin_unlock_bh(&xprt->transport_lock); 789 spin_unlock_bh(&xprt->transport_lock);
@@ -1066,6 +1111,13 @@ out_clear:
1066 * @task: address of RPC task that manages state of connect request 1111 * @task: address of RPC task that manages state of connect request
1067 * 1112 *
1068 * TCP: If the remote end dropped the connection, delay reconnecting. 1113 * TCP: If the remote end dropped the connection, delay reconnecting.
1114 *
1115 * UDP socket connects are synchronous, but we use a work queue anyway
1116 * to guarantee that even unprivileged user processes can set up a
1117 * socket on a privileged port.
1118 *
1119 * If a UDP socket connect fails, the delay behavior here prevents
1120 * retry floods (hard mounts).
1069 */ 1121 */
1070static void xs_connect(struct rpc_task *task) 1122static void xs_connect(struct rpc_task *task)
1071{ 1123{
@@ -1075,9 +1127,13 @@ static void xs_connect(struct rpc_task *task)
1075 return; 1127 return;
1076 1128
1077 if (xprt->sock != NULL) { 1129 if (xprt->sock != NULL) {
1078 dprintk("RPC: xs_connect delayed xprt %p\n", xprt); 1130 dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n",
1131 xprt, xprt->reestablish_timeout / HZ);
1079 schedule_delayed_work(&xprt->connect_worker, 1132 schedule_delayed_work(&xprt->connect_worker,
1080 RPC_REESTABLISH_TIMEOUT); 1133 xprt->reestablish_timeout);
1134 xprt->reestablish_timeout <<= 1;
1135 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
1136 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
1081 } else { 1137 } else {
1082 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 1138 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
1083 schedule_work(&xprt->connect_worker); 1139 schedule_work(&xprt->connect_worker);
@@ -1139,6 +1195,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1139 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 1195 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
1140 1196
1141 INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); 1197 INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
1198 xprt->bind_timeout = XS_BIND_TO;
1199 xprt->connect_timeout = XS_UDP_CONN_TO;
1200 xprt->reestablish_timeout = XS_UDP_REEST_TO;
1201 xprt->idle_timeout = XS_IDLE_DISC_TO;
1142 1202
1143 xprt->ops = &xs_udp_ops; 1203 xprt->ops = &xs_udp_ops;
1144 1204
@@ -1176,6 +1236,10 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
1176 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 1236 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
1177 1237
1178 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); 1238 INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
1239 xprt->bind_timeout = XS_BIND_TO;
1240 xprt->connect_timeout = XS_TCP_CONN_TO;
1241 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1242 xprt->idle_timeout = XS_IDLE_DISC_TO;
1179 1243
1180 xprt->ops = &xs_tcp_ops; 1244 xprt->ops = &xs_tcp_ops;
1181 1245