diff options
author | Chuck Lever <cel@netapp.com> | 2005-08-25 19:25:55 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2005-09-23 12:38:53 -0400 |
commit | 03bf4b707eee06706c9db343dd5c905b7ee47ed2 (patch) | |
tree | 54f89b578758e2bf2650b647ae1c7100c882a757 | |
parent | 3167e12c0c424f3c323944701615343022d86418 (diff) |
[PATCH] RPC: parametrize various transport connect timeouts

Each transport implementation can now set unique bind, connect,
reestablishment, and idle timeout values. These are variables,
allowing the values to be modified dynamically. This permits
exponential backoff of any of these values, for instance.

As an example, we implement exponential backoff for the connection
reestablishment timeout.

Test-plan:
Destructive testing (unplugging the network temporarily). Connectathon
with UDP and TCP.

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/inode.c | 10 |
-rw-r--r-- | include/linux/nfs_fs.h | 4 |
-rw-r--r-- | include/linux/sunrpc/xprt.h | 29 |
-rw-r--r-- | net/sunrpc/clnt.c | 2 |
-rw-r--r-- | net/sunrpc/xprt.c | 5 |
-rw-r--r-- | net/sunrpc/xprtsock.c | 68 |
6 files changed, 84 insertions, 34 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b6a1ca508e6..062911e7ceb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -369,8 +369,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned | |||
369 | case IPPROTO_TCP: | 369 | case IPPROTO_TCP: |
370 | if (!to->to_initval) | 370 | if (!to->to_initval) |
371 | to->to_initval = 60 * HZ; | 371 | to->to_initval = 60 * HZ; |
372 | if (to->to_initval > RPC_MAX_TCP_TIMEOUT) | 372 | if (to->to_initval > NFS_MAX_TCP_TIMEOUT) |
373 | to->to_initval = RPC_MAX_TCP_TIMEOUT; | 373 | to->to_initval = NFS_MAX_TCP_TIMEOUT; |
374 | to->to_increment = to->to_initval; | 374 | to->to_increment = to->to_initval; |
375 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); | 375 | to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); |
376 | to->to_exponential = 0; | 376 | to->to_exponential = 0; |
@@ -379,9 +379,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned | |||
379 | default: | 379 | default: |
380 | if (!to->to_initval) | 380 | if (!to->to_initval) |
381 | to->to_initval = 11 * HZ / 10; | 381 | to->to_initval = 11 * HZ / 10; |
382 | if (to->to_initval > RPC_MAX_UDP_TIMEOUT) | 382 | if (to->to_initval > NFS_MAX_UDP_TIMEOUT) |
383 | to->to_initval = RPC_MAX_UDP_TIMEOUT; | 383 | to->to_initval = NFS_MAX_UDP_TIMEOUT; |
384 | to->to_maxval = RPC_MAX_UDP_TIMEOUT; | 384 | to->to_maxval = NFS_MAX_UDP_TIMEOUT; |
385 | to->to_exponential = 1; | 385 | to->to_exponential = 1; |
386 | break; | 386 | break; |
387 | } | 387 | } |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9a6047ff1b2..7bac2785c6e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -41,6 +41,10 @@ | |||
41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 | 41 | #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 |
42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 | 42 | #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 |
43 | 43 | ||
44 | /* Default timeout values */ | ||
45 | #define NFS_MAX_UDP_TIMEOUT (60*HZ) | ||
46 | #define NFS_MAX_TCP_TIMEOUT (600*HZ) | ||
47 | |||
44 | /* | 48 | /* |
45 | * superblock magic number for NFS | 49 | * superblock magic number for NFS |
46 | */ | 50 | */ |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9d9266cf8a3..2543adf1855 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -22,28 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries; | |||
22 | #define RPC_DEF_SLOT_TABLE (16U) | 22 | #define RPC_DEF_SLOT_TABLE (16U) |
23 | #define RPC_MAX_SLOT_TABLE (128U) | 23 | #define RPC_MAX_SLOT_TABLE (128U) |
24 | 24 | ||
25 | /* Default timeout values */ | ||
26 | #define RPC_MAX_UDP_TIMEOUT (60*HZ) | ||
27 | #define RPC_MAX_TCP_TIMEOUT (600*HZ) | ||
28 | |||
29 | /* | ||
30 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
31 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
32 | * long a server takes to reboot. | ||
33 | */ | ||
34 | #define RPC_CONNECT_TIMEOUT (60*HZ) | ||
35 | |||
36 | /* | ||
37 | * Delay an arbitrary number of seconds before attempting to reconnect | ||
38 | * after an error. | ||
39 | */ | ||
40 | #define RPC_REESTABLISH_TIMEOUT (15*HZ) | ||
41 | |||
42 | /* | ||
43 | * RPC transport idle timeout. | ||
44 | */ | ||
45 | #define RPC_IDLE_DISCONNECT_TIMEOUT (5*60*HZ) | ||
46 | |||
47 | /* | 25 | /* |
48 | * RPC call and reply header size as number of 32bit words (verifier | 26 | * RPC call and reply header size as number of 32bit words (verifier |
49 | * size computed separately) | 27 | * size computed separately) |
@@ -182,14 +160,19 @@ struct rpc_xprt { | |||
182 | /* | 160 | /* |
183 | * Connection of transports | 161 | * Connection of transports |
184 | */ | 162 | */ |
163 | unsigned long connect_timeout, | ||
164 | bind_timeout, | ||
165 | reestablish_timeout; | ||
185 | struct work_struct connect_worker; | 166 | struct work_struct connect_worker; |
186 | unsigned short port; | 167 | unsigned short port; |
168 | |||
187 | /* | 169 | /* |
188 | * Disconnection of idle transports | 170 | * Disconnection of idle transports |
189 | */ | 171 | */ |
190 | struct work_struct task_cleanup; | 172 | struct work_struct task_cleanup; |
191 | struct timer_list timer; | 173 | struct timer_list timer; |
192 | unsigned long last_used; | 174 | unsigned long last_used, |
175 | idle_timeout; | ||
193 | 176 | ||
194 | /* | 177 | /* |
195 | * Send stuff | 178 | * Send stuff |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cc1b773a79d..24b44e73f39 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -740,7 +740,7 @@ call_bind(struct rpc_task *task) | |||
740 | task->tk_action = call_connect; | 740 | task->tk_action = call_connect; |
741 | if (!clnt->cl_port) { | 741 | if (!clnt->cl_port) { |
742 | task->tk_action = call_bind_status; | 742 | task->tk_action = call_bind_status; |
743 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | 743 | task->tk_timeout = task->tk_xprt->bind_timeout; |
744 | rpc_getport(task, clnt); | 744 | rpc_getport(task, clnt); |
745 | } | 745 | } |
746 | } | 746 | } |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0458319a1bd..215be0d0ef6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -551,7 +551,7 @@ void xprt_connect(struct rpc_task *task) | |||
551 | if (task->tk_rqstp) | 551 | if (task->tk_rqstp) |
552 | task->tk_rqstp->rq_bytes_sent = 0; | 552 | task->tk_rqstp->rq_bytes_sent = 0; |
553 | 553 | ||
554 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | 554 | task->tk_timeout = xprt->connect_timeout; |
555 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); | 555 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); |
556 | xprt->ops->connect(task); | 556 | xprt->ops->connect(task); |
557 | } | 557 | } |
@@ -763,7 +763,6 @@ void xprt_transmit(struct rpc_task *task) | |||
763 | 763 | ||
764 | switch (status) { | 764 | switch (status) { |
765 | case -ECONNREFUSED: | 765 | case -ECONNREFUSED: |
766 | task->tk_timeout = RPC_REESTABLISH_TIMEOUT; | ||
767 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 766 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
768 | case -EAGAIN: | 767 | case -EAGAIN: |
769 | case -ENOTCONN: | 768 | case -ENOTCONN: |
@@ -857,7 +856,7 @@ void xprt_release(struct rpc_task *task) | |||
857 | xprt->last_used = jiffies; | 856 | xprt->last_used = jiffies; |
858 | if (list_empty(&xprt->recv) && !xprt->shutdown) | 857 | if (list_empty(&xprt->recv) && !xprt->shutdown) |
859 | mod_timer(&xprt->timer, | 858 | mod_timer(&xprt->timer, |
860 | xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT); | 859 | xprt->last_used + xprt->idle_timeout); |
861 | spin_unlock_bh(&xprt->transport_lock); | 860 | spin_unlock_bh(&xprt->transport_lock); |
862 | task->tk_rqstp = NULL; | 861 | task->tk_rqstp = NULL; |
863 | memset(req, 0, sizeof(*req)); /* mark unused */ | 862 | memset(req, 0, sizeof(*req)); /* mark unused */ |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 88ac71fcd33..06c2d95484e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -41,6 +41,50 @@ | |||
41 | */ | 41 | */ |
42 | #define XS_SENDMSG_RETRY (10U) | 42 | #define XS_SENDMSG_RETRY (10U) |
43 | 43 | ||
44 | /* | ||
45 | * Time out for an RPC UDP socket connect. UDP socket connects are | ||
46 | * synchronous, but we set a timeout anyway in case of resource | ||
47 | * exhaustion on the local host. | ||
48 | */ | ||
49 | #define XS_UDP_CONN_TO (5U * HZ) | ||
50 | |||
51 | /* | ||
52 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
53 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
54 | * long a server takes to reboot. | ||
55 | */ | ||
56 | #define XS_TCP_CONN_TO (60U * HZ) | ||
57 | |||
58 | /* | ||
59 | * Wait duration for a reply from the RPC portmapper. | ||
60 | */ | ||
61 | #define XS_BIND_TO (60U * HZ) | ||
62 | |||
63 | /* | ||
64 | * Delay if a UDP socket connect error occurs. This is most likely some | ||
65 | * kind of resource problem on the local host. | ||
66 | */ | ||
67 | #define XS_UDP_REEST_TO (2U * HZ) | ||
68 | |||
69 | /* | ||
70 | * The reestablish timeout allows clients to delay for a bit before attempting | ||
71 | * to reconnect to a server that just dropped our connection. | ||
72 | * | ||
73 | * We implement an exponential backoff when trying to reestablish a TCP | ||
74 | * transport connection with the server. Some servers like to drop a TCP | ||
75 | * connection when they are overworked, so we start with a short timeout and | ||
76 | * increase over time if the server is down or not responding. | ||
77 | */ | ||
78 | #define XS_TCP_INIT_REEST_TO (3U * HZ) | ||
79 | #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) | ||
80 | |||
81 | /* | ||
82 | * TCP idle timeout; client drops the transport socket if it is idle | ||
83 | * for this long. Note that we also timeout UDP sockets to prevent | ||
84 | * holding port numbers when there is no RPC traffic. | ||
85 | */ | ||
86 | #define XS_IDLE_DISC_TO (5U * 60 * HZ) | ||
87 | |||
44 | #ifdef RPC_DEBUG | 88 | #ifdef RPC_DEBUG |
45 | # undef RPC_DEBUG_DATA | 89 | # undef RPC_DEBUG_DATA |
46 | # define RPCDBG_FACILITY RPCDBG_TRANS | 90 | # define RPCDBG_FACILITY RPCDBG_TRANS |
@@ -739,6 +783,7 @@ static void xs_tcp_state_change(struct sock *sk) | |||
739 | xprt->tcp_reclen = 0; | 783 | xprt->tcp_reclen = 0; |
740 | xprt->tcp_copied = 0; | 784 | xprt->tcp_copied = 0; |
741 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | 785 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; |
786 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
742 | xprt_wake_pending_tasks(xprt, 0); | 787 | xprt_wake_pending_tasks(xprt, 0); |
743 | } | 788 | } |
744 | spin_unlock_bh(&xprt->transport_lock); | 789 | spin_unlock_bh(&xprt->transport_lock); |
@@ -1066,6 +1111,13 @@ out_clear: | |||
1066 | * @task: address of RPC task that manages state of connect request | 1111 | * @task: address of RPC task that manages state of connect request |
1067 | * | 1112 | * |
1068 | * TCP: If the remote end dropped the connection, delay reconnecting. | 1113 | * TCP: If the remote end dropped the connection, delay reconnecting. |
1114 | * | ||
1115 | * UDP socket connects are synchronous, but we use a work queue anyway | ||
1116 | * to guarantee that even unprivileged user processes can set up a | ||
1117 | * socket on a privileged port. | ||
1118 | * | ||
1119 | * If a UDP socket connect fails, the delay behavior here prevents | ||
1120 | * retry floods (hard mounts). | ||
1069 | */ | 1121 | */ |
1070 | static void xs_connect(struct rpc_task *task) | 1122 | static void xs_connect(struct rpc_task *task) |
1071 | { | 1123 | { |
@@ -1075,9 +1127,13 @@ static void xs_connect(struct rpc_task *task) | |||
1075 | return; | 1127 | return; |
1076 | 1128 | ||
1077 | if (xprt->sock != NULL) { | 1129 | if (xprt->sock != NULL) { |
1078 | dprintk("RPC: xs_connect delayed xprt %p\n", xprt); | 1130 | dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", |
1131 | xprt, xprt->reestablish_timeout / HZ); | ||
1079 | schedule_delayed_work(&xprt->connect_worker, | 1132 | schedule_delayed_work(&xprt->connect_worker, |
1080 | RPC_REESTABLISH_TIMEOUT); | 1133 | xprt->reestablish_timeout); |
1134 | xprt->reestablish_timeout <<= 1; | ||
1135 | if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) | ||
1136 | xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; | ||
1081 | } else { | 1137 | } else { |
1082 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); | 1138 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); |
1083 | schedule_work(&xprt->connect_worker); | 1139 | schedule_work(&xprt->connect_worker); |
@@ -1139,6 +1195,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1139 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 1195 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); |
1140 | 1196 | ||
1141 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); | 1197 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); |
1198 | xprt->bind_timeout = XS_BIND_TO; | ||
1199 | xprt->connect_timeout = XS_UDP_CONN_TO; | ||
1200 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | ||
1201 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1142 | 1202 | ||
1143 | xprt->ops = &xs_udp_ops; | 1203 | xprt->ops = &xs_udp_ops; |
1144 | 1204 | ||
@@ -1176,6 +1236,10 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | |||
1176 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | 1236 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; |
1177 | 1237 | ||
1178 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | 1238 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); |
1239 | xprt->bind_timeout = XS_BIND_TO; | ||
1240 | xprt->connect_timeout = XS_TCP_CONN_TO; | ||
1241 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
1242 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1179 | 1243 | ||
1180 | xprt->ops = &xs_tcp_ops; | 1244 | xprt->ops = &xs_tcp_ops; |
1181 | 1245 | ||