aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jerry Chu <hkchu@google.com> 2010-08-27 15:13:28 -0400
committer: David S. Miller <davem@davemloft.net> 2010-08-30 16:23:33 -0400
commit: dca43c75e7e545694a9dd6288553f55c53e2a3a3 (patch)
tree: 4df6b0b295ecd571fa95004b486d9af1636d6a30
parent: 409456b10f87b28303643fec37543103f9ada00c (diff)
tcp: Add TCP_USER_TIMEOUT socket option.
This patch provides "user timeout" support as described in RFC 793. The socket option is also needed for the local half of RFC 5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP-level socket option that takes an unsigned int; when > 0, it specifies the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeout allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeout allows applications to "fail fast" if so desired. Otherwise it may take up to 20 minutes with the current system defaults in a normal WAN environment. The socket option can be set during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will override keepalive to determine when to close a connection due to keepalive failure. The option does not change in any way when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/tcp.h 1
-rw-r--r-- include/net/inet_connection_sock.h 1
-rw-r--r-- net/ipv4/tcp.c 11
-rw-r--r-- net/ipv4/tcp_timer.c 40
4 files changed, 37 insertions, 16 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a778ee02459..e64f4c67d0e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -105,6 +105,7 @@ enum {
105#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ 105#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */
106#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ 106#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/
107#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ 107#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
108#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */
108 109
109/* for TCP_INFO socket option */ 110/* for TCP_INFO socket option */
110#define TCPI_OPT_TIMESTAMPS 1 111#define TCPI_OPT_TIMESTAMPS 1
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b6d3b55da19..e4f494b42e0 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -125,6 +125,7 @@ struct inet_connection_sock {
125 int probe_size; 125 int probe_size;
126 } icsk_mtup; 126 } icsk_mtup;
127 u32 icsk_ca_priv[16]; 127 u32 icsk_ca_priv[16];
128 u32 icsk_user_timeout;
128#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) 129#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
129}; 130};
130 131
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 176e11aaea7..cf325452875 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2391,7 +2391,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2391 err = tp->af_specific->md5_parse(sk, optval, optlen); 2391 err = tp->af_specific->md5_parse(sk, optval, optlen);
2392 break; 2392 break;
2393#endif 2393#endif
2394 2394 case TCP_USER_TIMEOUT:
2395 /* Cap the max timeout in ms TCP will retry/retrans
2396 * before giving up and aborting (ETIMEDOUT) a connection.
2397 */
2398 icsk->icsk_user_timeout = msecs_to_jiffies(val);
2399 break;
2395 default: 2400 default:
2396 err = -ENOPROTOOPT; 2401 err = -ENOPROTOOPT;
2397 break; 2402 break;
@@ -2610,6 +2615,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2610 case TCP_THIN_DUPACK: 2615 case TCP_THIN_DUPACK:
2611 val = tp->thin_dupack; 2616 val = tp->thin_dupack;
2612 break; 2617 break;
2618
2619 case TCP_USER_TIMEOUT:
2620 val = jiffies_to_msecs(icsk->icsk_user_timeout);
2621 break;
2613 default: 2622 default:
2614 return -ENOPROTOOPT; 2623 return -ENOPROTOOPT;
2615 } 2624 }
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 808bb920c9f..11569deccbe 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
138 * retransmissions with an initial RTO of TCP_RTO_MIN. 138 * retransmissions with an initial RTO of TCP_RTO_MIN.
139 */ 139 */
140static bool retransmits_timed_out(struct sock *sk, 140static bool retransmits_timed_out(struct sock *sk,
141 unsigned int boundary) 141 unsigned int boundary,
142 unsigned int timeout)
142{ 143{
143 unsigned int timeout, linear_backoff_thresh; 144 unsigned int linear_backoff_thresh, start_ts;
144 unsigned int start_ts;
145 145
146 if (!inet_csk(sk)->icsk_retransmits) 146 if (!inet_csk(sk)->icsk_retransmits)
147 return false; 147 return false;
@@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk,
151 else 151 else
152 start_ts = tcp_sk(sk)->retrans_stamp; 152 start_ts = tcp_sk(sk)->retrans_stamp;
153 153
154 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); 154 if (likely(timeout == 0)) {
155 155 linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
156 if (boundary <= linear_backoff_thresh)
157 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
158 else
159 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
160 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
161 156
157 if (boundary <= linear_backoff_thresh)
158 timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
159 else
160 timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
161 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
162 }
162 return (tcp_time_stamp - start_ts) >= timeout; 163 return (tcp_time_stamp - start_ts) >= timeout;
163} 164}
164 165
@@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk)
174 dst_negative_advice(sk); 175 dst_negative_advice(sk);
175 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
176 } else { 177 } else {
177 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 178 if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
178 /* Black hole detection */ 179 /* Black hole detection */
179 tcp_mtu_probing(icsk, sk); 180 tcp_mtu_probing(icsk, sk);
180 181
@@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk)
187 188
188 retry_until = tcp_orphan_retries(sk, alive); 189 retry_until = tcp_orphan_retries(sk, alive);
189 do_reset = alive || 190 do_reset = alive ||
190 !retransmits_timed_out(sk, retry_until); 191 !retransmits_timed_out(sk, retry_until, 0);
191 192
192 if (tcp_out_of_resources(sk, do_reset)) 193 if (tcp_out_of_resources(sk, do_reset))
193 return 1; 194 return 1;
194 } 195 }
195 } 196 }
196 197
197 if (retransmits_timed_out(sk, retry_until)) { 198 if (retransmits_timed_out(sk, retry_until,
199 (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 :
200 icsk->icsk_user_timeout)) {
198 /* Has it gone just too far? */ 201 /* Has it gone just too far? */
199 tcp_write_err(sk); 202 tcp_write_err(sk);
200 return 1; 203 return 1;
@@ -436,7 +439,7 @@ out_reset_timer:
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 439 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 } 440 }
438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 441 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 442 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
440 __sk_dst_reset(sk); 443 __sk_dst_reset(sk);
441 444
442out:; 445out:;
@@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data)
556 elapsed = keepalive_time_elapsed(tp); 559 elapsed = keepalive_time_elapsed(tp);
557 560
558 if (elapsed >= keepalive_time_when(tp)) { 561 if (elapsed >= keepalive_time_when(tp)) {
559 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 562 /* If the TCP_USER_TIMEOUT option is enabled, use that
563 * to determine when to timeout instead.
564 */
565 if ((icsk->icsk_user_timeout != 0 &&
566 elapsed >= icsk->icsk_user_timeout &&
567 icsk->icsk_probes_out > 0) ||
568 (icsk->icsk_user_timeout == 0 &&
569 icsk->icsk_probes_out >= keepalive_probes(tp))) {
560 tcp_send_active_reset(sk, GFP_ATOMIC); 570 tcp_send_active_reset(sk, GFP_ATOMIC);
561 tcp_write_err(sk); 571 tcp_write_err(sk);
562 goto out; 572 goto out;