aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Damian Lukowski <damian@tvk.rwth-aachen.de> 2009-08-25 20:16:34 -0400
committer: David S. Miller <davem@davemloft.net> 2009-09-01 05:45:47 -0400
commit: 6fa12c85031485dff38ce550c24f10da23b0adaa (patch)
tree: 73f6cac32302ab99381f38505a5dbe930cb28e44
parent: f1ecd5d9e7366609d640ff4040304ea197fbc618 (diff)
Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value.
RFC 1122 specifies two threshold values, R1 and R2, for connection timeouts; each may be expressed either as a number of allowed retransmissions or as a timeout value. Currently Linux uses sysctl_tcp_retries{1,2} to specify the thresholds as a number of allowed retransmissions. For any desired threshold R2 (expressed as a time) one can specify tcp_retries2 (expressed as a number of retransmissions) such that TCP will not time out earlier than R2. This is the case because the RTO schedule follows a fixed pattern, namely exponential backoff. However, the RTO behaviour is no longer predictable if RTO backoffs can be reverted, as is the case in the draft "Make TCP more Robust to Long Connectivity Disruptions" (http://tools.ietf.org/html/draft-zimmermann-tcp-lcd). In the worst case TCP would time out a connection after 3.2 seconds, if the initial RTO equaled MIN_RTO and each backoff has been reverted. This patch introduces a function retransmits_timed_out(N), which calculates the timeout of a TCP connection, assuming an initial RTO of MIN_RTO and N unsuccessful, exponentially backed-off retransmissions. Whenever timeout decisions are made by comparing the retransmission counter to some value N, this function can be used instead. The meaning of tcp_retries2 will change, as many more RTO retransmissions can occur than the value indicates. However, it yields a timeout which is similar to the one of an unpatched, exponentially backing off TCP in the same scenario. As no application could rely on an RTO greater than MIN_RTO, there should be no risk of a regression. Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de> Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/tcp.h 18
-rw-r--r-- net/ipv4/tcp_timer.c 11
2 files changed, 25 insertions, 4 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 54f212ce8aaf..e5319495f15e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1252,6 +1252,24 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu
1252#define tcp_for_write_queue_from_safe(skb, tmp, sk) \ 1252#define tcp_for_write_queue_from_safe(skb, tmp, sk) \
1253 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) 1253 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
1254 1254
/*
 * retransmits_timed_out() - time-based replacement for comparing the
 * retransmission counter against a retry threshold.
 *
 * @sk:       socket whose retransmission state is examined
 * @boundary: threshold expressed as a number of retransmissions
 *
 * Converts @boundary into a time limit, assuming an initial RTO of
 * TCP_RTO_MIN that doubles on every unsuccessful retransmission until it
 * reaches TCP_RTO_MAX, and reports whether at least that much time has
 * elapsed since tcp_sk(sk)->retrans_stamp.
 *
 * Returns false if no retransmission is outstanding (icsk_retransmits is
 * zero); otherwise true once the elapsed time reaches the computed limit.
 */
1255static inline bool retransmits_timed_out(const struct sock *sk,
1256 unsigned int boundary)
1257{
1258 int limit, K;
/* Nothing has been retransmitted yet, so nothing can have timed out. */
1259 if (!inet_csk(sk)->icsk_retransmits)
1260 return false;
1261
/* K: number of doublings that take the RTO from TCP_RTO_MIN to (about) TCP_RTO_MAX. */
1262 K = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
1263
/*
 * (2 << n) - 1 equals 2^0 + 2^1 + ... + 2^n, i.e. the summed length of
 * n + 1 exponentially growing intervals measured in units of TCP_RTO_MIN.
 */
1264 if (boundary <= K)
1265 limit = ((2 << boundary) - 1) * TCP_RTO_MIN;
1266 else
/* Past K doublings every further interval is counted as a full TCP_RTO_MAX. */
1267 limit = ((2 << K) - 1) * TCP_RTO_MIN +
1268 (boundary - K) * TCP_RTO_MAX;
1269
/* NOTE(review): limit is a signed int compared against a timestamp difference - confirm the operand types and that large boundary values cannot overflow. */
1270 return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= limit;
1271}
1272
1255static inline struct sk_buff *tcp_send_head(struct sock *sk) 1273static inline struct sk_buff *tcp_send_head(struct sock *sk)
1256{ 1274{
1257 return sk->sk_send_head; 1275 return sk->sk_send_head;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 408fa4b7b9ba..cdb2ca7684d4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
137{ 137{
138 struct inet_connection_sock *icsk = inet_csk(sk); 138 struct inet_connection_sock *icsk = inet_csk(sk);
139 int retry_until; 139 int retry_until;
140 bool do_reset;
140 141
141 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
142 if (icsk->icsk_retransmits) 143 if (icsk->icsk_retransmits)
143 dst_negative_advice(&sk->sk_dst_cache); 144 dst_negative_advice(&sk->sk_dst_cache);
144 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
145 } else { 146 } else {
146 if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { 147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
147 /* Black hole detection */ 148 /* Black hole detection */
148 tcp_mtu_probing(icsk, sk); 149 tcp_mtu_probing(icsk, sk);
149 150
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
155 const int alive = (icsk->icsk_rto < TCP_RTO_MAX); 156 const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
156 157
157 retry_until = tcp_orphan_retries(sk, alive); 158 retry_until = tcp_orphan_retries(sk, alive);
159 do_reset = alive ||
160 !retransmits_timed_out(sk, retry_until);
158 161
159 if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) 162 if (tcp_out_of_resources(sk, do_reset))
160 return 1; 163 return 1;
161 } 164 }
162 } 165 }
163 166
164 if (icsk->icsk_retransmits >= retry_until) { 167 if (retransmits_timed_out(sk, retry_until)) {
165 /* Has it gone just too far? */ 168 /* Has it gone just too far? */
166 tcp_write_err(sk); 169 tcp_write_err(sk);
167 return 1; 170 return 1;
@@ -385,7 +388,7 @@ void tcp_retransmit_timer(struct sock *sk)
385out_reset_timer: 388out_reset_timer:
386 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 389 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
387 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 390 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
388 if (icsk->icsk_retransmits > sysctl_tcp_retries1) 391 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
389 __sk_dst_reset(sk); 392 __sk_dst_reset(sk);
390 393
391out:; 394out:;