diff options
author | Damian Lukowski <damian@tvk.rwth-aachen.de> | 2009-08-25 20:16:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-09-01 05:45:42 -0400 |
commit | f1ecd5d9e7366609d640ff4040304ea197fbc618 (patch) | |
tree | ef4901d52ad0d2555094998f02d08788e197c20a /net/ipv4 | |
parent | 4d1a2d9ec1c17df077ed09a0d135bccf5637a3b7 (diff) |
Revert Backoff [v3]: Revert RTO on ICMP destination unreachable
Here, an ICMP host/network unreachable message, whose payload fits to
TCP's SND.UNA, is taken as an indication that the RTO retransmission has
not been lost due to congestion, but because of a route failure
somewhere along the path.
With true congestion, a router won't trigger such a message and the
patched TCP will operate as standard TCP.
This patch reverts one RTO backoff, if an ICMP host/network unreachable
message, whose payload fits to TCP's SND.UNA, arrives.
Based on the new RTO, the retransmission timer is reset to reflect the
remaining time, or - if the revert clocked out the timer - a retransmission
is sent out immediately.
Backoffs are only reverted, if TCP is in RTO loss recovery, i.e. if
there have been retransmissions and reversible backoffs, already.
Changes from v2:
1) Renaming of skb in tcp_v4_err() moved to another patch.
2) Reintroduced tcp_bound_rto() and __tcp_set_rto().
3) Fixed code comments.
Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/tcp_input.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 37 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 2 |
3 files changed, 40 insertions, 4 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bdb0da237e..af6d6fa00db 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk) | |||
685 | * is invisible. Actually, Linux-2.4 also generates erratic | 685 | * is invisible. Actually, Linux-2.4 also generates erratic |
686 | * ACKs in some circumstances. | 686 | * ACKs in some circumstances. |
687 | */ | 687 | */ |
688 | inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; | 688 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); |
689 | 689 | ||
690 | /* 2. Fixups made earlier cannot be right. | 690 | /* 2. Fixups made earlier cannot be right. |
691 | * If we do not estimate RTO correctly without them, | 691 | * If we do not estimate RTO correctly without them, |
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk) | |||
696 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo | 696 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo |
697 | * guarantees that rto is higher. | 697 | * guarantees that rto is higher. |
698 | */ | 698 | */ |
699 | if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) | 699 | tcp_bound_rto(sk); |
700 | inet_csk(sk)->icsk_rto = TCP_RTO_MAX; | ||
701 | } | 700 | } |
702 | 701 | ||
703 | /* Save metrics learned by this TCP session. | 702 | /* Save metrics learned by this TCP session. |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6ca1bc8c302..6755e29a6dd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -332,12 +332,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
332 | { | 332 | { |
333 | struct iphdr *iph = (struct iphdr *)icmp_skb->data; | 333 | struct iphdr *iph = (struct iphdr *)icmp_skb->data; |
334 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); | 334 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); |
335 | struct inet_connection_sock *icsk; | ||
335 | struct tcp_sock *tp; | 336 | struct tcp_sock *tp; |
336 | struct inet_sock *inet; | 337 | struct inet_sock *inet; |
337 | const int type = icmp_hdr(icmp_skb)->type; | 338 | const int type = icmp_hdr(icmp_skb)->type; |
338 | const int code = icmp_hdr(icmp_skb)->code; | 339 | const int code = icmp_hdr(icmp_skb)->code; |
339 | struct sock *sk; | 340 | struct sock *sk; |
341 | struct sk_buff *skb; | ||
340 | __u32 seq; | 342 | __u32 seq; |
343 | __u32 remaining; | ||
341 | int err; | 344 | int err; |
342 | struct net *net = dev_net(icmp_skb->dev); | 345 | struct net *net = dev_net(icmp_skb->dev); |
343 | 346 | ||
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
367 | if (sk->sk_state == TCP_CLOSE) | 370 | if (sk->sk_state == TCP_CLOSE) |
368 | goto out; | 371 | goto out; |
369 | 372 | ||
373 | icsk = inet_csk(sk); | ||
370 | tp = tcp_sk(sk); | 374 | tp = tcp_sk(sk); |
371 | seq = ntohl(th->seq); | 375 | seq = ntohl(th->seq); |
372 | if (sk->sk_state != TCP_LISTEN && | 376 | if (sk->sk_state != TCP_LISTEN && |
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
393 | } | 397 | } |
394 | 398 | ||
395 | err = icmp_err_convert[code].errno; | 399 | err = icmp_err_convert[code].errno; |
400 | /* check if icmp_skb allows revert of backoff | ||
401 | * (see draft-zimmermann-tcp-lcd) */ | ||
402 | if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) | ||
403 | break; | ||
404 | if (seq != tp->snd_una || !icsk->icsk_retransmits || | ||
405 | !icsk->icsk_backoff) | ||
406 | break; | ||
407 | |||
408 | icsk->icsk_backoff--; | ||
409 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | ||
410 | icsk->icsk_backoff; | ||
411 | tcp_bound_rto(sk); | ||
412 | |||
413 | skb = tcp_write_queue_head(sk); | ||
414 | BUG_ON(!skb); | ||
415 | |||
416 | remaining = icsk->icsk_rto - min(icsk->icsk_rto, | ||
417 | tcp_time_stamp - TCP_SKB_CB(skb)->when); | ||
418 | |||
419 | if (remaining) { | ||
420 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
421 | remaining, TCP_RTO_MAX); | ||
422 | } else if (sock_owned_by_user(sk)) { | ||
423 | /* RTO revert clocked out retransmission, | ||
424 | * but socket is locked. Will defer. */ | ||
425 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
426 | HZ/20, TCP_RTO_MAX); | ||
427 | } else { | ||
428 | /* RTO revert clocked out retransmission. | ||
429 | * Will retransmit now */ | ||
430 | tcp_retransmit_timer(sk); | ||
431 | } | ||
432 | |||
396 | break; | 433 | break; |
397 | case ICMP_TIME_EXCEEDED: | 434 | case ICMP_TIME_EXCEEDED: |
398 | err = EHOSTUNREACH; | 435 | err = EHOSTUNREACH; |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c520fb6e06d..408fa4b7b9b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -279,7 +279,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
279 | * The TCP retransmit timer. | 279 | * The TCP retransmit timer. |
280 | */ | 280 | */ |
281 | 281 | ||
282 | static void tcp_retransmit_timer(struct sock *sk) | 282 | void tcp_retransmit_timer(struct sock *sk) |
283 | { | 283 | { |
284 | struct tcp_sock *tp = tcp_sk(sk); | 284 | struct tcp_sock *tp = tcp_sk(sk); |
285 | struct inet_connection_sock *icsk = inet_csk(sk); | 285 | struct inet_connection_sock *icsk = inet_csk(sk); |