diff options
author | Jerry Chu <hkchu@google.com> | 2011-06-08 07:08:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-06-08 20:05:30 -0400 |
commit | 9ad7c049f0f79c418e293b1b68cf10d68f54fcdb (patch) | |
tree | 4b930b213f7a050afe9fae78432c1a4a6dce08a5 /net/ipv4/tcp_input.c | |
parent | aee80b54b235d34d87b25dfbe32f0f0ffee1b544 (diff) |
tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side
This patch lowers the default initRTO from 3secs to 1sec per
RFC2988bis. It falls back to 3secs if the SYN or SYN-ACK packet
has been retransmitted, AND the TCP timestamp option is not on.
It also adds support to take RTT sample during 3WHS on the passive
open side, just like its active open counterpart, and uses it, if
valid, to seed the initRTO for the data transmission phase.
The patch also resets ssthresh to its initial default at the
beginning of the data transmission phase, and reduces cwnd to 1 if
there has been MORE THAN ONE retransmission during 3WHS per RFC5681.
Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 46 |
1 files changed, 25 insertions, 21 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22ba..ea0d2183df4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk) | |||
880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); | 880 | tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); |
881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) | 881 | if (tp->snd_ssthresh > tp->snd_cwnd_clamp) |
882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; | 882 | tp->snd_ssthresh = tp->snd_cwnd_clamp; |
883 | } else { | ||
884 | /* ssthresh may have been reduced unnecessarily during. | ||
885 | * 3WHS. Restore it back to its initial default. | ||
886 | */ | ||
887 | tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; | ||
883 | } | 888 | } |
884 | if (dst_metric(dst, RTAX_REORDERING) && | 889 | if (dst_metric(dst, RTAX_REORDERING) && |
885 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 890 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
887 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 892 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
888 | } | 893 | } |
889 | 894 | ||
890 | if (dst_metric(dst, RTAX_RTT) == 0) | 895 | if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0) |
891 | goto reset; | ||
892 | |||
893 | if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) | ||
894 | goto reset; | 896 | goto reset; |
895 | 897 | ||
896 | /* Initial rtt is determined from SYN,SYN-ACK. | 898 | /* Initial rtt is determined from SYN,SYN-ACK. |
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk) | |||
916 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); | 918 | tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); |
917 | } | 919 | } |
918 | tcp_set_rto(sk); | 920 | tcp_set_rto(sk); |
919 | if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) { | ||
920 | reset: | 921 | reset: |
921 | /* Play conservative. If timestamps are not | 922 | if (tp->srtt == 0) { |
922 | * supported, TCP will fail to recalculate correct | 923 | /* RFC2988bis: We've failed to get a valid RTT sample from |
923 | * rtt, if initial rto is too small. FORGET ALL AND RESET! | 924 | * 3WHS. This is most likely due to retransmission, |
925 | * including spurious one. Reset the RTO back to 3secs | ||
926 | * from the more aggressive 1sec to avoid more spurious | ||
927 | * retransmission. | ||
924 | */ | 928 | */ |
925 | if (!tp->rx_opt.saw_tstamp && tp->srtt) { | 929 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; |
926 | tp->srtt = 0; | 930 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
927 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; | ||
928 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; | ||
929 | } | ||
930 | } | 931 | } |
931 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | 932 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
933 | * retransmitted. In light of RFC2988bis' more aggressive 1sec | ||
934 | * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK | ||
935 | * retransmission has occurred. | ||
936 | */ | ||
937 | if (tp->total_retrans > 1) | ||
938 | tp->snd_cwnd = 1; | ||
939 | else | ||
940 | tp->snd_cwnd = tcp_init_cwnd(tp, dst); | ||
932 | tp->snd_cwnd_stamp = tcp_time_stamp; | 941 | tp->snd_cwnd_stamp = tcp_time_stamp; |
933 | } | 942 | } |
934 | 943 | ||
@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) | |||
3112 | tcp_xmit_retransmit_queue(sk); | 3121 | tcp_xmit_retransmit_queue(sk); |
3113 | } | 3122 | } |
3114 | 3123 | ||
3115 | static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) | 3124 | void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) |
3116 | { | 3125 | { |
3117 | tcp_rtt_estimator(sk, seq_rtt); | 3126 | tcp_rtt_estimator(sk, seq_rtt); |
3118 | tcp_set_rto(sk); | 3127 | tcp_set_rto(sk); |
3119 | inet_csk(sk)->icsk_backoff = 0; | 3128 | inet_csk(sk)->icsk_backoff = 0; |
3120 | } | 3129 | } |
3130 | EXPORT_SYMBOL(tcp_valid_rtt_meas); | ||
3121 | 3131 | ||
3122 | /* Read draft-ietf-tcplw-high-performance before mucking | 3132 | /* Read draft-ietf-tcplw-high-performance before mucking |
3123 | * with this code. (Supersedes RFC1323) | 3133 | * with this code. (Supersedes RFC1323) |
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5806 | tp->rx_opt.snd_wscale; | 5816 | tp->rx_opt.snd_wscale; |
5807 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); | 5817 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5808 | 5818 | ||
5809 | /* tcp_ack considers this ACK as duplicate | ||
5810 | * and does not calculate rtt. | ||
5811 | * Force it here. | ||
5812 | */ | ||
5813 | tcp_ack_update_rtt(sk, 0, 0); | ||
5814 | |||
5815 | if (tp->rx_opt.tstamp_ok) | 5819 | if (tp->rx_opt.tstamp_ok) |
5816 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 5820 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
5817 | 5821 | ||