aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorJerry Chu <hkchu@google.com>2011-06-08 07:08:38 -0400
committerDavid S. Miller <davem@davemloft.net>2011-06-08 20:05:30 -0400
commit9ad7c049f0f79c418e293b1b68cf10d68f54fcdb (patch)
tree4b930b213f7a050afe9fae78432c1a4a6dce08a5 /net/ipv4/tcp_input.c
parentaee80b54b235d34d87b25dfbe32f0f0ffee1b544 (diff)
tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side
This patch lowers the default initRTO from 3secs to 1sec per RFC2988bis. It falls back to 3secs if the SYN or SYN-ACK packet has been retransmitted, AND the TCP timestamp option is not on. It also adds support to take RTT sample during 3WHS on the passive open side, just like its active open counterpart, and uses it, if valid, to seed the initRTO for the data transmission phase. The patch also resets ssthresh to its initial default at the beginning of the data transmission phase, and reduces cwnd to 1 if there has been MORE THAN ONE retransmission during 3WHS per RFC5681. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--net/ipv4/tcp_input.c46
1 files changed, 25 insertions, 21 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bef9f04c22ba..ea0d2183df4b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk)
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); 880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp) 881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp; 882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 } else {
884 /* ssthresh may have been reduced unnecessarily during.
885 * 3WHS. Restore it back to its initial default.
886 */
887 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
883 } 888 }
884 if (dst_metric(dst, RTAX_REORDERING) && 889 if (dst_metric(dst, RTAX_REORDERING) &&
885 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 890 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk)
887 tp->reordering = dst_metric(dst, RTAX_REORDERING); 892 tp->reordering = dst_metric(dst, RTAX_REORDERING);
888 } 893 }
889 894
890 if (dst_metric(dst, RTAX_RTT) == 0) 895 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
891 goto reset;
892
893 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
894 goto reset; 896 goto reset;
895 897
896 /* Initial rtt is determined from SYN,SYN-ACK. 898 /* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk)
916 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 918 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917 } 919 }
918 tcp_set_rto(sk); 920 tcp_set_rto(sk);
919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
920reset: 921reset:
921 /* Play conservative. If timestamps are not 922 if (tp->srtt == 0) {
922 * supported, TCP will fail to recalculate correct 923 /* RFC2988bis: We've failed to get a valid RTT sample from
923 * rtt, if initial rto is too small. FORGET ALL AND RESET! 924 * 3WHS. This is most likely due to retransmission,
925 * including spurious one. Reset the RTO back to 3secs
926 * from the more aggressive 1sec to avoid more spurious
927 * retransmission.
924 */ 928 */
925 if (!tp->rx_opt.saw_tstamp && tp->srtt) { 929 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
926 tp->srtt = 0; 930 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
927 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
928 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
929 }
930 } 931 }
931 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 932 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933 * retransmitted. In light of RFC2988bis' more aggressive 1sec
934 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935 * retransmission has occurred.
936 */
937 if (tp->total_retrans > 1)
938 tp->snd_cwnd = 1;
939 else
940 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932 tp->snd_cwnd_stamp = tcp_time_stamp; 941 tp->snd_cwnd_stamp = tcp_time_stamp;
933} 942}
934 943
@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
3112 tcp_xmit_retransmit_queue(sk); 3121 tcp_xmit_retransmit_queue(sk);
3113} 3122}
3114 3123
3115static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) 3124void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3116{ 3125{
3117 tcp_rtt_estimator(sk, seq_rtt); 3126 tcp_rtt_estimator(sk, seq_rtt);
3118 tcp_set_rto(sk); 3127 tcp_set_rto(sk);
3119 inet_csk(sk)->icsk_backoff = 0; 3128 inet_csk(sk)->icsk_backoff = 0;
3120} 3129}
3130EXPORT_SYMBOL(tcp_valid_rtt_meas);
3121 3131
3122/* Read draft-ietf-tcplw-high-performance before mucking 3132/* Read draft-ietf-tcplw-high-performance before mucking
3123 * with this code. (Supersedes RFC1323) 3133 * with this code. (Supersedes RFC1323)
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5806 tp->rx_opt.snd_wscale; 5816 tp->rx_opt.snd_wscale;
5807 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5817 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5808 5818
5809 /* tcp_ack considers this ACK as duplicate
5810 * and does not calculate rtt.
5811 * Force it here.
5812 */
5813 tcp_ack_update_rtt(sk, 0, 0);
5814
5815 if (tp->rx_opt.tstamp_ok) 5819 if (tp->rx_opt.tstamp_ok)
5816 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5820 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5817 5821