aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJerry Chu <hkchu@google.com>2011-06-08 07:08:38 -0400
committerDavid S. Miller <davem@davemloft.net>2011-06-08 20:05:30 -0400
commit9ad7c049f0f79c418e293b1b68cf10d68f54fcdb (patch)
tree4b930b213f7a050afe9fae78432c1a4a6dce08a5 /net
parentaee80b54b235d34d87b25dfbe32f0f0ffee1b544 (diff)
tcp: RFC2988bis + taking RTT sample from 3WHS for the passive open side
This patch lowers the default initRTO from 3secs to 1sec per RFC2988bis. It falls back to 3secs if the SYN or SYN-ACK packet has been retransmitted, AND the TCP timestamp option is not on. It also adds support to take RTT sample during 3WHS on the passive open side, just like its active open counterpart, and uses it, if valid, to seed the initRTO for the data transmission phase. The patch also resets ssthresh to its initial default at the beginning of the data transmission phase, and reduces cwnd to 1 if there has been MORE THAN ONE retransmission during 3WHS per RFC5681. Signed-off-by: H.K. Jerry Chu <hkchu@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/tcp_input.c46
-rw-r--r--net/ipv4/tcp_ipv4.c11
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c5
6 files changed, 45 insertions, 25 deletions
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26461492a847..92bb9434b338 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -316,6 +316,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
316 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
317 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; 318 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
319 treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
319 320
320 /* We throwed the options of the initial SYN away, so we hope 321 /* We throwed the options of the initial SYN away, so we hope
321 * the ACK carries the same options again (see RFC1122 4.2.3.8) 322 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bef9f04c22ba..ea0d2183df4b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -880,6 +880,11 @@ static void tcp_init_metrics(struct sock *sk)
880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH); 880 tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp) 881 if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
882 tp->snd_ssthresh = tp->snd_cwnd_clamp; 882 tp->snd_ssthresh = tp->snd_cwnd_clamp;
883 } else {
884 /* ssthresh may have been reduced unnecessarily during.
885 * 3WHS. Restore it back to its initial default.
886 */
887 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
883 } 888 }
884 if (dst_metric(dst, RTAX_REORDERING) && 889 if (dst_metric(dst, RTAX_REORDERING) &&
885 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 890 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
@@ -887,10 +892,7 @@ static void tcp_init_metrics(struct sock *sk)
887 tp->reordering = dst_metric(dst, RTAX_REORDERING); 892 tp->reordering = dst_metric(dst, RTAX_REORDERING);
888 } 893 }
889 894
890 if (dst_metric(dst, RTAX_RTT) == 0) 895 if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
891 goto reset;
892
893 if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
894 goto reset; 896 goto reset;
895 897
896 /* Initial rtt is determined from SYN,SYN-ACK. 898 /* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,19 +918,26 @@ static void tcp_init_metrics(struct sock *sk)
916 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); 918 tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
917 } 919 }
918 tcp_set_rto(sk); 920 tcp_set_rto(sk);
919 if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {
920reset: 921reset:
921 /* Play conservative. If timestamps are not 922 if (tp->srtt == 0) {
922 * supported, TCP will fail to recalculate correct 923 /* RFC2988bis: We've failed to get a valid RTT sample from
923 * rtt, if initial rto is too small. FORGET ALL AND RESET! 924 * 3WHS. This is most likely due to retransmission,
925 * including spurious one. Reset the RTO back to 3secs
926 * from the more aggressive 1sec to avoid more spurious
927 * retransmission.
924 */ 928 */
925 if (!tp->rx_opt.saw_tstamp && tp->srtt) { 929 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
926 tp->srtt = 0; 930 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
927 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
928 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
929 }
930 } 931 }
931 tp->snd_cwnd = tcp_init_cwnd(tp, dst); 932 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
933 * retransmitted. In light of RFC2988bis' more aggressive 1sec
934 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
935 * retransmission has occurred.
936 */
937 if (tp->total_retrans > 1)
938 tp->snd_cwnd = 1;
939 else
940 tp->snd_cwnd = tcp_init_cwnd(tp, dst);
932 tp->snd_cwnd_stamp = tcp_time_stamp; 941 tp->snd_cwnd_stamp = tcp_time_stamp;
933} 942}
934 943
@@ -3112,12 +3121,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
3112 tcp_xmit_retransmit_queue(sk); 3121 tcp_xmit_retransmit_queue(sk);
3113} 3122}
3114 3123
3115static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) 3124void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
3116{ 3125{
3117 tcp_rtt_estimator(sk, seq_rtt); 3126 tcp_rtt_estimator(sk, seq_rtt);
3118 tcp_set_rto(sk); 3127 tcp_set_rto(sk);
3119 inet_csk(sk)->icsk_backoff = 0; 3128 inet_csk(sk)->icsk_backoff = 0;
3120} 3129}
3130EXPORT_SYMBOL(tcp_valid_rtt_meas);
3121 3131
3122/* Read draft-ietf-tcplw-high-performance before mucking 3132/* Read draft-ietf-tcplw-high-performance before mucking
3123 * with this code. (Supersedes RFC1323) 3133 * with this code. (Supersedes RFC1323)
@@ -5806,12 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5806 tp->rx_opt.snd_wscale; 5816 tp->rx_opt.snd_wscale;
5807 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); 5817 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5808 5818
5809 /* tcp_ack considers this ACK as duplicate
5810 * and does not calculate rtt.
5811 * Force it here.
5812 */
5813 tcp_ack_update_rtt(sk, 0, 0);
5814
5815 if (tp->rx_opt.tstamp_ok) 5819 if (tp->rx_opt.tstamp_ok)
5816 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 5820 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
5817 5821
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a7d6671e33b8..617dee3ccfb1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -429,8 +429,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
429 break; 429 break;
430 430
431 icsk->icsk_backoff--; 431 icsk->icsk_backoff--;
432 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << 432 inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
433 icsk->icsk_backoff; 433 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
434 tcp_bound_rto(sk); 434 tcp_bound_rto(sk);
435 435
436 skb = tcp_write_queue_head(sk); 436 skb = tcp_write_queue_head(sk);
@@ -1384,6 +1384,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1384 isn = tcp_v4_init_sequence(skb); 1384 isn = tcp_v4_init_sequence(skb);
1385 } 1385 }
1386 tcp_rsk(req)->snt_isn = isn; 1386 tcp_rsk(req)->snt_isn = isn;
1387 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1387 1388
1388 if (tcp_v4_send_synack(sk, dst, req, 1389 if (tcp_v4_send_synack(sk, dst, req,
1389 (struct request_values *)&tmp_ext) || 1390 (struct request_values *)&tmp_ext) ||
@@ -1458,6 +1459,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1458 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; 1459 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1459 1460
1460 tcp_initialize_rcv_mss(newsk); 1461 tcp_initialize_rcv_mss(newsk);
1462 if (tcp_rsk(req)->snt_synack)
1463 tcp_valid_rtt_meas(newsk,
1464 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1465 newtp->total_retrans = req->retrans;
1461 1466
1462#ifdef CONFIG_TCP_MD5SIG 1467#ifdef CONFIG_TCP_MD5SIG
1463 /* Copy over the MD5 key from the original socket */ 1468 /* Copy over the MD5 key from the original socket */
@@ -1854,7 +1859,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1854 * algorithms that we must have the following bandaid to talk 1859 * algorithms that we must have the following bandaid to talk
1855 * efficiently to them. -DaveM 1860 * efficiently to them. -DaveM
1856 */ 1861 */
1857 tp->snd_cwnd = 2; 1862 tp->snd_cwnd = TCP_INIT_CWND;
1858 1863
1859 /* See draft-stevens-tcpca-spec-01 for discussion of the 1864 /* See draft-stevens-tcpca-spec-01 for discussion of the
1860 * initialization of these values. 1865 * initialization of these values.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 80b1f80759ab..d2fe4e06b472 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -486,7 +486,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
486 * algorithms that we must have the following bandaid to talk 486 * algorithms that we must have the following bandaid to talk
487 * efficiently to them. -DaveM 487 * efficiently to them. -DaveM
488 */ 488 */
489 newtp->snd_cwnd = 2; 489 newtp->snd_cwnd = TCP_INIT_CWND;
490 newtp->snd_cwnd_cnt = 0; 490 newtp->snd_cwnd_cnt = 0;
491 newtp->bytes_acked = 0; 491 newtp->bytes_acked = 0;
492 492
@@ -720,6 +720,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); 720 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
721 return NULL; 721 return NULL;
722 } 722 }
723 if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
724 tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
725 else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
726 tcp_rsk(req)->snt_synack = 0;
723 727
724 /* OK, ACK is valid, create big socket and 728 /* OK, ACK is valid, create big socket and
725 * feed this segment to it. It will repeat all 729 * feed this segment to it. It will repeat all
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8b9644a8b697..89d5bf806222 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -223,6 +223,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
223 ireq->wscale_ok = tcp_opt.wscale_ok; 223 ireq->wscale_ok = tcp_opt.wscale_ok;
224 ireq->tstamp_ok = tcp_opt.saw_tstamp; 224 ireq->tstamp_ok = tcp_opt.saw_tstamp;
225 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; 225 req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
226 treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
226 treq->rcv_isn = ntohl(th->seq) - 1; 227 treq->rcv_isn = ntohl(th->seq) - 1;
227 treq->snt_isn = cookie; 228 treq->snt_isn = cookie;
228 229
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d1fd28711ba5..a1ef61a889c3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1341,6 +1341,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1341 } 1341 }
1342have_isn: 1342have_isn:
1343 tcp_rsk(req)->snt_isn = isn; 1343 tcp_rsk(req)->snt_isn = isn;
1344 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1344 1345
1345 security_inet_conn_request(sk, skb, req); 1346 security_inet_conn_request(sk, skb, req);
1346 1347
@@ -1509,6 +1510,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1509 tcp_sync_mss(newsk, dst_mtu(dst)); 1510 tcp_sync_mss(newsk, dst_mtu(dst));
1510 newtp->advmss = dst_metric_advmss(dst); 1511 newtp->advmss = dst_metric_advmss(dst);
1511 tcp_initialize_rcv_mss(newsk); 1512 tcp_initialize_rcv_mss(newsk);
1513 if (tcp_rsk(req)->snt_synack)
1514 tcp_valid_rtt_meas(newsk,
1515 tcp_time_stamp - tcp_rsk(req)->snt_synack);
1516 newtp->total_retrans = req->retrans;
1512 1517
1513 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1518 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1514 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1519 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;