path: root/net/ipv4/tcp_output.c
author	Eric Dumazet <edumazet@googl.com>	2017-09-13 23:30:39 -0400
committer	David S. Miller <davem@davemloft.net>	2017-09-15 17:36:28 -0400
commit	8c72c65b426b47b3c166a8fef0d8927fe5e8a28d (patch)
tree	95b5da840cb86705c4912ba22b6d3d6a9040b967 /net/ipv4/tcp_output.c
parent	cbea8f02069533ea2ad4e5b3bfbcdb0894c20354 (diff)
tcp: update skb->skb_mstamp more carefully
liujian reported a problem in TCP_USER_TIMEOUT processing with a patch
in tcp_probe_timer() :
https://www.spinics.net/lists/netdev/msg454496.html

After investigations, the root cause of the problem is that we update
skb->skb_mstamp of skbs in the write queue, even if the attempt to send
a clone or copy of it failed. One reason being a routing problem.

This patch prevents this, solving liujian's issue.

It also removes a potential RTT miscalculation, since
__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with
TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has been
changed.

A future ACK would then lead to a very small RTT sample and min_rtt
would then be lowered to this too small value.

Tested:

 # cat user_timeout.pkt
 --local_ip=192.168.102.64

    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0`
   +0 < S 0:0(0) win 0 <mss 1460>
   +0 > S. 0:0(0) ack 1 <mss 1460>

  +.1 < . 1:1(0) ack 1 win 65530
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
   +0 write(4, ..., 24) = 24
   +0 > P. 1:25(24) ack 1 win 29200
  +.1 < . 1:1(0) ack 25 win 65530

 //change the ipaddress
   +1 `ifconfig tun0 192.168.0.10/16`

   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24
   +1 write(4, ..., 24) = 24

   +0 `ifconfig tun0 192.168.102.64/16`
   +0 < . 1:2(1) ack 25 win 65530
   +0 `ifconfig tun0 192.168.0.10/16`

   +3 write(4, ..., 24) = -1

 # ./packetdrill user_timeout.pkt

Signed-off-by: Eric Dumazet <edumazet@googl.com>
Reported-by: liujian <liujian56@huawei.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
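Editor's note: as a toy userspace sketch (not kernel code) of the failure mode above, consider what happens to a TCP_USER_TIMEOUT-style check when the head skb's timestamp is refreshed on every failed transmit attempt: the measured elapsed time never grows, so the timeout never fires. The names fake_skb and user_timeout_expired() are invented for the example; the real checks live in the kernel timer code (e.g. tcp_probe_timer()).

/* Illustrative sketch only: simplified model, hypothetical names. */
#include <stdbool.h>
#include <stdio.h>

struct fake_skb {
	unsigned long mstamp;	/* when this data was (first) sent, in ms */
};

/* Simplified user-timeout check: give up once the oldest unacked
 * data has been pending longer than user_timeout_ms. */
static bool user_timeout_expired(const struct fake_skb *head,
				 unsigned long now_ms,
				 unsigned long user_timeout_ms)
{
	return now_ms - head->mstamp > user_timeout_ms;
}

int main(void)
{
	struct fake_skb head = { .mstamp = 0 };
	unsigned long user_timeout_ms = 3000;
	unsigned long now;

	for (now = 0; now <= 10000; now += 1000) {
		bool send_failed = true;	/* e.g. no route: every attempt fails */

		/* Buggy behaviour: timestamp refreshed even on failure,
		 * so the elapsed time below never exceeds 1000 ms. */
		if (send_failed)
			head.mstamp = now;

		printf("t=%5lu ms expired=%d\n", now,
		       user_timeout_expired(&head, now, user_timeout_ms));
	}
	/* With the fix (update the timestamp only when the transmit
	 * succeeded), head.mstamp would stay at 0 and the check would
	 * trip once ~3 s have elapsed. */
	return 0;
}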
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	19
1 file changed, 12 insertions, 7 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5b6690d05abb..a85a8c2948e5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	struct tcp_skb_cb *tcb;
 	struct tcp_out_options opts;
 	unsigned int tcp_options_size, tcp_header_size;
+	struct sk_buff *oskb = NULL;
 	struct tcp_md5sig_key *md5;
 	struct tcphdr *th;
 	int err;
@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 	tp = tcp_sk(sk);
 
-	skb->skb_mstamp = tp->tcp_mstamp;
 	if (clone_it) {
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
 		tcp_rate_skb_sent(sk, skb);
 
+		oskb = skb;
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+	skb->skb_mstamp = tp->tcp_mstamp;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
 
-	if (likely(err <= 0))
-		return err;
-
-	tcp_enter_cwr(sk);
+	if (unlikely(err > 0)) {
+		tcp_enter_cwr(sk);
+		err = net_xmit_eval(err);
+	}
+	if (!err && oskb)
+		oskb->skb_mstamp = tp->tcp_mstamp;
 
-	return net_xmit_eval(err);
+	return err;
 }
 
 /* This routine just queues the buffer for sending.
@@ -2869,10 +2873,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		     skb_headroom(skb) >= 0xFFFF)) {
 		struct sk_buff *nskb;
 
-		skb->skb_mstamp = tp->tcp_mstamp;
 		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
 		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
 			     -ENOBUFS;
+		if (!err)
+			skb->skb_mstamp = tp->tcp_mstamp;
 	} else {
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 	}
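Editor's note: to make the changelog's second point (the RTT miscalculation) concrete, here is a back-of-the-envelope sketch under assumed timings. The variable names and numbers are invented for illustration; the real sampling happens in the kernel's ACK processing, not in code like this.

/* Illustrative sketch only: simplified timeline, hypothetical names. */
#include <stdio.h>

int main(void)
{
	/* Timeline in ms, simplified. */
	unsigned long sent_at = 1000;		/* original transmit of the skb        */
	unsigned long retrans_attempt = 1180;	/* failed retransmit (e.g. no route)   */
	unsigned long ack_at = 1200;		/* ACK of the *original* transmission  */

	/* Old behaviour: the failed attempt still rewrote skb->skb_mstamp and
	 * did not set TCPCB_EVER_RETRANS, so the ACK is treated as a valid
	 * RTT sample against the rewritten timestamp. */
	unsigned long buggy_sample = ack_at - retrans_attempt;	/*  20 ms */

	/* Fixed behaviour: skb_mstamp is untouched on failure, so the sample
	 * reflects the original transmit time. */
	unsigned long fixed_sample = ack_at - sent_at;		/* 200 ms */

	printf("buggy RTT sample: %lu ms (min_rtt collapses to this value)\n",
	       buggy_sample);
	printf("fixed RTT sample: %lu ms\n", fixed_sample);
	return 0;
}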