diff options
author | Eric Dumazet <edumazet@google.com> | 2017-09-13 23:30:39 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-09-15 17:36:28 -0400 |
commit | 8c72c65b426b47b3c166a8fef0d8927fe5e8a28d (patch) | |
tree | 95b5da840cb86705c4912ba22b6d3d6a9040b967 /net/ipv4/tcp_output.c | |
parent | cbea8f02069533ea2ad4e5b3bfbcdb0894c20354 (diff) |
tcp: update skb->skb_mstamp more carefully
liujian reported a problem in TCP_USER_TIMEOUT processing with a patch
in tcp_probe_timer() :
https://www.spinics.net/lists/netdev/msg454496.html
After investigations, the root cause of the problem is that we update
skb->skb_mstamp of skbs in write queue, even if the attempt to send a
clone or copy of it failed. One reason being a routing problem.
This patch prevents this, solving liujian issue.
It also removes a potential RTT miscalculation, since
__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with
TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has
been changed.
A future ACK would then lead to a very small RTT sample and min_rtt
would then be lowered to this too small value.
Tested:
# cat user_timeout.pkt
--local_ip=192.168.102.64
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0`
+0 < S 0:0(0) win 0 <mss 1460>
+0 > S. 0:0(0) ack 1 <mss 1460>
+.1 < . 1:1(0) ack 1 win 65530
+0 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+0 write(4, ..., 24) = 24
+0 > P. 1:25(24) ack 1 win 29200
+.1 < . 1:1(0) ack 25 win 65530
//change the ipaddress
+1 `ifconfig tun0 192.168.0.10/16`
+1 write(4, ..., 24) = 24
+1 write(4, ..., 24) = 24
+1 write(4, ..., 24) = 24
+1 write(4, ..., 24) = 24
+0 `ifconfig tun0 192.168.102.64/16`
+0 < . 1:2(1) ack 25 win 65530
+0 `ifconfig tun0 192.168.0.10/16`
+3 write(4, ..., 24) = -1
# ./packetdrill user_timeout.pkt
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: liujian <liujian56@huawei.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 19 |
1 files changed, 12 insertions, 7 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5b6690d05abb..a85a8c2948e5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
991 | struct tcp_skb_cb *tcb; | 991 | struct tcp_skb_cb *tcb; |
992 | struct tcp_out_options opts; | 992 | struct tcp_out_options opts; |
993 | unsigned int tcp_options_size, tcp_header_size; | 993 | unsigned int tcp_options_size, tcp_header_size; |
994 | struct sk_buff *oskb = NULL; | ||
994 | struct tcp_md5sig_key *md5; | 995 | struct tcp_md5sig_key *md5; |
995 | struct tcphdr *th; | 996 | struct tcphdr *th; |
996 | int err; | 997 | int err; |
@@ -998,12 +999,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
998 | BUG_ON(!skb || !tcp_skb_pcount(skb)); | 999 | BUG_ON(!skb || !tcp_skb_pcount(skb)); |
999 | tp = tcp_sk(sk); | 1000 | tp = tcp_sk(sk); |
1000 | 1001 | ||
1001 | skb->skb_mstamp = tp->tcp_mstamp; | ||
1002 | if (clone_it) { | 1002 | if (clone_it) { |
1003 | TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq | 1003 | TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq |
1004 | - tp->snd_una; | 1004 | - tp->snd_una; |
1005 | tcp_rate_skb_sent(sk, skb); | 1005 | tcp_rate_skb_sent(sk, skb); |
1006 | 1006 | ||
1007 | oskb = skb; | ||
1007 | if (unlikely(skb_cloned(skb))) | 1008 | if (unlikely(skb_cloned(skb))) |
1008 | skb = pskb_copy(skb, gfp_mask); | 1009 | skb = pskb_copy(skb, gfp_mask); |
1009 | else | 1010 | else |
@@ -1011,6 +1012,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
1011 | if (unlikely(!skb)) | 1012 | if (unlikely(!skb)) |
1012 | return -ENOBUFS; | 1013 | return -ENOBUFS; |
1013 | } | 1014 | } |
1015 | skb->skb_mstamp = tp->tcp_mstamp; | ||
1014 | 1016 | ||
1015 | inet = inet_sk(sk); | 1017 | inet = inet_sk(sk); |
1016 | tcb = TCP_SKB_CB(skb); | 1018 | tcb = TCP_SKB_CB(skb); |
@@ -1122,12 +1124,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
1122 | 1124 | ||
1123 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); | 1125 | err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); |
1124 | 1126 | ||
1125 | if (likely(err <= 0)) | 1127 | if (unlikely(err > 0)) { |
1126 | return err; | 1128 | tcp_enter_cwr(sk); |
1127 | 1129 | err = net_xmit_eval(err); | |
1128 | tcp_enter_cwr(sk); | 1130 | } |
1131 | if (!err && oskb) | ||
1132 | oskb->skb_mstamp = tp->tcp_mstamp; | ||
1129 | 1133 | ||
1130 | return net_xmit_eval(err); | 1134 | return err; |
1131 | } | 1135 | } |
1132 | 1136 | ||
1133 | /* This routine just queues the buffer for sending. | 1137 | /* This routine just queues the buffer for sending. |
@@ -2869,10 +2873,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) | |||
2869 | skb_headroom(skb) >= 0xFFFF)) { | 2873 | skb_headroom(skb) >= 0xFFFF)) { |
2870 | struct sk_buff *nskb; | 2874 | struct sk_buff *nskb; |
2871 | 2875 | ||
2872 | skb->skb_mstamp = tp->tcp_mstamp; | ||
2873 | nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); | 2876 | nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); |
2874 | err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : | 2877 | err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : |
2875 | -ENOBUFS; | 2878 | -ENOBUFS; |
2879 | if (!err) | ||
2880 | skb->skb_mstamp = tp->tcp_mstamp; | ||
2876 | } else { | 2881 | } else { |
2877 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2882 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2878 | } | 2883 | } |