about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2018-08-29 17:53:56 -0400
committerDavid S. Miller <davem@davemloft.net>2018-09-01 02:03:00 -0400
commit7788174e8726c309d8bfd8aeca743cefd6943616 (patch)
treec22fbe4b578d94cf9e813fa8831a3bfb798717e8
parent506a03aa04deed321b26153aff4e61161aefa8af (diff)
tcp: change IPv6 flow-label upon receiving spurious retransmission
Currently a Linux IPv6 TCP sender will change the flow label upon timeouts to potentially steer away from a data path that has gone bad. However this does not help if the problem is on the ACK path and the data path is healthy. In this case the receiver is likely to receive repeated spurious retransmissions because the sender couldn't get the ACKs in time and has recurring timeouts. This patch adds another feature to mitigate this problem. It leverages the DSACK states in the receiver to change the flow label of the ACKs to speculatively re-route the ACK packets. In order to allow triggering on the second consecutive spurious RTO, the receiver changes the flow label upon sending a second consecutive DSACK for a sequence number below RCV.NXT. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_input.c13
2 files changed, 15 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8af2fec5ad5..8c4235c098fd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2595,6 +2595,8 @@ int tcp_disconnect(struct sock *sk, int flags)
2595 tp->compressed_ack = 0; 2595 tp->compressed_ack = 0;
2596 tp->bytes_sent = 0; 2596 tp->bytes_sent = 0;
2597 tp->bytes_retrans = 0; 2597 tp->bytes_retrans = 0;
2598 tp->duplicate_sack[0].start_seq = 0;
2599 tp->duplicate_sack[0].end_seq = 0;
2598 tp->dsack_dups = 0; 2600 tp->dsack_dups = 0;
2599 tp->reord_seen = 0; 2601 tp->reord_seen = 0;
2600 2602
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4c2dd9f863f7..62508a2f9b21 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4199,6 +4199,17 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
4199 tcp_sack_extend(tp->duplicate_sack, seq, end_seq); 4199 tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
4200} 4200}
4201 4201
4202static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
4203{
4204 /* When the ACK path fails or drops most ACKs, the sender would
4205 * timeout and spuriously retransmit the same segment repeatedly.
4206 * The receiver remembers and reflects via DSACKs. Leverage the
4207 * DSACK state and change the txhash to re-route speculatively.
4208 */
4209 if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
4210 sk_rethink_txhash(sk);
4211}
4212
4202static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) 4213static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4203{ 4214{
4204 struct tcp_sock *tp = tcp_sk(sk); 4215 struct tcp_sock *tp = tcp_sk(sk);
@@ -4211,6 +4222,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
4211 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { 4222 if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
4212 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4223 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
4213 4224
4225 tcp_rcv_spurious_retrans(sk, skb);
4214 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) 4226 if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
4215 end_seq = tp->rcv_nxt; 4227 end_seq = tp->rcv_nxt;
4216 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq); 4228 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
@@ -4755,6 +4767,7 @@ queue_and_out:
4755 } 4767 }
4756 4768
4757 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4769 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4770 tcp_rcv_spurious_retrans(sk, skb);
4758 /* A retransmit, 2nd most common case. Force an immediate ack. */ 4771 /* A retransmit, 2nd most common case. Force an immediate ack. */
4759 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); 4772 NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
4760 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4773 tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);