aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2014-07-02 15:07:16 -0400
committerDavid S. Miller <davem@davemloft.net>2014-07-08 00:40:48 -0400
commit6e08d5e3c8236e7484229e46fdf92006e1dd4c49 (patch)
tree0cef9beb502c504b884cccc127e34a50974d0f07 /net
parente326f2f13b209d56782609e833b87cb497e64b3b (diff)
tcp: fix false undo corner cases
The undo code assumes that, upon entering loss recovery, TCP 1) always retransmit something 2) the retransmission never fails locally (e.g., qdisc drop) so undo_marker is set in tcp_enter_recovery() and undo_retrans is incremented only when tcp_retransmit_skb() is successful. When the assumption is broken because TCP's cwnd is too small to retransmit or the retransmit fails locally. The next (DUP)ACK would incorrectly revert the cwnd and the congestion state in tcp_try_undo_dsack() or tcp_may_undo(). Subsequent (DUP)ACKs may enter the recovery state. The sender repeatedly enter and (incorrectly) exit recovery states if the retransmits continue to fail locally while receiving (DUP)ACKs. The fix is to initialize undo_retrans to -1 and start counting on the first retransmission. Always increment undo_retrans even if the retransmissions fail locally because they couldn't cause DSACKs to undo the cwnd reduction. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_input.c8
-rw-r--r--net/ipv4/tcp_output.c6
2 files changed, 8 insertions, 6 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b5c23756965a..40639c288dc2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1106,7 +1106,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
1106 } 1106 }
1107 1107
1108 /* D-SACK for already forgotten data... Do dumb counting. */ 1108 /* D-SACK for already forgotten data... Do dumb counting. */
1109 if (dup_sack && tp->undo_marker && tp->undo_retrans && 1109 if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
1110 !after(end_seq_0, prior_snd_una) && 1110 !after(end_seq_0, prior_snd_una) &&
1111 after(end_seq_0, tp->undo_marker)) 1111 after(end_seq_0, tp->undo_marker))
1112 tp->undo_retrans--; 1112 tp->undo_retrans--;
@@ -1187,7 +1187,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
1187 1187
1188 /* Account D-SACK for retransmitted packet. */ 1188 /* Account D-SACK for retransmitted packet. */
1189 if (dup_sack && (sacked & TCPCB_RETRANS)) { 1189 if (dup_sack && (sacked & TCPCB_RETRANS)) {
1190 if (tp->undo_marker && tp->undo_retrans && 1190 if (tp->undo_marker && tp->undo_retrans > 0 &&
1191 after(end_seq, tp->undo_marker)) 1191 after(end_seq, tp->undo_marker))
1192 tp->undo_retrans--; 1192 tp->undo_retrans--;
1193 if (sacked & TCPCB_SACKED_ACKED) 1193 if (sacked & TCPCB_SACKED_ACKED)
@@ -1893,7 +1893,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
1893 tp->lost_out = 0; 1893 tp->lost_out = 0;
1894 1894
1895 tp->undo_marker = 0; 1895 tp->undo_marker = 0;
1896 tp->undo_retrans = 0; 1896 tp->undo_retrans = -1;
1897} 1897}
1898 1898
1899void tcp_clear_retrans(struct tcp_sock *tp) 1899void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2665,7 +2665,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2665 2665
2666 tp->prior_ssthresh = 0; 2666 tp->prior_ssthresh = 0;
2667 tp->undo_marker = tp->snd_una; 2667 tp->undo_marker = tp->snd_una;
2668 tp->undo_retrans = tp->retrans_out; 2668 tp->undo_retrans = tp->retrans_out ? : -1;
2669 2669
2670 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 2670 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
2671 if (!ece_ack) 2671 if (!ece_ack)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d92bce0ea24e..179b51e6bda3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2525,8 +2525,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2525 if (!tp->retrans_stamp) 2525 if (!tp->retrans_stamp)
2526 tp->retrans_stamp = TCP_SKB_CB(skb)->when; 2526 tp->retrans_stamp = TCP_SKB_CB(skb)->when;
2527 2527
2528 tp->undo_retrans += tcp_skb_pcount(skb);
2529
2530 /* snd_nxt is stored to detect loss of retransmitted segment, 2528 /* snd_nxt is stored to detect loss of retransmitted segment,
2531 * see tcp_input.c tcp_sacktag_write_queue(). 2529 * see tcp_input.c tcp_sacktag_write_queue().
2532 */ 2530 */
@@ -2534,6 +2532,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2534 } else if (err != -EBUSY) { 2532 } else if (err != -EBUSY) {
2535 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); 2533 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2536 } 2534 }
2535
2536 if (tp->undo_retrans < 0)
2537 tp->undo_retrans = 0;
2538 tp->undo_retrans += tcp_skb_pcount(skb);
2537 return err; 2539 return err;
2538} 2540}
2539 2541