about summary refs log tree commit diff stats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2015-07-01 17:11:14 -0400
committerDavid S. Miller <davem@davemloft.net>2015-07-08 16:29:45 -0400
commit291a00d1a70f96b393da9ac90c58a82bc7949fc8 (patch)
tree4eaad0b287c5dbdc785f602a77ff49e6fd0b8438 /net/ipv4/tcp_input.c
parentfda8b18c515a5e2caf821887ceafb42c35094eaf (diff)
tcp: reduce cwnd if retransmit is lost in CA_Loss
If the retransmission in CA_Loss is lost again, we should not continue to slow start or raise cwnd in congestion avoidance mode. Instead we should enter fast recovery and use PRR to reduce cwnd, following the principle in RFC 5681: "... or the loss of a retransmission, should be taken as two indications of congestion and, therefore, cwnd (and ssthresh) MUST be lowered twice in this case."

This is especially important to reduce loss when the CA_Loss state was caused by a traffic policer dropping the entire inflight. The CA_Loss state has a problem where a loss of L packets causes the sender to send a burst of L packets. So a policer that's dropping most packets in a given RTT can cause a huge retransmit storm. By contrast, PRR includes logic to bound the number of outbound packets that result from a given ACK. So switching to CA_Recovery on lost retransmits in CA_Loss avoids this retransmit storm problem when in CA_Loss.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  14
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 684f095d196e..923e0e568bfa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -109,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ 109#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
110#define FLAG_DATA_SACKED 0x20 /* New SACK. */ 110#define FLAG_DATA_SACKED 0x20 /* New SACK. */
111#define FLAG_ECE 0x40 /* ECE in this ACK */ 111#define FLAG_ECE 0x40 /* ECE in this ACK */
112#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
112#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ 113#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
113#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ 114#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
114#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ 115#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -1037,7 +1038,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
1037 * highest SACK block). Also calculate the lowest snd_nxt among the remaining 1038 * highest SACK block). Also calculate the lowest snd_nxt among the remaining
1038 * retransmitted skbs to avoid some costly processing per ACKs. 1039 * retransmitted skbs to avoid some costly processing per ACKs.
1039 */ 1040 */
1040static void tcp_mark_lost_retrans(struct sock *sk) 1041static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
1041{ 1042{
1042 const struct inet_connection_sock *icsk = inet_csk(sk); 1043 const struct inet_connection_sock *icsk = inet_csk(sk);
1043 struct tcp_sock *tp = tcp_sk(sk); 1044 struct tcp_sock *tp = tcp_sk(sk);
@@ -1078,7 +1079,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1078 if (after(received_upto, ack_seq)) { 1079 if (after(received_upto, ack_seq)) {
1079 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1080 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1080 tp->retrans_out -= tcp_skb_pcount(skb); 1081 tp->retrans_out -= tcp_skb_pcount(skb);
1081 1082 *flag |= FLAG_LOST_RETRANS;
1082 tcp_skb_mark_lost_uncond_verify(tp, skb); 1083 tcp_skb_mark_lost_uncond_verify(tp, skb);
1083 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); 1084 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
1084 } else { 1085 } else {
@@ -1818,7 +1819,7 @@ advance_sp:
1818 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) 1819 ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
1819 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); 1820 tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
1820 1821
1821 tcp_mark_lost_retrans(sk); 1822 tcp_mark_lost_retrans(sk, &state->flag);
1822 tcp_verify_left_out(tp); 1823 tcp_verify_left_out(tp);
1823out: 1824out:
1824 1825
@@ -2676,7 +2677,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2676 tp->prior_ssthresh = 0; 2677 tp->prior_ssthresh = 0;
2677 tcp_init_undo(tp); 2678 tcp_init_undo(tp);
2678 2679
2679 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 2680 if (!tcp_in_cwnd_reduction(sk)) {
2680 if (!ece_ack) 2681 if (!ece_ack)
2681 tp->prior_ssthresh = tcp_current_ssthresh(sk); 2682 tp->prior_ssthresh = tcp_current_ssthresh(sk);
2682 tcp_init_cwnd_reduction(sk); 2683 tcp_init_cwnd_reduction(sk);
@@ -2852,9 +2853,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
2852 break; 2853 break;
2853 case TCP_CA_Loss: 2854 case TCP_CA_Loss:
2854 tcp_process_loss(sk, flag, is_dupack); 2855 tcp_process_loss(sk, flag, is_dupack);
2855 if (icsk->icsk_ca_state != TCP_CA_Open) 2856 if (icsk->icsk_ca_state != TCP_CA_Open &&
2857 !(flag & FLAG_LOST_RETRANS))
2856 return; 2858 return;
2857 /* Fall through to processing in Open state. */ 2859 /* Change state if cwnd is undone or retransmits are lost */
2858 default: 2860 default:
2859 if (tcp_is_reno(tp)) { 2861 if (tcp_is_reno(tp)) {
2860 if (flag & FLAG_SND_UNA_ADVANCED) 2862 if (flag & FLAG_SND_UNA_ADVANCED)