aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2019-01-16 18:05:35 -0500
committerDavid S. Miller <davem@davemloft.net>2019-01-17 18:12:26 -0500
commitc1d5674f8313b9f8e683c265f1c00a2582cf5fc5 (patch)
tree3b264a3fb495fd15153869e2b129a0d8cad4c944 /net/ipv4/tcp_output.c
parent590d2026d62418bb27de9ca87526e9131c1f48af (diff)
tcp: less aggressive window probing on local congestion
Previously when the sender fails to send (original) data packet or window probes due to congestion in the local host (e.g. throttling in qdisc), it'll retry within an RTO or two up to 500ms. In low-RTT networks such as data-centers, RTO is often far below the default minimum 200ms. Then local host congestion could trigger a retry storm pouring gas to the fire. Worse yet, the probe counter (icsk_probes_out) is not properly updated so the aggressive retry may exceed the system limit (15 rounds) until the packet finally slips through. On such rare events, it's wise to retry more conservatively (500ms) and update the stats properly to reflect these incidents and follow the system limit. Note that this is consistent with the behaviors when a keep-alive probe or RTO retry is dropped due to local congestion. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Neal Cardwell <ncardwell@google.com> Reviewed-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c22
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d2d494c74811..6527f61f59ff 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3749,7 +3749,7 @@ void tcp_send_probe0(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
-	unsigned long probe_max;
+	unsigned long timeout;
 	int err;
 
 	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
@@ -3761,26 +3761,18 @@ void tcp_send_probe0(struct sock *sk)
 		return;
 	}
 
+	icsk->icsk_probes_out++;
 	if (err <= 0) {
 		if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
 			icsk->icsk_backoff++;
-		icsk->icsk_probes_out++;
-		probe_max = TCP_RTO_MAX;
+		timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
 	} else {
 		/* If packet was not sent due to local congestion,
-		 * do not backoff and do not remember icsk_probes_out.
-		 * Let local senders to fight for local resources.
-		 *
-		 * Use accumulated backoff yet.
+		 * Let senders fight for local resources conservatively.
 		 */
-		if (!icsk->icsk_probes_out)
-			icsk->icsk_probes_out = 1;
-		probe_max = TCP_RESOURCE_PROBE_INTERVAL;
+		timeout = TCP_RESOURCE_PROBE_INTERVAL;
 	}
-	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-			     tcp_probe0_when(sk, probe_max),
-			     TCP_RTO_MAX,
-			     NULL);
+	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
 }
 
 int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)