aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuchung Cheng <ycheng@google.com> 2018-01-17 15:11:01 -0500
committer David S. Miller <davem@davemloft.net> 2018-01-19 15:39:30 -0500
commit e42866031ff03c89a5bdd2056c76dd6cb41c3d35 (patch)
tree f72181b7dfc5d58ee416a996028ea5c3698a206c
parent eb36be0fd55e0a6f2cb3226acd711b2c7a2d7d09 (diff)
tcp: avoid min RTT bloat by skipping RTT from delayed-ACK in BBR
A persistent connection may send a tiny amount of data (e.g. health-check) for a long period of time. BBR's windowed min RTT filter may only see RTT samples from delayed ACKs, causing BBR to grossly over-estimate the path delay depending on how much the ACK was delayed at the receiver.

This patch skips RTT samples that are likely coming from delayed ACKs. Note that it is possible the sender never obtains a valid measure to set the min RTT. In this case BBR will continue to set cwnd to the initial window, which seems fine because the connection is a thin stream.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/tcp.h 1
-rw-r--r-- net/ipv4/tcp_bbr.c 3
-rw-r--r-- net/ipv4/tcp_input.c 1
3 files changed, 4 insertions, 1 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6939e69d3c37..5a1d26a18599 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -953,6 +953,7 @@ struct rate_sample {
953 u32 prior_in_flight; /* in flight before this ACK */ 953 u32 prior_in_flight; /* in flight before this ACK */
954 bool is_app_limited; /* is sample from packet with bubble in pipe? */ 954 bool is_app_limited; /* is sample from packet with bubble in pipe? */
955 bool is_retrans; /* is sample from retransmission? */ 955 bool is_retrans; /* is sample from retransmission? */
956 bool is_ack_delayed; /* is this (likely) a delayed ACK? */
956}; 957};
957 958
958struct tcp_congestion_ops { 959struct tcp_congestion_ops {
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 8322f26e770e..785712be5b0d 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -766,7 +766,8 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
766 filter_expired = after(tcp_jiffies32, 766 filter_expired = after(tcp_jiffies32,
767 bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); 767 bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
768 if (rs->rtt_us >= 0 && 768 if (rs->rtt_us >= 0 &&
769 (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { 769 (rs->rtt_us <= bbr->min_rtt_us ||
770 (filter_expired && !rs->is_ack_delayed))) {
770 bbr->min_rtt_us = rs->rtt_us; 771 bbr->min_rtt_us = rs->rtt_us;
771 bbr->min_rtt_stamp = tcp_jiffies32; 772 bbr->min_rtt_stamp = tcp_jiffies32;
772 } 773 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2c6797134553..cfa51cfd2d99 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3633,6 +3633,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3633 3633
3634 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ 3634 delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
3635 lost = tp->lost - lost; /* freshly marked lost */ 3635 lost = tp->lost - lost; /* freshly marked lost */
3636 rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED);
3636 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); 3637 tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate);
3637 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); 3638 tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
3638 tcp_xmit_recovery(sk, rexmit); 3639 tcp_xmit_recovery(sk, rexmit);