aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Yuchung Cheng <ycheng@google.com> 2018-01-17 15:11:00 -0500
committer: David S. Miller <davem@davemloft.net> 2018-01-19 15:39:30 -0500
commit: eb36be0fd55e0a6f2cb3226acd711b2c7a2d7d09 (patch)
tree: 738a49bca485840679c8b140feaa99aa956a94ac /net/ipv4/tcp_input.c
parent: 60c2530696320ee6ffe4491c17079fa403790c98 (diff)
tcp: avoid min-RTT overestimation from delayed ACKs
This patch avoids having the TCP sender or congestion control overestimate the min RTT by orders of magnitude. This happens when all the samples in the windowed filter come from one-packet transfers, such as small requests and health-check-like chit-chat, which is fairly common for applications using persistent connections. This patch tries to conservatively label and skip RTT samples obtained from this type of workload. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 23
1 files changed, 21 insertions, 2 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ff71b18d9682..2c6797134553 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -97,6 +97,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
97#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ 97#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
98#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ 98#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
99#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ 99#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
100#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
100 101
101#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) 102#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
102#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) 103#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -2857,11 +2858,18 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
2857 *rexmit = REXMIT_LOST; 2858 *rexmit = REXMIT_LOST;
2858} 2859}
2859 2860
2860static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) 2861static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
2861{ 2862{
2862 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; 2863 u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
2863 struct tcp_sock *tp = tcp_sk(sk); 2864 struct tcp_sock *tp = tcp_sk(sk);
2864 2865
2866 if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
2867 /* If the remote keeps returning delayed ACKs, eventually
2868 * the min filter would pick it up and overestimate the
2869 * prop. delay when it expires. Skip suspected delayed ACKs.
2870 */
2871 return;
2872 }
2865 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, 2873 minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
2866 rtt_us ? : jiffies_to_usecs(1)); 2874 rtt_us ? : jiffies_to_usecs(1));
2867} 2875}
@@ -2901,7 +2909,7 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
2901 * always taken together with ACK, SACK, or TS-opts. Any negative 2909 * always taken together with ACK, SACK, or TS-opts. Any negative
2902 * values will be skipped with the seq_rtt_us < 0 check above. 2910 * values will be skipped with the seq_rtt_us < 0 check above.
2903 */ 2911 */
2904 tcp_update_rtt_min(sk, ca_rtt_us); 2912 tcp_update_rtt_min(sk, ca_rtt_us, flag);
2905 tcp_rtt_estimator(sk, seq_rtt_us); 2913 tcp_rtt_estimator(sk, seq_rtt_us);
2906 tcp_set_rto(sk); 2914 tcp_set_rto(sk);
2907 2915
@@ -3125,6 +3133,17 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
3125 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) { 3133 if (likely(first_ackt) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
3126 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); 3134 seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt);
3127 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); 3135 ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt);
3136
3137 if (pkts_acked == 1 && last_in_flight < tp->mss_cache &&
3138 last_in_flight && !prior_sacked && fully_acked &&
3139 sack->rate->prior_delivered + 1 == tp->delivered &&
3140 !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) {
3141 /* Conservatively mark a delayed ACK. It's typically
3142 * from a lone runt packet over the round trip to
3143 * a receiver w/o out-of-order or CE events.
3144 */
3145 flag |= FLAG_ACK_MAYBE_DELAYED;
3146 }
3128 } 3147 }
3129 if (sack->first_sackt) { 3148 if (sack->first_sackt) {
3130 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt); 3149 sack_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, sack->first_sackt);