aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2013-09-03 17:14:35 -0400
committerDavid S. Miller <davem@davemloft.net>2013-09-04 14:41:55 -0400
commit52f20e655d9f6f7f937a1cdacf219d9df3ab6166 (patch)
tree234a887f331af07ebb2ff5a1ae80823f8e1456b5
parent430eda6d6d568eded71dfd1be5a16c0c1379e201 (diff)
tcp: better comments for RTO initialization
Commit 1b7fdd2ab585 ("tcp: do not use cached RTT for RTT estimation") removes important comments on how RTO is initialized and updated. Hopefully this patch puts that information back. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_metrics.c26
1 files changed, 20 insertions, 6 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 273ed735cca2..4a22f3e715df 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -481,13 +481,27 @@ void tcp_init_metrics(struct sock *sk)
481 crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); 481 crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
482 rcu_read_unlock(); 482 rcu_read_unlock();
483reset: 483reset:
484 /* The initial RTT measurement from the SYN/SYN-ACK is not ideal
485 * to seed the RTO for later data packets because SYN packets are
486 * small. Use the per-dst cached values to seed the RTO but keep
487 * the RTT estimator variables intact (e.g., srtt, mdev, rttvar).
488 * Later the RTO will be updated immediately upon obtaining the first
489 * data RTT sample (tcp_rtt_estimator()). Hence the cached RTT only
490 * influences the first RTO but not later RTT estimation.
491 *
492 * But if RTT is not available from the SYN (due to retransmits or
493 * syn cookies) or the cache, force a conservative 3secs timeout.
494 *
495 * A bit of theory. RTT is time passed after "normal" sized packet
496 * is sent until it is ACKed. In normal circumstances sending small
497 * packets forces peer to delay ACKs and calculation is correct too.
498 * The algorithm is adaptive and, provided we follow specs, it
499 * NEVER underestimates RTT. BUT! If peer tries to make some clever
500 * tricks sort of "quick acks" for time long enough to decrease RTT
501 * to low value, and then abruptly stops doing it and starts to delay
502 * ACKs, wait for troubles.
503 */
484 if (crtt > tp->srtt) { 504 if (crtt > tp->srtt) {
485 /* Initial RTT (tp->srtt) from SYN usually don't measure
486 * serialization delay on low BW links well so RTO may be
487 * under-estimated. Stay conservative and seed RTO with
488 * the RTTs from past data exchanges, using the same seeding
489 * formula in tcp_rtt_estimator().
490 */
491 inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk)); 505 inet_csk(sk)->icsk_rto = crtt + max(crtt >> 2, tcp_rto_min(sk));
492 } else if (tp->srtt == 0) { 506 } else if (tp->srtt == 0) {
493 /* RFC6298: 5.7 We've failed to get a valid RTT sample from 507 /* RFC6298: 5.7 We've failed to get a valid RTT sample from