aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_timer.c
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2017-05-16 17:00:14 -0400
committer: David S. Miller <davem@davemloft.net> 2017-05-17 16:06:01 -0400
commit: 9a568de4818dea9a05af141046bd3e589245ab83 (patch)
tree: 6f1502edf55ecb7205660d62bd683ebcf912cfea /net/ipv4/tcp_timer.c
parent: ac9517fcf310327fa3e3b0d8366e4b11236b1b4b (diff)
tcp: switch TCP TS option (RFC 7323) to 1ms clock
The TCP Timestamps option is defined in RFC 7323.

Traditionally on Linux, it has been tied to the internal 'jiffies' variable, because it had been a cheap and good enough generator.

For TCP flows on the Internet, 1 ms resolution would be much better than 4 ms or 10 ms (HZ=250 or HZ=100 respectively).

For TCP flows in the DC, Google has used usec resolution for more than two years with great success [1].

Receive size autotuning (DRS) is indeed more precise and converges faster to the optimal window size.

This patch converts tp->tcp_mstamp to a plain u64 value storing a 1 usec TCP clock.

This choice will allow us to upstream the 1 usec TS option as discussed in IETF 97.

[1] https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r-- net/ipv4/tcp_timer.c | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6629f47aa7f0..27a667bce806 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -153,8 +153,8 @@ static bool retransmits_timed_out(struct sock *sk,
153 unsigned int timeout, 153 unsigned int timeout,
154 bool syn_set) 154 bool syn_set)
155{ 155{
156 unsigned int linear_backoff_thresh, start_ts;
157 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; 156 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;
157 unsigned int linear_backoff_thresh, start_ts;
158 158
159 if (!inet_csk(sk)->icsk_retransmits) 159 if (!inet_csk(sk)->icsk_retransmits)
160 return false; 160 return false;
@@ -172,7 +172,7 @@ static bool retransmits_timed_out(struct sock *sk,
172 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + 172 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
173 (boundary - linear_backoff_thresh) * TCP_RTO_MAX; 173 (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
174 } 174 }
175 return (tcp_time_stamp - start_ts) >= timeout; 175 return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= jiffies_to_msecs(timeout);
176} 176}
177 177
178/* A write timeout has occurred. Process the after effects. */ 178/* A write timeout has occurred. Process the after effects. */
@@ -341,7 +341,7 @@ static void tcp_probe_timer(struct sock *sk)
341 if (!start_ts) 341 if (!start_ts)
342 tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; 342 tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
343 else if (icsk->icsk_user_timeout && 343 else if (icsk->icsk_user_timeout &&
344 (s32)(tcp_time_stamp - start_ts) > icsk->icsk_user_timeout) 344 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
345 goto abort; 345 goto abort;
346 346
347 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; 347 max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
@@ -561,7 +561,7 @@ void tcp_write_timer_handler(struct sock *sk)
561 goto out; 561 goto out;
562 } 562 }
563 563
564 skb_mstamp_get(&tcp_sk(sk)->tcp_mstamp); 564 tcp_mstamp_refresh(tcp_sk(sk));
565 event = icsk->icsk_pending; 565 event = icsk->icsk_pending;
566 566
567 switch (event) { 567 switch (event) {