aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2014-02-26 17:02:48 -0500
committer: David S. Miller <davem@davemloft.net> 2014-02-26 17:08:40 -0500
commit: 740b0f1841f6e39085b711d41db9ffb07198682b (patch)
tree: 7befd549fc20c51bff4c79790ad4520fcc0e324e /include
parent: 363ec392352e55c61ce2799c3f15f89f9429bba7 (diff)
tcp: switch rtt estimations to usec resolution
Upcoming congestion controls for TCP require usec resolution for RTT estimations. Millisecond resolution is simply not enough these days.

FQ/pacing in DC environments also requires this change for finer control and removal of bimodal behavior due to the current hack in tcp_update_pacing_rate() for 'small rtt'.

TCP_CONG_RTT_STAMP is no longer needed.

As Julian Anastasov pointed out, we need to keep user compatibility: tcp_metrics used to export RTT and RTTVAR in msec resolution, so we added RTT_US and RTTVAR_US. An iproute2 patch is needed to use the new attributes if provided by the kernel.

In this example, the ss command displays an srtt of 32 usecs (10Gbit link):

lpk51:~# ./ss -i dst lpk52
Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port
tcp ESTAB 0 1 10.246.11.51:42959 10.246.11.52:64614
cubic wscale:6,6 rto:201 rtt:0.032/0.001 ato:40 mss:1448 cwnd:10 send 3620.0Mbps pacing_rate 7240.0Mbps unacked:1 rcv_rtt:993 rcv_space:29559

Updated iproute2 ip command displays:

lpk51:~# ./ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 274us rttvar 213us source 10.246.11.51

Old binary displays:

lpk51:~# ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 250us rttvar 125us source 10.246.11.51

With help from Julian Anastasov, Stephen Hemminger and Yuchung Cheng.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Larry Brakmo <brakmo@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/tcp.h 8
-rw-r--r-- include/net/tcp.h 10
-rw-r--r-- include/uapi/linux/tcp_metrics.h 7
3 files changed, 16 insertions, 9 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4ad0706d40eb..239946868142 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,10 +201,10 @@ struct tcp_sock {
201 u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ 201 u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
202 202
203/* RTT measurement */ 203/* RTT measurement */
204 u32 srtt; /* smoothed round trip time << 3 */ 204 u32 srtt_us; /* smoothed round trip time << 3 in usecs */
205 u32 mdev; /* medium deviation */ 205 u32 mdev_us; /* medium deviation */
206 u32 mdev_max; /* maximal mdev for the last rtt period */ 206 u32 mdev_max_us; /* maximal mdev for the last rtt period */
207 u32 rttvar; /* smoothed mdev_max */ 207 u32 rttvar_us; /* smoothed mdev_max */
208 u32 rtt_seq; /* sequence number to update rttvar */ 208 u32 rtt_seq; /* sequence number to update rttvar */
209 209
210 u32 packets_out; /* Packets which are "in flight" */ 210 u32 packets_out; /* Packets which are "in flight" */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f820537741a..93eab0b9da60 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
31#include <linux/crypto.h> 31#include <linux/crypto.h>
32#include <linux/cryptohash.h> 32#include <linux/cryptohash.h>
33#include <linux/kref.h> 33#include <linux/kref.h>
34#include <linux/ktime.h>
34 35
35#include <net/inet_connection_sock.h> 36#include <net/inet_connection_sock.h>
36#include <net/inet_timewait_sock.h> 37#include <net/inet_timewait_sock.h>
@@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
478struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 479struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
479 struct ip_options *opt); 480 struct ip_options *opt);
480#ifdef CONFIG_SYN_COOKIES 481#ifdef CONFIG_SYN_COOKIES
481#include <linux/ktime.h>
482 482
483/* Syncookies use a monotonic timer which increments every 64 seconds. 483/* Syncookies use a monotonic timer which increments every 64 seconds.
484 * This counter is used both as a hash input and partially encoded into 484 * This counter is used both as a hash input and partially encoded into
@@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk)
619 619
620static inline u32 __tcp_set_rto(const struct tcp_sock *tp) 620static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
621{ 621{
622 return (tp->srtt >> 3) + tp->rttvar; 622 return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
623} 623}
624 624
625static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) 625static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk)
656 return rto_min; 656 return rto_min;
657} 657}
658 658
659static inline u32 tcp_rto_min_us(struct sock *sk)
660{
661 return jiffies_to_usecs(tcp_rto_min(sk));
662}
663
659/* Compute the actual receive window we are currently advertising. 664/* Compute the actual receive window we are currently advertising.
660 * Rcv_nxt can be after the window if our peer push more data 665 * Rcv_nxt can be after the window if our peer push more data
661 * than the offered window. 666 * than the offered window.
@@ -778,7 +783,6 @@ enum tcp_ca_event {
778#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) 783#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
779 784
780#define TCP_CONG_NON_RESTRICTED 0x1 785#define TCP_CONG_NON_RESTRICTED 0x1
781#define TCP_CONG_RTT_STAMP 0x2
782 786
783struct tcp_congestion_ops { 787struct tcp_congestion_ops {
784 struct list_head list; 788 struct list_head list;
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 54a37b13f2c4..93533926035c 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -11,12 +11,15 @@
11#define TCP_METRICS_GENL_VERSION 0x1 11#define TCP_METRICS_GENL_VERSION 0x1
12 12
13enum tcp_metric_index { 13enum tcp_metric_index {
14 TCP_METRIC_RTT, 14 TCP_METRIC_RTT, /* in ms units */
15 TCP_METRIC_RTTVAR, 15 TCP_METRIC_RTTVAR, /* in ms units */
16 TCP_METRIC_SSTHRESH, 16 TCP_METRIC_SSTHRESH,
17 TCP_METRIC_CWND, 17 TCP_METRIC_CWND,
18 TCP_METRIC_REORDERING, 18 TCP_METRIC_REORDERING,
19 19
20 TCP_METRIC_RTT_US, /* in usec units */
21 TCP_METRIC_RTTVAR_US, /* in usec units */
22
20 /* Always last. */ 23 /* Always last. */
21 __TCP_METRIC_MAX, 24 __TCP_METRIC_MAX,
22}; 25};