author     Eric Dumazet <edumazet@google.com>     2014-02-26 17:02:48 -0500
committer  David S. Miller <davem@davemloft.net>  2014-02-26 17:08:40 -0500
commit     740b0f1841f6e39085b711d41db9ffb07198682b
tree       7befd549fc20c51bff4c79790ad4520fcc0e324e
parent     363ec392352e55c61ce2799c3f15f89f9429bba7
tcp: switch rtt estimations to usec resolution
Upcoming congestion controls for TCP require usec resolution for RTT
estimations. Millisecond resolution is simply not enough these days.
FQ/pacing in DC environments also requires this change for finer control
and removal of the bimodal behavior caused by the current hack in
tcp_update_pacing_rate() for 'small rtt'.
TCP_CONG_RTT_STAMP is no longer needed.
As Julian Anastasov pointed out, we need to keep user compatibility:
tcp_metrics used to export RTT and RTTVAR in msec resolution,
so we added RTT_US and RTTVAR_US. An iproute2 patch is needed
to use the new attributes if provided by the kernel.
In this example, the ss command displays an srtt of 32 usec (10Gbit link):
lpk51:~# ./ss -i dst lpk52
Netid State   Recv-Q Send-Q  Local Address:Port    Peer Address:Port
tcp   ESTAB   0      1        10.246.11.51:42959    10.246.11.52:64614
	 cubic wscale:6,6 rto:201 rtt:0.032/0.001 ato:40 mss:1448 cwnd:10
	 send 3620.0Mbps pacing_rate 7240.0Mbps unacked:1 rcv_rtt:993 rcv_space:29559
Updated iproute2 ip command displays:
lpk51:~# ./ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 274us rttvar 213us source 10.246.11.51
Old binary displays:
lpk51:~# ip tcp_metrics | grep 10.246.11.52
10.246.11.52 age 561.914sec cwnd 10 rtt 250us rttvar 125us source 10.246.11.51
With help from Julian Anastasov, Stephen Hemminger and Yuchung Cheng.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Larry Brakmo <brakmo@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
 include/linux/tcp.h              |   8
 include/net/tcp.h                |  10
 include/uapi/linux/tcp_metrics.h |   7
 net/ipv4/tcp.c                   |   8
 net/ipv4/tcp_cubic.c             |   4
 net/ipv4/tcp_hybla.c             |  12
 net/ipv4/tcp_illinois.c          |   1
 net/ipv4/tcp_input.c             | 183
 net/ipv4/tcp_ipv4.c              |   2
 net/ipv4/tcp_lp.c                |   1
 net/ipv4/tcp_metrics.c           |  83
 net/ipv4/tcp_minisocks.c         |   4
 net/ipv4/tcp_output.c            |  15
 net/ipv4/tcp_probe.c             |   2
 net/ipv4/tcp_vegas.c             |   1
 net/ipv4/tcp_veno.c              |   1
 net/ipv4/tcp_yeah.c              |   1
 17 files changed, 174 insertions(+), 169 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4ad0706d40eb..239946868142 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,10 +201,10 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 	/* RTT measurement */
-	u32	srtt;		/* smoothed round trip time << 3 */
-	u32	mdev;		/* medium deviation */
-	u32	mdev_max;	/* maximal mdev for the last rtt period */
-	u32	rttvar;		/* smoothed mdev_max */
+	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
+	u32	mdev_us;	/* medium deviation */
+	u32	mdev_max_us;	/* maximal mdev for the last rtt period */
+	u32	rttvar_us;	/* smoothed mdev_max */
 	u32	rtt_seq;	/* sequence number to update rttvar */
 
 	u32	packets_out;	/* Packets which are "in flight" */
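The estimator fields keep their classic Jacobson fixed-point scaling; only the unit moves from jiffies to usec. As a reading aid, a hypothetical helper (not part of the patch) undoing the 8x scaling:

	/* srtt_us stores 8 * srtt in usec, so undo the shift to read it: */
	static inline u32 tcp_srtt_usecs(const struct tcp_sock *tp)
	{
		return tp->srtt_us >> 3;  /* e.g. 256 -> the 32 usec shown by ss above */
	}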
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f820537741a..93eab0b9da60 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
			     struct ip_options *opt);
 #ifdef CONFIG_SYN_COOKIES
-#include <linux/ktime.h>
 
 /* Syncookies use a monotonic timer which increments every 64 seconds.
  * This counter is used both as a hash input and partially encoded into
@@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk)
 
 static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 {
-	return (tp->srtt >> 3) + tp->rttvar;
+	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk)
 	return rto_min;
 }
 
+static inline u32 tcp_rto_min_us(struct sock *sk)
+{
+	return jiffies_to_usecs(tcp_rto_min(sk));
+}
+
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
@@ -778,7 +783,6 @@ enum tcp_ca_event {
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
 #define TCP_CONG_NON_RESTRICTED 0x1
-#define TCP_CONG_RTT_STAMP	0x2
 
 struct tcp_congestion_ops {
 	struct list_head	list;
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 54a37b13f2c4..93533926035c 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -11,12 +11,15 @@
 #define TCP_METRICS_GENL_VERSION	0x1
 
 enum tcp_metric_index {
-	TCP_METRIC_RTT,
-	TCP_METRIC_RTTVAR,
+	TCP_METRIC_RTT,		/* in ms units */
+	TCP_METRIC_RTTVAR,	/* in ms units */
 	TCP_METRIC_SSTHRESH,
 	TCP_METRIC_CWND,
 	TCP_METRIC_REORDERING,
 
+	TCP_METRIC_RTT_US,	/* in usec units */
+	TCP_METRIC_RTTVAR_US,	/* in usec units */
+
 	/* Always last. */
 	__TCP_METRIC_MAX,
 };
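The legacy attributes stay in ms so old binaries keep working, while the kernel additionally emits the usec variants (see tcp_metrics_fill_info() below). A sketch of the preference logic an updated iproute2 could apply, assuming the nested values are already parsed into attrs[] (indexed metric + 1, matching the fill side) and using the usual libnetlink u32 accessor:

	__u32 rtt_us;

	if (attrs[TCP_METRIC_RTT_US + 1])	/* usec attribute provided? */
		rtt_us = rta_getattr_u32(attrs[TCP_METRIC_RTT_US + 1]);
	else if (attrs[TCP_METRIC_RTT + 1])	/* fall back to legacy ms */
		rtt_us = rta_getattr_u32(attrs[TCP_METRIC_RTT + 1]) * 1000;
	else
		rtt_us = 0;			/* metric not stored */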
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bed379c7abcd..7374905b3701 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
-	tp->mdev = TCP_TIMEOUT_INIT;
+	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 
 	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
@@ -2339,7 +2339,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
-	tp->srtt = 0;
+	tp->srtt_us = 0;
 	if ((tp->write_seq += tp->max_window + 2) == 0)
 		tp->write_seq = 1;
 	icsk->icsk_backoff = 0;
@@ -2783,8 +2783,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
-	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
-	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+	info->tcpi_rtt = tp->srtt_us >> 3;
+	info->tcpi_rttvar = tp->mdev_us >> 2;
 	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
 	info->tcpi_snd_cwnd = tp->snd_cwnd;
 	info->tcpi_advmss = tp->advmss;
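tcp_get_info() already reported usec to userspace; only the on-the-fly jiffies conversion goes away, so the TCP_INFO ABI is unchanged. A minimal userspace check (standard socket API, nothing patch-specific):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>
	#include <stdio.h>

	static void print_rtt(int fd)	/* fd: a connected TCP socket */
	{
		struct tcp_info info;
		socklen_t len = sizeof(info);

		if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &len) == 0)
			printf("srtt %u usec, rttvar %u usec\n",
			       info.tcpi_rtt, info.tcpi_rttvar);
	}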
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffbaf..8bf224516ba2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -476,10 +476,6 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	/* hystart needs ms clock resolution */
-	if (hystart && HZ < 1000)
-		cubictcp.flags |= TCP_CONG_RTT_STAMP;
-
 	return tcp_register_congestion_control(&cubictcp);
 }
 
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 2a1a9e2a4e51..a15a799bf768 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -21,7 +21,7 @@ struct hybla {
 	u32   rho2;	      /* Rho * Rho, integer part */
 	u32   rho_3ls;	      /* Rho parameter, <<3 */
 	u32   rho2_7ls;	      /* Rho^2, <<7 */
-	u32   minrtt;	      /* Minimum smoothed round trip time value seen */
+	u32   minrtt_us;      /* Minimum smoothed round trip time value seen */
 };
 
 /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
@@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk)
 {
 	struct hybla *ca = inet_csk_ca(sk);
 
-	ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
+	ca->rho_3ls = max_t(u32,
+			    tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
+			    8U);
 	ca->rho = ca->rho_3ls >> 3;
 	ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
 	ca->rho2 = ca->rho2_7ls >> 7;
@@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk)
 	hybla_recalc_param(sk);
 
 	/* set minimum rtt as this is the 1st ever seen */
-	ca->minrtt = tp->srtt;
+	ca->minrtt_us = tp->srtt_us;
 	tp->snd_cwnd = ca->rho;
 }
 
@@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 	int is_slowstart = 0;
 
 	/* Recalculate rho only if this srtt is the lowest */
-	if (tp->srtt < ca->minrtt){
+	if (tp->srtt_us < ca->minrtt_us) {
 		hybla_recalc_param(sk);
-		ca->minrtt = tp->srtt;
+		ca->minrtt_us = tp->srtt_us;
 	}
 
 	if (!tcp_is_cwnd_limited(sk, in_flight))
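The Hybla arithmetic is preserved because both operands scale together: srtt_us carries 8 * srtt in usec and rtt0 * USEC_PER_MSEC is the reference RTT in usec, so the quotient is still rho << 3. A worked check with assumed values:

	u32 srtt_us = 100000 << 3;		/* assume srtt = 100 ms, 8x fixed point */
	u32 rho_3ls = srtt_us / (25 * 1000);	/* rtt0 = 25 ms default -> 32 = rho << 3 */
	u32 rho     = rho_3ls >> 3;		/* 4, i.e. 100 ms / 25 ms */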
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index be047c63ca10..863d105e3015 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -325,7 +325,6 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
 	.cong_avoid	= tcp_illinois_cong_avoid,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 227cba79fa6b..23a41d978fad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
+static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	long m = mrtt; /* RTT */
-	u32 srtt = tp->srtt;
+	long m = mrtt_us; /* RTT */
+	u32 srtt = tp->srtt_us;
 
 	/* The following amusing code comes from Jacobson's
	 * article in SIGCOMM '88. Note that rtt and mdev
@@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
@@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 			if (m > 0)
 				m >>= 3;
 		} else {
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 		}
-		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
-		if (tp->mdev > tp->mdev_max) {
-			tp->mdev_max = tp->mdev;
-			if (tp->mdev_max > tp->rttvar)
-				tp->rttvar = tp->mdev_max;
+		tp->mdev_us += m;	/* mdev = 3/4 mdev + 1/4 new */
+		if (tp->mdev_us > tp->mdev_max_us) {
+			tp->mdev_max_us = tp->mdev_us;
+			if (tp->mdev_max_us > tp->rttvar_us)
+				tp->rttvar_us = tp->mdev_max_us;
 		}
 		if (after(tp->snd_una, tp->rtt_seq)) {
-			if (tp->mdev_max < tp->rttvar)
-				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
+			if (tp->mdev_max_us < tp->rttvar_us)
+				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
-			tp->mdev_max = tcp_rto_min(sk);
+			tp->mdev_max_us = tcp_rto_min_us(sk);
 		}
 	} else {
 		/* no previous measure. */
 		srtt = m << 3;		/* take the measured time to be rtt */
-		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
-		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
+		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
+		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
+		tp->mdev_max_us = tp->rttvar_us;
 		tp->rtt_seq = tp->snd_nxt;
 	}
-	tp->srtt = max(1U, srtt);
+	tp->srtt_us = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
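The estimator body is untouched apart from its unit: it is still the Jacobson SIGCOMM '88 EWMA, now fed microsecond samples. A standalone, simplified sketch of the core update (it drops the Eifel-style reduced gain and the rttvar windowing that the real function keeps):

	#include <stdint.h>

	static uint32_t srtt_us;	/* 8 * smoothed RTT, usec */
	static uint32_t mdev_us;	/* 4 * mean deviation, usec */

	static void rtt_sample(long m)	/* m: measured RTT in usec */
	{
		if (srtt_us) {
			m -= (srtt_us >> 3);	/* error vs current estimate */
			srtt_us += m;		/* srtt = 7/8 srtt + 1/8 new */
			if (m < 0)
				m = -m;
			m -= (mdev_us >> 2);
			mdev_us += m;		/* mdev = 3/4 mdev + 1/4 |error| */
		} else {
			srtt_us = m << 3;	/* first sample seeds the estimate */
			mdev_us = m << 1;	/* so rto starts near 3 * rtt */
		}
	}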
@@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	u64 rate;
 
 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
-	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt and scheduling constraints.
-	 * For small rtt, consider noise is too high, and use
-	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
-	 *
-	 * We probably need usec resolution in the future.
-	 * Note: This also takes care of possible srtt=0 case,
-	 * when tcp_rtt_estimator() was not yet called.
-	 */
-	if (tp->srtt > 8 + 2)
-		do_div(rate, tp->srtt);
+	if (likely(tp->srtt_us))
+		do_div(rate, tp->srtt_us);
 
 	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
	 * without any lock. We want to make sure compiler wont store
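With srtt_us never rounded down to jiffies, the 'small rtt' clamp and its bimodal jump are gone; any non-zero estimate divides cleanly. The arithmetic reproduces the ss output quoted above (standalone sketch, values taken from that example):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t mss = 1448, cwnd = 10;
		uint64_t srtt_us = 32 << 3;			/* rtt 0.032 ms -> 32 usec, 8x fixed point */
		uint64_t rate = mss * 2 * (1000000ULL << 3);	/* USEC_PER_SEC << 3 */

		rate *= cwnd;
		if (srtt_us)
			rate /= srtt_us;			/* bytes per second */

		/* prints 7240000000 bit/s, i.e. the pacing_rate 7240.0Mbps above */
		printf("%llu bit/s\n", (unsigned long long)(rate * 8));
		return 0;
	}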
@@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 }
 
 struct tcp_sacktag_state {
 	int	reord;
 	int	fack_count;
-	int	flag;
-	s32	rtt; /* RTT measured by SACKing never-retransmitted data */
+	long	rtt_us; /* RTT measured by SACKing never-retransmitted data */
+	int	flag;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1186,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  int dup_sack, int pcount, u32 xmit_time)
+			  int dup_sack, int pcount,
+			  const struct skb_mstamp *xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 			if (!after(end_seq, tp->high_seq))
 				state->flag |= FLAG_ORIG_SACK_ACKED;
 			/* Pick the earliest sequence sacked for RTT */
-			if (state->rtt < 0)
-				state->rtt = tcp_time_stamp - xmit_time;
+			if (state->rtt_us < 0) {
+				struct skb_mstamp now;
+
+				skb_mstamp_get(&now);
+				state->rtt_us = skb_mstamp_us_delta(&now,
+								    xmit_time);
+			}
 		}
 
 		if (sacked & TCPCB_LOST) {
@@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			TCP_SKB_CB(skb)->when);
+			&skb->skb_mstamp);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						TCP_SKB_CB(skb)->when);
+						&skb->skb_mstamp);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una, s32 *sack_rtt)
+			u32 prior_snd_una, long *sack_rtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
-	state.rtt = -1;
+	state.rtt_us = -1L;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1824,7 +1823,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	*sack_rtt = state.rtt;
+	*sack_rtt_us = state.rtt_us;
 	return state.flag;
 }
 
@@ -2034,10 +2033,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 	 * available, or RTO is scheduled to fire first.
	 */
 	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-	    (flag & FLAG_ECE) || !tp->srtt)
+	    (flag & FLAG_ECE) || !tp->srtt_us)
 		return false;
 
-	delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
+	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
+		    msecs_to_jiffies(2));
+
 	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
 		return false;
 
@@ -2884,7 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 }
 
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      s32 seq_rtt, s32 sack_rtt)
+				      long seq_rtt_us, long sack_rtt_us)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2894,10 +2895,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * is acked (RFC6298).
	 */
 	if (flag & FLAG_RETRANS_DATA_ACKED)
-		seq_rtt = -1;
+		seq_rtt_us = -1L;
 
-	if (seq_rtt < 0)
-		seq_rtt = sack_rtt;
+	if (seq_rtt_us < 0)
+		seq_rtt_us = sack_rtt_us;
 
 	/* RTTM Rule: A TSecr value received in a segment is used to
	 * update the averaged RTT measurement only if the segment
@@ -2905,14 +2906,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
	    flag & FLAG_ACKED)
-		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
 
-	if (seq_rtt < 0)
+	if (seq_rtt_us < 0)
 		return false;
 
-	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_rtt_estimator(sk, seq_rtt_us);
 	tcp_set_rto(sk);
 
 	/* RFC6298: only reset backoff on valid RTT measurement. */
@@ -2924,16 +2925,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	s32 seq_rtt = -1;
+	long seq_rtt_us = -1L;
 
 	if (synack_stamp && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - synack_stamp;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
 
 	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
	 */
-	if (!tp->srtt)
-		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (!tp->srtt_us)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
@@ -3022,26 +3023,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
 */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, s32 sack_rtt)
+			       u32 prior_snd_una, long sack_rtt_us)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	u32 now = tcp_time_stamp;
+	struct skb_mstamp first_ackt, last_ackt, now;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 prior_sacked = tp->sacked_out;
+	u32 reord = tp->packets_out;
 	bool fully_acked = true;
-	int flag = 0;
+	long ca_seq_rtt_us = -1L;
+	long seq_rtt_us = -1L;
+	struct sk_buff *skb;
 	u32 pkts_acked = 0;
-	u32 reord = tp->packets_out;
-	u32 prior_sacked = tp->sacked_out;
-	s32 seq_rtt = -1;
-	s32 ca_seq_rtt = -1;
-	ktime_t last_ackt = net_invalid_timestamp();
 	bool rtt_update;
+	int flag = 0;
+
+	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-		u32 acked_pcount;
 		u8 sacked = scb->sacked;
+		u32 acked_pcount;
 
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
@@ -3063,11 +3065,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
 		} else {
-			ca_seq_rtt = now - scb->when;
-			last_ackt = skb->tstamp;
-			if (seq_rtt < 0) {
-				seq_rtt = ca_seq_rtt;
-			}
+			last_ackt = skb->skb_mstamp;
+			if (!first_ackt.v64)
+				first_ackt = last_ackt;
+
 			if (!(sacked & TCPCB_SACKED_ACKED))
 				reord = min(pkts_acked, reord);
 			if (!after(scb->end_seq, tp->high_seq))
@@ -3113,7 +3114,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+	skb_mstamp_get(&now);
+	if (first_ackt.v64) {
+		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
+		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+	}
+
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
@@ -3141,25 +3148,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-		if (ca_ops->pkts_acked) {
-			s32 rtt_us = -1;
-
-			/* Is the ACK triggering packet unambiguous? */
-			if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
-				/* High resolution needed and available? */
-				if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
-				    !ktime_equal(last_ackt,
-						 net_invalid_timestamp()))
-					rtt_us = ktime_us_delta(ktime_get_real(),
-								last_ackt);
-				else if (ca_seq_rtt >= 0)
-					rtt_us = jiffies_to_usecs(ca_seq_rtt);
-			}
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
 
-			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-		}
-	} else if (skb && rtt_update && sack_rtt >= 0 &&
-		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
+		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
		 * after when the head was last (re)transmitted. Otherwise the
		 * timeout may continue to extend in loss recovery.
@@ -3369,12 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
+	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
-	s32 sack_rtt = -1;
+	long sack_rtt_us = -1L;
 
 	/* If the ack is older than previous acks
	 * then we can probably ignore it.
@@ -3432,7 +3425,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (TCP_SKB_CB(skb)->sacked)
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt);
+						&sack_rtt_us);
 
 	if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 		flag |= FLAG_ECE;
@@ -3451,7 +3444,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+				    sack_rtt_us);
 	acked -= tp->packets_out;
 
 	/* Advance cwnd if state allows */
@@ -3474,8 +3468,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
-	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
-		tcp_update_pacing_rate(sk);
+	tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
@@ -3504,7 +3497,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt);
+						&sack_rtt_us);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
				      is_dupack, flag);
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3cf976510497..17c0fb172fba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 			break;
 
 		icsk->icsk_backoff--;
-		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
 			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 		tcp_bound_rto(sk);
 
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 503798f2fcd6..c9aecae31327 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -315,7 +315,6 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 }
 
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
-	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
 	struct	tcp_fastopen_cookie	cookie;
 };
 
+/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
+ * Kernel only stores RTT and RTTVAR in usec resolution
+ */
+#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
+
 struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
 	struct inetpeer_addr		tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
 	u32				tcpm_ts;
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
-	u32				tcpm_vals[TCP_METRIC_MAX + 1];
+	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
 
 	struct rcu_head			rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
 	return tm->tcpm_vals[idx];
 }
 
-static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
-				  enum tcp_metric_index idx)
-{
-	return msecs_to_jiffies(tm->tcpm_vals[idx]);
-}
-
 static void tcp_metric_set(struct tcp_metrics_block *tm,
			   enum tcp_metric_index idx,
			   u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
 	tm->tcpm_vals[idx] = val;
 }
 
-static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
-				 enum tcp_metric_index idx,
-				 u32 val)
-{
-	tm->tcpm_vals[idx] = jiffies_to_msecs(val);
-}
-
 static bool addr_same(const struct inetpeer_addr *a,
		      const struct inetpeer_addr *b)
 {
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
 
 static DEFINE_SPINLOCK(tcp_metrics_lock);
 
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+			  const struct dst_entry *dst,
 			  bool fastopen_clear)
 {
+	u32 msval;
 	u32 val;
 
 	tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
 		val |= 1 << TCP_METRIC_REORDERING;
 	tm->tcpm_lock = val;
 
-	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
-	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
+	msval = dst_metric_raw(dst, RTAX_RTT);
+	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+
+	msval = dst_metric_raw(dst, RTAX_RTTVAR);
+	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
 	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
 	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
 	dst_confirm(dst);
 
 	rcu_read_lock();
-	if (icsk->icsk_backoff || !tp->srtt) {
+	if (icsk->icsk_backoff || !tp->srtt_us) {
 		/* This session failed to estimate rtt. Why?
		 * Probably, no packets returned in time. Reset our
		 * results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
 	if (!tm)
 		goto out_unlock;
 
-	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
-	m = rtt - tp->srtt;
+	rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
+	m = rtt - tp->srtt_us;
 
 	/* If newly calculated rtt larger than stored one, store new
	 * one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
 	 */
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
 		if (m <= 0)
-			rtt = tp->srtt;
+			rtt = tp->srtt_us;
 		else
 			rtt -= (m >> 3);
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
+		tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
 	}
 
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
 
 		/* Scale deviation to rttvar fixed point */
 		m >>= 1;
-		if (m < tp->mdev)
-			m = tp->mdev;
+		if (m < tp->mdev_us)
+			m = tp->mdev_us;
 
-		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
+		var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
 		if (m >= var)
 			var = m;
 		else
 			var -= (var - m) >> 2;
 
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
+		tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
 	}
 
 	if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
 		tp->reordering = val;
 	}
 
-	crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+	crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
 	rcu_read_unlock();
 reset:
 	/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
 	 * to low value, and then abruptly stops to do it and starts to delay
	 * ACKs, wait for troubles.
	 */
-	if (crtt > tp->srtt) {
+	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt >>= 3;
+		crtt /= 8 * USEC_PER_MSEC;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
-	} else if (tp->srtt == 0) {
+	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
		 * 3WHS. This is most likely due to retransmission,
		 * including spurious one. Reset the RTO back to 3secs
		 * from the more aggressive 1sec to avoid more spurious
		 * retransmission.
		 */
-		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
+		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
 	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 	nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
 	if (!nest)
 		goto nla_put_failure;
-	for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
-		if (!tm->tcpm_vals[i])
+	for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+		u32 val = tm->tcpm_vals[i];
+
+		if (!val)
 			continue;
-		if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+		if (i == TCP_METRIC_RTT) {
+			if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
+					val) < 0)
+				goto nla_put_failure;
+			n++;
+			val = max(val / 1000, 1U);
+		}
+		if (i == TCP_METRIC_RTTVAR) {
+			if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
+					val) < 0)
+				goto nla_put_failure;
+			n++;
+			val = max(val / 1000, 1U);
+		}
+		if (nla_put_u32(msg, i + 1, val) < 0)
 			goto nla_put_failure;
 		n++;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a436c517e44..ca788ada5bd3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		tcp_init_wl(newtp, treq->rcv_isn);
 
-		newtp->srtt = 0;
-		newtp->mdev = TCP_TIMEOUT_INIT;
+		newtp->srtt_us = 0;
+		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c5eadec001c1..bf38b1fb63ab 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -866,11 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
-		/* If congestion control is doing timestamping, we must
-		 * take such a timestamp before we potentially clone/copy.
-		 */
-		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-			__net_timestamp(skb);
+		skb_mstamp_get(&skb->skb_mstamp);
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
			     fclone->fclone == SKB_FCLONE_CLONE))
@@ -1974,7 +1970,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = tp->srtt >> 3;
+	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
 		return false;
@@ -1996,7 +1992,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
	 * in Open state, that are either limited by cwnd or application.
	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -3050,8 +3046,9 @@ void tcp_send_delayed_ack(struct sock *sk)
 	 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
	 * directly.
	 */
-	if (tp->srtt) {
-		int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+	if (tp->srtt_us) {
+		int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+				TCP_DELACK_MIN);
 
 		if (rtt < max_ato)
 			max_ato = rtt;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 1f2d37613c9e..3b66610d4156 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		p->snd_wnd = tp->snd_wnd;
 		p->rcv_wnd = tp->rcv_wnd;
 		p->ssthresh = tcp_current_ssthresh(sk);
-		p->srtt = tp->srtt >> 3;
+		p->srtt = tp->srtt_us >> 3;
 
 		tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
 	}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a022c17c9cf1..48539fff6357 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -306,7 +306,6 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a94865..1b8e28fcd7e1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 8eab02030ed0..5ede0e727945 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -227,7 +227,6 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,