-rw-r--r--   include/linux/tcp.h                  8
-rw-r--r--   include/net/tcp.h                   10
-rw-r--r--   include/uapi/linux/tcp_metrics.h     7
-rw-r--r--   net/ipv4/tcp.c                       8
-rw-r--r--   net/ipv4/tcp_cubic.c                 4
-rw-r--r--   net/ipv4/tcp_hybla.c                12
-rw-r--r--   net/ipv4/tcp_illinois.c              1
-rw-r--r--   net/ipv4/tcp_input.c               183
-rw-r--r--   net/ipv4/tcp_ipv4.c                  2
-rw-r--r--   net/ipv4/tcp_lp.c                    1
-rw-r--r--   net/ipv4/tcp_metrics.c              83
-rw-r--r--   net/ipv4/tcp_minisocks.c             4
-rw-r--r--   net/ipv4/tcp_output.c               15
-rw-r--r--   net/ipv4/tcp_probe.c                 2
-rw-r--r--   net/ipv4/tcp_vegas.c                 1
-rw-r--r--   net/ipv4/tcp_veno.c                  1
-rw-r--r--   net/ipv4/tcp_yeah.c                  1
17 files changed, 174 insertions, 169 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4ad0706d40eb..239946868142 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,10 +201,10 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
-	u32	srtt;		/* smoothed round trip time << 3 */
-	u32	mdev;		/* medium deviation */
-	u32	mdev_max;	/* maximal mdev for the last rtt period */
-	u32	rttvar;		/* smoothed mdev_max */
+	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
+	u32	mdev_us;	/* medium deviation */
+	u32	mdev_max_us;	/* maximal mdev for the last rtt period */
+	u32	rttvar_us;	/* smoothed mdev_max */
 	u32	rtt_seq;	/* sequence number to update rttvar */
 
 	u32	packets_out;	/* Packets which are "in flight" */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1f820537741a..93eab0b9da60 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -31,6 +31,7 @@
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -478,7 +479,6 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt);
 #ifdef CONFIG_SYN_COOKIES
-#include <linux/ktime.h>
 
 /* Syncookies use a monotonic timer which increments every 64 seconds.
  * This counter is used both as a hash input and partially encoded into
@@ -619,7 +619,7 @@ static inline void tcp_bound_rto(const struct sock *sk)
 
 static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
 {
-	return (tp->srtt >> 3) + tp->rttvar;
+	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
 }
 
 static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -656,6 +656,11 @@ static inline u32 tcp_rto_min(struct sock *sk)
 	return rto_min;
 }
 
+static inline u32 tcp_rto_min_us(struct sock *sk)
+{
+	return jiffies_to_usecs(tcp_rto_min(sk));
+}
+
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
@@ -778,7 +783,6 @@ enum tcp_ca_event {
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
 #define TCP_CONG_NON_RESTRICTED 0x1
-#define TCP_CONG_RTT_STAMP	0x2
 
 struct tcp_congestion_ops {
 	struct list_head	list;
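With the estimator state now kept in microseconds, __tcp_set_rto() converts back to jiffies only at this boundary. A standalone sketch of that arithmetic, assuming srtt_us holds the smoothed RTT left-shifted by 3 and rttvar_us already carries the variance term at full weight (the helper name and the HZ constant below are illustrative, not kernel API):

#include <stdint.h>

#define SKETCH_USEC_PER_JIFFY	1000	/* assumes HZ=1000, for illustration */

static uint32_t sketch_rto_jiffies(uint32_t srtt_us, uint32_t rttvar_us)
{
	/* mirrors __tcp_set_rto(): RTO = srtt + rttvar, computed in usec */
	uint32_t rto_us = (srtt_us >> 3) + rttvar_us;

	/* usecs_to_jiffies() rounds up; emulate that rounding here */
	return (rto_us + SKETCH_USEC_PER_JIFFY - 1) / SKETCH_USEC_PER_JIFFY;
}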
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 54a37b13f2c4..93533926035c 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -11,12 +11,15 @@
 #define TCP_METRICS_GENL_VERSION	0x1
 
 enum tcp_metric_index {
-	TCP_METRIC_RTT,
-	TCP_METRIC_RTTVAR,
+	TCP_METRIC_RTT,		/* in ms units */
+	TCP_METRIC_RTTVAR,	/* in ms units */
 	TCP_METRIC_SSTHRESH,
 	TCP_METRIC_CWND,
 	TCP_METRIC_REORDERING,
 
+	TCP_METRIC_RTT_US,	/* in usec units */
+	TCP_METRIC_RTTVAR_US,	/* in usec units */
+
 	/* Always last. */
 	__TCP_METRIC_MAX,
 };
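The two new enum slots let the kernel keep its RTT state in microseconds while still reporting the legacy millisecond attributes to old dumpers. A hypothetical consumer-side sketch of the intended fallback; only the enum constants come from the header above, the helper and its arrays are invented:

#include <stdint.h>
#include <stdbool.h>
#include <linux/tcp_metrics.h>

static uint64_t metric_rtt_usec(const uint32_t vals[__TCP_METRIC_MAX],
				const bool present[__TCP_METRIC_MAX])
{
	if (present[TCP_METRIC_RTT_US])
		return vals[TCP_METRIC_RTT_US];			/* native usec */
	if (present[TCP_METRIC_RTT])
		return (uint64_t)vals[TCP_METRIC_RTT] * 1000;	/* ms -> usec */
	return 0;						/* metric not stored */
}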
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bed379c7abcd..7374905b3701 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -387,7 +387,7 @@ void tcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
-	tp->mdev = TCP_TIMEOUT_INIT;
+	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -2339,7 +2339,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
-	tp->srtt = 0;
+	tp->srtt_us = 0;
 	if ((tp->write_seq += tp->max_window + 2) == 0)
 		tp->write_seq = 1;
 	icsk->icsk_backoff = 0;
@@ -2783,8 +2783,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
-	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
-	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
+	info->tcpi_rtt = tp->srtt_us >> 3;
+	info->tcpi_rttvar = tp->mdev_us >> 2;
 	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
 	info->tcpi_snd_cwnd = tp->snd_cwnd;
 	info->tcpi_advmss = tp->advmss;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffbaf..8bf224516ba2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -476,10 +476,6 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	/* hystart needs ms clock resolution */
-	if (hystart && HZ < 1000)
-		cubictcp.flags |= TCP_CONG_RTT_STAMP;
-
 	return tcp_register_congestion_control(&cubictcp);
 }
 
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 2a1a9e2a4e51..a15a799bf768 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -21,7 +21,7 @@ struct hybla {
 	u32   rho2;	      /* Rho * Rho, integer part */
 	u32   rho_3ls;	      /* Rho parameter, <<3 */
 	u32   rho2_7ls;	      /* Rho^2, <<7 */
-	u32   minrtt;	      /* Minimum smoothed round trip time value seen */
+	u32   minrtt_us;      /* Minimum smoothed round trip time value seen */
 };
 
 /* Hybla reference round trip time (default= 1/40 sec = 25 ms), in ms */
@@ -35,7 +35,9 @@ static inline void hybla_recalc_param (struct sock *sk)
 {
 	struct hybla *ca = inet_csk_ca(sk);
 
-	ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8);
+	ca->rho_3ls = max_t(u32,
+			    tcp_sk(sk)->srtt_us / (rtt0 * USEC_PER_MSEC),
+			    8U);
 	ca->rho = ca->rho_3ls >> 3;
 	ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1;
 	ca->rho2 = ca->rho2_7ls >> 7;
@@ -59,7 +61,7 @@ static void hybla_init(struct sock *sk)
 	hybla_recalc_param(sk);
 
 	/* set minimum rtt as this is the 1st ever seen */
-	ca->minrtt = tp->srtt;
+	ca->minrtt_us = tp->srtt_us;
 	tp->snd_cwnd = ca->rho;
 }
 
@@ -94,9 +96,9 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked,
 	int is_slowstart = 0;
 
 	/* Recalculate rho only if this srtt is the lowest */
-	if (tp->srtt < ca->minrtt){
+	if (tp->srtt_us < ca->minrtt_us) {
 		hybla_recalc_param(sk);
-		ca->minrtt = tp->srtt;
+		ca->minrtt_us = tp->srtt_us;
 	}
 
 	if (!tcp_is_cwnd_limited(sk, in_flight))
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index be047c63ca10..863d105e3015 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -325,7 +325,6 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
 	.cong_avoid	= tcp_illinois_cong_avoid,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 227cba79fa6b..23a41d978fad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -667,11 +667,11 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
+static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	long m = mrtt; /* RTT */
-	u32 srtt = tp->srtt;
+	long m = mrtt_us; /* RTT */
+	u32 srtt = tp->srtt_us;
 
 	/* The following amusing code comes from Jacobson's
 	 * article in SIGCOMM '88. Note that rtt and mdev
@@ -694,7 +694,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
 		if (m < 0) {
 			m = -m;		/* m is now abs(error) */
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 			/* This is similar to one of Eifel findings.
 			 * Eifel blocks mdev updates when rtt decreases.
 			 * This solution is a bit different: we use finer gain
@@ -706,28 +706,29 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 			if (m > 0)
 				m >>= 3;
 		} else {
-			m -= (tp->mdev >> 2);   /* similar update on mdev */
+			m -= (tp->mdev_us >> 2);   /* similar update on mdev */
 		}
-		tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */
-		if (tp->mdev > tp->mdev_max) {
-			tp->mdev_max = tp->mdev;
-			if (tp->mdev_max > tp->rttvar)
-				tp->rttvar = tp->mdev_max;
+		tp->mdev_us += m;	/* mdev = 3/4 mdev + 1/4 new */
+		if (tp->mdev_us > tp->mdev_max_us) {
+			tp->mdev_max_us = tp->mdev_us;
+			if (tp->mdev_max_us > tp->rttvar_us)
+				tp->rttvar_us = tp->mdev_max_us;
 		}
 		if (after(tp->snd_una, tp->rtt_seq)) {
-			if (tp->mdev_max < tp->rttvar)
-				tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
+			if (tp->mdev_max_us < tp->rttvar_us)
+				tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
 			tp->rtt_seq = tp->snd_nxt;
-			tp->mdev_max = tcp_rto_min(sk);
+			tp->mdev_max_us = tcp_rto_min_us(sk);
 		}
 	} else {
 		/* no previous measure. */
 		srtt = m << 3;		/* take the measured time to be rtt */
-		tp->mdev = m << 1;	/* make sure rto = 3*rtt */
-		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
+		tp->mdev_us = m << 1;	/* make sure rto = 3*rtt */
+		tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
+		tp->mdev_max_us = tp->rttvar_us;
 		tp->rtt_seq = tp->snd_nxt;
 	}
-	tp->srtt = max(1U, srtt);
+	tp->srtt_us = max(1U, srtt);
 }
 
 /* Set the sk_pacing_rate to allow proper sizing of TSO packets.
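For reference, the estimator above is Jacobson's SIGCOMM '88 scheme with the state renamed to usec fields. A simplified userland sketch, assuming the caller supplies RTT samples in microseconds; it keeps the srtt << 3 and mdev scaling but drops the Eifel-style finer gain and the mdev_max/rttvar windowing for brevity:

#include <stdint.h>

struct rtt_state {
	uint32_t srtt_us;	/* smoothed RTT << 3, usec */
	uint32_t mdev_us;	/* scaled mean deviation, usec */
};

static void rtt_sample(struct rtt_state *s, long m)	/* m: measured RTT, usec */
{
	if (s->srtt_us) {
		m -= (s->srtt_us >> 3);		/* error term */
		s->srtt_us += m;		/* srtt = 7/8 srtt + 1/8 new */
		if (m < 0)
			m = -m;			/* m is now abs(error) */
		m -= (s->mdev_us >> 2);
		s->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 |err| */
	} else {
		s->srtt_us = m << 3;		/* first sample seeds the state */
		s->mdev_us = m << 1;		/* so that rto ~= 3 * rtt initially */
	}
}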
@@ -742,20 +743,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
 	u64 rate;
 
 	/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
-	rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+	rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
 
 	rate *= max(tp->snd_cwnd, tp->packets_out);
 
-	/* Correction for small srtt and scheduling constraints.
-	 * For small rtt, consider noise is too high, and use
-	 * the minimal value (srtt = 1 -> 125 us for HZ=1000)
-	 *
-	 * We probably need usec resolution in the future.
-	 * Note: This also takes care of possible srtt=0 case,
-	 * when tcp_rtt_estimator() was not yet called.
-	 */
-	if (tp->srtt > 8 + 2)
-		do_div(rate, tp->srtt);
+	if (likely(tp->srtt_us))
+		do_div(rate, tp->srtt_us);
 
 	/* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate
 	 * without any lock. We want to make sure compiler wont store
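A worked check of the usec pacing arithmetic above, with hypothetical numbers (mss 1448, cwnd 10, srtt 10 ms, so the shifted field holds 80000): the << 3 in the constant cancels the << 3 stored in srtt_us.

#include <stdint.h>

static uint64_t pacing_rate_Bps(uint32_t mss, uint32_t cwnd, uint32_t srtt_us)
{
	/* 200% of mss * cwnd / srtt, as in tcp_update_pacing_rate() */
	uint64_t rate = (uint64_t)mss * 2 * (1000000ULL << 3);

	rate *= cwnd;
	if (srtt_us)			/* srtt_us is the << 3 scaled field */
		rate /= srtt_us;
	return rate;			/* 1448, 10, 80000 -> 2896000 B/s */
}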
@@ -1122,10 +1115,10 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 }
 
 struct tcp_sacktag_state {
 	int reord;
 	int fack_count;
-	int flag;
-	s32 rtt; /* RTT measured by SACKing never-retransmitted data */
+	long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+	int flag;
 };
 
 /* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1186,7 +1179,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 static u8 tcp_sacktag_one(struct sock *sk,
 			  struct tcp_sacktag_state *state, u8 sacked,
 			  u32 start_seq, u32 end_seq,
-			  int dup_sack, int pcount, u32 xmit_time)
+			  int dup_sack, int pcount,
+			  const struct skb_mstamp *xmit_time)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int fack_count = state->fack_count;
@@ -1227,8 +1221,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
 		if (!after(end_seq, tp->high_seq))
 			state->flag |= FLAG_ORIG_SACK_ACKED;
 		/* Pick the earliest sequence sacked for RTT */
-		if (state->rtt < 0)
-			state->rtt = tcp_time_stamp - xmit_time;
+		if (state->rtt_us < 0) {
+			struct skb_mstamp now;
+
+			skb_mstamp_get(&now);
+			state->rtt_us = skb_mstamp_us_delta(&now,
+							    xmit_time);
+		}
 	}
 
 	if (sacked & TCPCB_LOST) {
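skb_mstamp_get() and skb_mstamp_us_delta() come from the companion skb_mstamp infrastructure in the same series; the sampled SACK RTT is simply the usec delta between now and the skb's transmit stamp. A rough userland analogue of that get/delta pattern, for illustration only (the struct and helpers below are invented, not the kernel's):

#include <stdint.h>
#include <time.h>

struct mstamp { uint64_t us; };

static void mstamp_get(struct mstamp *m)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);	/* monotonic, as RTT sampling needs */
	m->us = (uint64_t)ts.tv_sec * 1000000 + (uint64_t)ts.tv_nsec / 1000;
}

static int64_t mstamp_us_delta(const struct mstamp *now, const struct mstamp *then)
{
	return (int64_t)(now->us - then->us);
}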
@@ -1287,7 +1286,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	 */
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
-			TCP_SKB_CB(skb)->when);
+			&skb->skb_mstamp);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1565,7 +1564,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
 						TCP_SKB_CB(skb)->end_seq,
 						dup_sack,
 						tcp_skb_pcount(skb),
-						TCP_SKB_CB(skb)->when);
+						&skb->skb_mstamp);
 
 				if (!before(TCP_SKB_CB(skb)->seq,
 					    tcp_highest_sack_seq(tp)))
@@ -1622,7 +1621,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
 
 static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
-			u32 prior_snd_una, s32 *sack_rtt)
+			u32 prior_snd_una, long *sack_rtt_us)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1640,7 +1639,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 
 	state.flag = 0;
 	state.reord = tp->packets_out;
-	state.rtt = -1;
+	state.rtt_us = -1L;
 
 	if (!tp->sacked_out) {
 		if (WARN_ON(tp->fackets_out))
@@ -1824,7 +1823,7 @@ out:
 	WARN_ON((int)tp->retrans_out < 0);
 	WARN_ON((int)tcp_packets_in_flight(tp) < 0);
 #endif
-	*sack_rtt = state.rtt;
+	*sack_rtt_us = state.rtt_us;
 	return state.flag;
 }
 
@@ -2034,10 +2033,12 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 	 * available, or RTO is scheduled to fire first.
 	 */
 	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-	    (flag & FLAG_ECE) || !tp->srtt)
+	    (flag & FLAG_ECE) || !tp->srtt_us)
 		return false;
 
-	delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
+	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
+		    msecs_to_jiffies(2));
+
 	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
 		return false;
 
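Scale check for the delay computed above: since srtt_us stores the smoothed RTT left-shifted by 3, srtt_us >> 5 is srtt / 4, so the early-retransmit timer is armed a quarter of an RTT out, floored at 2 ms. A sketch with a hypothetical 40 ms RTT:

#include <stdint.h>

static uint32_t early_retrans_delay_us(uint32_t srtt_us)	/* << 3 scaled */
{
	uint32_t delay = srtt_us >> 5;	/* (srtt << 3) >> 5 == srtt / 4 */

	if (delay < 2000)		/* 2 ms floor, as in the hunk above */
		delay = 2000;
	return delay;			/* srtt 40 ms -> 320000 -> 10000 us */
}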
@@ -2884,7 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 }
 
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
-				      s32 seq_rtt, s32 sack_rtt)
+				      long seq_rtt_us, long sack_rtt_us)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2894,10 +2895,10 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * is acked (RFC6298).
 	 */
 	if (flag & FLAG_RETRANS_DATA_ACKED)
-		seq_rtt = -1;
+		seq_rtt_us = -1L;
 
-	if (seq_rtt < 0)
-		seq_rtt = sack_rtt;
+	if (seq_rtt_us < 0)
+		seq_rtt_us = sack_rtt_us;
 
 	/* RTTM Rule: A TSecr value received in a segment is used to
 	 * update the averaged RTT measurement only if the segment
@@ -2905,14 +2906,14 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	 * left edge of the send window.
 	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
 	 */
-	if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
+	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED)
-		seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr);
 
-	if (seq_rtt < 0)
+	if (seq_rtt_us < 0)
 		return false;
 
-	tcp_rtt_estimator(sk, seq_rtt);
+	tcp_rtt_estimator(sk, seq_rtt_us);
 	tcp_set_rto(sk);
 
 	/* RFC6298: only reset backoff on valid RTT measurement. */
@@ -2924,16 +2925,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	s32 seq_rtt = -1;
+	long seq_rtt_us = -1L;
 
 	if (synack_stamp && !tp->total_retrans)
-		seq_rtt = tcp_time_stamp - synack_stamp;
+		seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
 
 	/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
 	 * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
 	 */
-	if (!tp->srtt)
-		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
+	if (!tp->srtt_us)
+		tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
 }
 
 static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked, u32 in_flight)
@@ -3022,26 +3023,27 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-			       u32 prior_snd_una, s32 sack_rtt)
+			       u32 prior_snd_una, long sack_rtt_us)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct sk_buff *skb;
-	u32 now = tcp_time_stamp;
+	struct skb_mstamp first_ackt, last_ackt, now;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 prior_sacked = tp->sacked_out;
+	u32 reord = tp->packets_out;
 	bool fully_acked = true;
-	int flag = 0;
+	long ca_seq_rtt_us = -1L;
+	long seq_rtt_us = -1L;
+	struct sk_buff *skb;
 	u32 pkts_acked = 0;
-	u32 reord = tp->packets_out;
-	u32 prior_sacked = tp->sacked_out;
-	s32 seq_rtt = -1;
-	s32 ca_seq_rtt = -1;
-	ktime_t last_ackt = net_invalid_timestamp();
 	bool rtt_update;
+	int flag = 0;
+
+	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-		u32 acked_pcount;
 		u8 sacked = scb->sacked;
+		u32 acked_pcount;
 
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
@@ -3063,11 +3065,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				tp->retrans_out -= acked_pcount;
 				flag |= FLAG_RETRANS_DATA_ACKED;
 			} else {
-				ca_seq_rtt = now - scb->when;
-				last_ackt = skb->tstamp;
-				if (seq_rtt < 0) {
-					seq_rtt = ca_seq_rtt;
-				}
+				last_ackt = skb->skb_mstamp;
+				if (!first_ackt.v64)
+					first_ackt = last_ackt;
+
 				if (!(sacked & TCPCB_SACKED_ACKED))
 					reord = min(pkts_acked, reord);
 				if (!after(scb->end_seq, tp->high_seq))
@@ -3113,7 +3114,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt);
+	skb_mstamp_get(&now);
+	if (first_ackt.v64) {
+		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
+		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+	}
+
+	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
 
 	if (flag & FLAG_ACKED) {
 		const struct tcp_congestion_ops *ca_ops
@@ -3141,25 +3148,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-		if (ca_ops->pkts_acked) {
-			s32 rtt_us = -1;
-
-			/* Is the ACK triggering packet unambiguous? */
-			if (!(flag & FLAG_RETRANS_DATA_ACKED)) {
-				/* High resolution needed and available? */
-				if (ca_ops->flags & TCP_CONG_RTT_STAMP &&
-				    !ktime_equal(last_ackt,
-						 net_invalid_timestamp()))
-					rtt_us = ktime_us_delta(ktime_get_real(),
-								last_ackt);
-				else if (ca_seq_rtt >= 0)
-					rtt_us = jiffies_to_usecs(ca_seq_rtt);
-			}
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, ca_seq_rtt_us);
 
-			ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-		}
-	} else if (skb && rtt_update && sack_rtt >= 0 &&
-		   sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) {
+	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
+		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3369,12 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
-	u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
+	u32 prior_in_flight;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
 	const int prior_unsacked = tp->packets_out - tp->sacked_out;
 	int acked = 0; /* Number of packets newly acked */
-	s32 sack_rtt = -1;
+	long sack_rtt_us = -1L;
 
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
@@ -3432,7 +3425,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (TCP_SKB_CB(skb)->sacked)
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt);
+						&sack_rtt_us);
 
 	if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 		flag |= FLAG_ECE;
@@ -3451,7 +3444,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	/* See if we can take anything off of the retransmit queue. */
 	acked = tp->packets_out;
-	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
+	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+				    sack_rtt_us);
 	acked -= tp->packets_out;
 
3457 /* Advance cwnd if state allows */ 3451 /* Advance cwnd if state allows */
@@ -3474,8 +3468,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
-	if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
-		tcp_update_pacing_rate(sk);
+	tcp_update_pacing_rate(sk);
 	return 1;
 
 no_queue:
@@ -3504,7 +3497,7 @@ old_ack:
 	 */
 	if (TCP_SKB_CB(skb)->sacked) {
 		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
-						&sack_rtt);
+						&sack_rtt_us);
 		tcp_fastretrans_alert(sk, acked, prior_unsacked,
 				      is_dupack, flag);
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3cf976510497..17c0fb172fba 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -435,7 +435,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		break;
 
 	icsk->icsk_backoff--;
-	inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+	inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
 		TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
 	tcp_bound_rto(sk);
 
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 503798f2fcd6..c9aecae31327 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -315,7 +315,6 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us)
 }
 
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
-	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
 	struct tcp_fastopen_cookie	cookie;
 };
 
+/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
+ * Kernel only stores RTT and RTTVAR in usec resolution
+ */
+#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
+
 struct tcp_metrics_block {
 	struct tcp_metrics_block __rcu	*tcpm_next;
 	struct inetpeer_addr		tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
 	u32				tcpm_ts;
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
-	u32				tcpm_vals[TCP_METRIC_MAX + 1];
+	u32				tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
 
 	struct rcu_head			rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
 	return tm->tcpm_vals[idx];
 }
 
-static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
-				  enum tcp_metric_index idx)
-{
-	return msecs_to_jiffies(tm->tcpm_vals[idx]);
-}
-
 static void tcp_metric_set(struct tcp_metrics_block *tm,
 			   enum tcp_metric_index idx,
 			   u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
 	tm->tcpm_vals[idx] = val;
 }
 
-static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
-				 enum tcp_metric_index idx,
-				 u32 val)
-{
-	tm->tcpm_vals[idx] = jiffies_to_msecs(val);
-}
-
 static bool addr_same(const struct inetpeer_addr *a,
 		      const struct inetpeer_addr *b)
 {
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
 
 static DEFINE_SPINLOCK(tcp_metrics_lock);
 
-static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
+static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+			  const struct dst_entry *dst,
 			  bool fastopen_clear)
 {
+	u32 msval;
 	u32 val;
 
 	tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
 		val |= 1 << TCP_METRIC_REORDERING;
 	tm->tcpm_lock = val;
 
-	tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT);
-	tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR);
+	msval = dst_metric_raw(dst, RTAX_RTT);
+	tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+
+	msval = dst_metric_raw(dst, RTAX_RTTVAR);
+	tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
 	tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
 	tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
 	tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
 	dst_confirm(dst);
 
 	rcu_read_lock();
-	if (icsk->icsk_backoff || !tp->srtt) {
+	if (icsk->icsk_backoff || !tp->srtt_us) {
 		/* This session failed to estimate rtt. Why?
 		 * Probably, no packets returned in time. Reset our
 		 * results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
 	if (!tm)
 		goto out_unlock;
 
-	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
-	m = rtt - tp->srtt;
+	rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
+	m = rtt - tp->srtt_us;
 
 	/* If newly calculated rtt larger than stored one, store new
 	 * one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
 	 */
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
 		if (m <= 0)
-			rtt = tp->srtt;
+			rtt = tp->srtt_us;
 		else
 			rtt -= (m >> 3);
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
+		tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
 	}
 
 	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
 
 		/* Scale deviation to rttvar fixed point */
 		m >>= 1;
-		if (m < tp->mdev)
-			m = tp->mdev;
+		if (m < tp->mdev_us)
+			m = tp->mdev_us;
 
-		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
+		var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
 		if (m >= var)
 			var = m;
 		else
 			var -= (var - m) >> 2;
 
-		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
+		tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
 	}
 
 	if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
 		tp->reordering = val;
 	}
 
-	crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+	crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
 	rcu_read_unlock();
 reset:
 	/* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
 	 * to low value, and then abruptly stops to do it and starts to delay
 	 * ACKs, wait for troubles.
 	 */
-	if (crtt > tp->srtt) {
+	if (crtt > tp->srtt_us) {
 		/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
-		crtt >>= 3;
+		crtt /= 8 * USEC_PER_MSEC;
 		inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
-	} else if (tp->srtt == 0) {
+	} else if (tp->srtt_us == 0) {
 		/* RFC6298: 5.7 We've failed to get a valid RTT sample from
 		 * 3WHS. This is most likely due to retransmission,
 		 * including spurious one. Reset the RTO back to 3secs
 		 * from the more aggressive 1sec to avoid more spurious
 		 * retransmission.
 		 */
-		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+		tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
+		tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
+
 		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
 	}
 	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
 	nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
 	if (!nest)
 		goto nla_put_failure;
-	for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
-		if (!tm->tcpm_vals[i])
+	for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+		u32 val = tm->tcpm_vals[i];
+
+		if (!val)
 			continue;
-		if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+		if (i == TCP_METRIC_RTT) {
+			if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
+					val) < 0)
+				goto nla_put_failure;
+			n++;
+			val = max(val / 1000, 1U);
+		}
+		if (i == TCP_METRIC_RTTVAR) {
+			if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
+					val) < 0)
+				goto nla_put_failure;
+			n++;
+			val = max(val / 1000, 1U);
+		}
+		if (nla_put_u32(msg, i + 1, val) < 0)
 			goto nla_put_failure;
 		n++;
 	}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7a436c517e44..ca788ada5bd3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -398,8 +398,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		tcp_init_wl(newtp, treq->rcv_isn);
 
-		newtp->srtt = 0;
-		newtp->mdev = TCP_TIMEOUT_INIT;
+		newtp->srtt_us = 0;
+		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c5eadec001c1..bf38b1fb63ab 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -866,11 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
-		/* If congestion control is doing timestamping, we must
-		 * take such a timestamp before we potentially clone/copy.
-		 */
-		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-			__net_timestamp(skb);
+		skb_mstamp_get(&skb->skb_mstamp);
 
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
@@ -1974,7 +1970,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
-	u32 rtt = tp->srtt >> 3;
+	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
 		return false;
@@ -1996,7 +1992,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
+	if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
 	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 		return false;
 
@@ -3050,8 +3046,9 @@ void tcp_send_delayed_ack(struct sock *sk)
 	 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
 	 * directly.
 	 */
-	if (tp->srtt) {
-		int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
+	if (tp->srtt_us) {
+		int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
+				TCP_DELACK_MIN);
 
 		if (rtt < max_ato)
 			max_ato = rtt;
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 1f2d37613c9e..3b66610d4156 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -154,7 +154,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		p->snd_wnd = tp->snd_wnd;
 		p->rcv_wnd = tp->rcv_wnd;
 		p->ssthresh = tcp_current_ssthresh(sk);
-		p->srtt = tp->srtt >> 3;
+		p->srtt = tp->srtt_us >> 3;
 
 		tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
 	}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a022c17c9cf1..48539fff6357 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -306,7 +306,6 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a94865..1b8e28fcd7e1 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -203,7 +203,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 8eab02030ed0..5ede0e727945 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -227,7 +227,6 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
-	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,