about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/tcp_metrics.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- net/ipv4/tcp_metrics.c 83
1 files changed, 49 insertions, 34 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics {
33 struct tcp_fastopen_cookie cookie; 33 struct tcp_fastopen_cookie cookie;
34}; 34};
35 35
36/* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility
37 * Kernel only stores RTT and RTTVAR in usec resolution
38 */
39#define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2)
40
36struct tcp_metrics_block { 41struct tcp_metrics_block {
37 struct tcp_metrics_block __rcu *tcpm_next; 42 struct tcp_metrics_block __rcu *tcpm_next;
38 struct inetpeer_addr tcpm_saddr; 43 struct inetpeer_addr tcpm_saddr;
@@ -41,7 +46,7 @@ struct tcp_metrics_block {
41 u32 tcpm_ts; 46 u32 tcpm_ts;
42 u32 tcpm_ts_stamp; 47 u32 tcpm_ts_stamp;
43 u32 tcpm_lock; 48 u32 tcpm_lock;
44 u32 tcpm_vals[TCP_METRIC_MAX + 1]; 49 u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1];
45 struct tcp_fastopen_metrics tcpm_fastopen; 50 struct tcp_fastopen_metrics tcpm_fastopen;
46 51
47 struct rcu_head rcu_head; 52 struct rcu_head rcu_head;
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
59 return tm->tcpm_vals[idx]; 64 return tm->tcpm_vals[idx];
60} 65}
61 66
62static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm,
63 enum tcp_metric_index idx)
64{
65 return msecs_to_jiffies(tm->tcpm_vals[idx]);
66}
67
68static void tcp_metric_set(struct tcp_metrics_block *tm, 67static void tcp_metric_set(struct tcp_metrics_block *tm,
69 enum tcp_metric_index idx, 68 enum tcp_metric_index idx,
70 u32 val) 69 u32 val)
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
72 tm->tcpm_vals[idx] = val; 71 tm->tcpm_vals[idx] = val;
73} 72}
74 73
75static void tcp_metric_set_msecs(struct tcp_metrics_block *tm,
76 enum tcp_metric_index idx,
77 u32 val)
78{
79 tm->tcpm_vals[idx] = jiffies_to_msecs(val);
80}
81
82static bool addr_same(const struct inetpeer_addr *a, 74static bool addr_same(const struct inetpeer_addr *a,
83 const struct inetpeer_addr *b) 75 const struct inetpeer_addr *b)
84{ 76{
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket {
101 93
102static DEFINE_SPINLOCK(tcp_metrics_lock); 94static DEFINE_SPINLOCK(tcp_metrics_lock);
103 95
104static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, 96static void tcpm_suck_dst(struct tcp_metrics_block *tm,
97 const struct dst_entry *dst,
105 bool fastopen_clear) 98 bool fastopen_clear)
106{ 99{
100 u32 msval;
107 u32 val; 101 u32 val;
108 102
109 tm->tcpm_stamp = jiffies; 103 tm->tcpm_stamp = jiffies;
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
121 val |= 1 << TCP_METRIC_REORDERING; 115 val |= 1 << TCP_METRIC_REORDERING;
122 tm->tcpm_lock = val; 116 tm->tcpm_lock = val;
123 117
124 tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); 118 msval = dst_metric_raw(dst, RTAX_RTT);
125 tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); 119 tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
120
121 msval = dst_metric_raw(dst, RTAX_RTTVAR);
122 tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
126 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); 123 tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
127 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); 124 tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
128 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 125 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk)
384 dst_confirm(dst); 381 dst_confirm(dst);
385 382
386 rcu_read_lock(); 383 rcu_read_lock();
387 if (icsk->icsk_backoff || !tp->srtt) { 384 if (icsk->icsk_backoff || !tp->srtt_us) {
388 /* This session failed to estimate rtt. Why? 385 /* This session failed to estimate rtt. Why?
389 * Probably, no packets returned in time. Reset our 386 * Probably, no packets returned in time. Reset our
390 * results. 387 * results.
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk)
399 if (!tm) 396 if (!tm)
400 goto out_unlock; 397 goto out_unlock;
401 398
402 rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); 399 rtt = tcp_metric_get(tm, TCP_METRIC_RTT);
403 m = rtt - tp->srtt; 400 m = rtt - tp->srtt_us;
404 401
405 /* If newly calculated rtt larger than stored one, store new 402 /* If newly calculated rtt larger than stored one, store new
406 * one. Otherwise, use EWMA. Remember, rtt overestimation is 403 * one. Otherwise, use EWMA. Remember, rtt overestimation is
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk)
408 */ 405 */
409 if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { 406 if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
410 if (m <= 0) 407 if (m <= 0)
411 rtt = tp->srtt; 408 rtt = tp->srtt_us;
412 else 409 else
413 rtt -= (m >> 3); 410 rtt -= (m >> 3);
414 tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); 411 tcp_metric_set(tm, TCP_METRIC_RTT, rtt);
415 } 412 }
416 413
417 if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { 414 if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk)
422 419
423 /* Scale deviation to rttvar fixed point */ 420 /* Scale deviation to rttvar fixed point */
424 m >>= 1; 421 m >>= 1;
425 if (m < tp->mdev) 422 if (m < tp->mdev_us)
426 m = tp->mdev; 423 m = tp->mdev_us;
427 424
428 var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); 425 var = tcp_metric_get(tm, TCP_METRIC_RTTVAR);
429 if (m >= var) 426 if (m >= var)
430 var = m; 427 var = m;
431 else 428 else
432 var -= (var - m) >> 2; 429 var -= (var - m) >> 2;
433 430
434 tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); 431 tcp_metric_set(tm, TCP_METRIC_RTTVAR, var);
435 } 432 }
436 433
437 if (tcp_in_initial_slowstart(tp)) { 434 if (tcp_in_initial_slowstart(tp)) {
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk)
528 tp->reordering = val; 525 tp->reordering = val;
529 } 526 }
530 527
531 crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); 528 crtt = tcp_metric_get(tm, TCP_METRIC_RTT);
532 rcu_read_unlock(); 529 rcu_read_unlock();
533reset: 530reset:
534 /* The initial RTT measurement from the SYN/SYN-ACK is not ideal 531 /* The initial RTT measurement from the SYN/SYN-ACK is not ideal
@@ -551,18 +548,20 @@ reset:
551 * to low value, and then abruptly stops to do it and starts to delay 548 * to low value, and then abruptly stops to do it and starts to delay
552 * ACKs, wait for troubles. 549 * ACKs, wait for troubles.
553 */ 550 */
554 if (crtt > tp->srtt) { 551 if (crtt > tp->srtt_us) {
555 /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ 552 /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
556 crtt >>= 3; 553 crtt /= 8 * USEC_PER_MSEC;
557 inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); 554 inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
558 } else if (tp->srtt == 0) { 555 } else if (tp->srtt_us == 0) {
559 /* RFC6298: 5.7 We've failed to get a valid RTT sample from 556 /* RFC6298: 5.7 We've failed to get a valid RTT sample from
560 * 3WHS. This is most likely due to retransmission, 557 * 3WHS. This is most likely due to retransmission,
561 * including spurious one. Reset the RTO back to 3secs 558 * including spurious one. Reset the RTO back to 3secs
562 * from the more aggressive 1sec to avoid more spurious 559 * from the more aggressive 1sec to avoid more spurious
563 * retransmission. 560 * retransmission.
564 */ 561 */
565 tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; 562 tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK);
563 tp->mdev_us = tp->mdev_max_us = tp->rttvar_us;
564
566 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; 565 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
567 } 566 }
568 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been 567 /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
809 nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); 808 nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
810 if (!nest) 809 if (!nest)
811 goto nla_put_failure; 810 goto nla_put_failure;
812 for (i = 0; i < TCP_METRIC_MAX + 1; i++) { 811 for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
813 if (!tm->tcpm_vals[i]) 812 u32 val = tm->tcpm_vals[i];
813
814 if (!val)
814 continue; 815 continue;
815 if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) 816 if (i == TCP_METRIC_RTT) {
817 if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1,
818 val) < 0)
819 goto nla_put_failure;
820 n++;
821 val = max(val / 1000, 1U);
822 }
823 if (i == TCP_METRIC_RTTVAR) {
824 if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1,
825 val) < 0)
826 goto nla_put_failure;
827 n++;
828 val = max(val / 1000, 1U);
829 }
830 if (nla_put_u32(msg, i + 1, val) < 0)
816 goto nla_put_failure; 831 goto nla_put_failure;
817 n++; 832 n++;
818 } 833 }