diff options
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
-rw-r--r-- | net/ipv4/tcp_metrics.c | 83 |
1 file changed, 49 insertions, 34 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index d547075d8300..dcaf72f10216 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c | |||
@@ -33,6 +33,11 @@ struct tcp_fastopen_metrics { | |||
33 | struct tcp_fastopen_cookie cookie; | 33 | struct tcp_fastopen_cookie cookie; |
34 | }; | 34 | }; |
35 | 35 | ||
36 | /* TCP_METRIC_MAX includes 2 extra fields for userspace compatibility | ||
37 | * Kernel only stores RTT and RTTVAR in usec resolution | ||
38 | */ | ||
39 | #define TCP_METRIC_MAX_KERNEL (TCP_METRIC_MAX - 2) | ||
40 | |||
36 | struct tcp_metrics_block { | 41 | struct tcp_metrics_block { |
37 | struct tcp_metrics_block __rcu *tcpm_next; | 42 | struct tcp_metrics_block __rcu *tcpm_next; |
38 | struct inetpeer_addr tcpm_saddr; | 43 | struct inetpeer_addr tcpm_saddr; |
@@ -41,7 +46,7 @@ struct tcp_metrics_block { | |||
41 | u32 tcpm_ts; | 46 | u32 tcpm_ts; |
42 | u32 tcpm_ts_stamp; | 47 | u32 tcpm_ts_stamp; |
43 | u32 tcpm_lock; | 48 | u32 tcpm_lock; |
44 | u32 tcpm_vals[TCP_METRIC_MAX + 1]; | 49 | u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; |
45 | struct tcp_fastopen_metrics tcpm_fastopen; | 50 | struct tcp_fastopen_metrics tcpm_fastopen; |
46 | 51 | ||
47 | struct rcu_head rcu_head; | 52 | struct rcu_head rcu_head; |
@@ -59,12 +64,6 @@ static u32 tcp_metric_get(struct tcp_metrics_block *tm, | |||
59 | return tm->tcpm_vals[idx]; | 64 | return tm->tcpm_vals[idx]; |
60 | } | 65 | } |
61 | 66 | ||
62 | static u32 tcp_metric_get_jiffies(struct tcp_metrics_block *tm, | ||
63 | enum tcp_metric_index idx) | ||
64 | { | ||
65 | return msecs_to_jiffies(tm->tcpm_vals[idx]); | ||
66 | } | ||
67 | |||
68 | static void tcp_metric_set(struct tcp_metrics_block *tm, | 67 | static void tcp_metric_set(struct tcp_metrics_block *tm, |
69 | enum tcp_metric_index idx, | 68 | enum tcp_metric_index idx, |
70 | u32 val) | 69 | u32 val) |
@@ -72,13 +71,6 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, | |||
72 | tm->tcpm_vals[idx] = val; | 71 | tm->tcpm_vals[idx] = val; |
73 | } | 72 | } |
74 | 73 | ||
75 | static void tcp_metric_set_msecs(struct tcp_metrics_block *tm, | ||
76 | enum tcp_metric_index idx, | ||
77 | u32 val) | ||
78 | { | ||
79 | tm->tcpm_vals[idx] = jiffies_to_msecs(val); | ||
80 | } | ||
81 | |||
82 | static bool addr_same(const struct inetpeer_addr *a, | 74 | static bool addr_same(const struct inetpeer_addr *a, |
83 | const struct inetpeer_addr *b) | 75 | const struct inetpeer_addr *b) |
84 | { | 76 | { |
@@ -101,9 +93,11 @@ struct tcpm_hash_bucket { | |||
101 | 93 | ||
102 | static DEFINE_SPINLOCK(tcp_metrics_lock); | 94 | static DEFINE_SPINLOCK(tcp_metrics_lock); |
103 | 95 | ||
104 | static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, | 96 | static void tcpm_suck_dst(struct tcp_metrics_block *tm, |
97 | const struct dst_entry *dst, | ||
105 | bool fastopen_clear) | 98 | bool fastopen_clear) |
106 | { | 99 | { |
100 | u32 msval; | ||
107 | u32 val; | 101 | u32 val; |
108 | 102 | ||
109 | tm->tcpm_stamp = jiffies; | 103 | tm->tcpm_stamp = jiffies; |
@@ -121,8 +115,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, | |||
121 | val |= 1 << TCP_METRIC_REORDERING; | 115 | val |= 1 << TCP_METRIC_REORDERING; |
122 | tm->tcpm_lock = val; | 116 | tm->tcpm_lock = val; |
123 | 117 | ||
124 | tm->tcpm_vals[TCP_METRIC_RTT] = dst_metric_raw(dst, RTAX_RTT); | 118 | msval = dst_metric_raw(dst, RTAX_RTT); |
125 | tm->tcpm_vals[TCP_METRIC_RTTVAR] = dst_metric_raw(dst, RTAX_RTTVAR); | 119 | tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; |
120 | |||
121 | msval = dst_metric_raw(dst, RTAX_RTTVAR); | ||
122 | tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; | ||
126 | tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); | 123 | tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); |
127 | tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); | 124 | tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); |
128 | tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); | 125 | tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); |
@@ -384,7 +381,7 @@ void tcp_update_metrics(struct sock *sk) | |||
384 | dst_confirm(dst); | 381 | dst_confirm(dst); |
385 | 382 | ||
386 | rcu_read_lock(); | 383 | rcu_read_lock(); |
387 | if (icsk->icsk_backoff || !tp->srtt) { | 384 | if (icsk->icsk_backoff || !tp->srtt_us) { |
388 | /* This session failed to estimate rtt. Why? | 385 | /* This session failed to estimate rtt. Why? |
389 | * Probably, no packets returned in time. Reset our | 386 | * Probably, no packets returned in time. Reset our |
390 | * results. | 387 | * results. |
@@ -399,8 +396,8 @@ void tcp_update_metrics(struct sock *sk) | |||
399 | if (!tm) | 396 | if (!tm) |
400 | goto out_unlock; | 397 | goto out_unlock; |
401 | 398 | ||
402 | rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); | 399 | rtt = tcp_metric_get(tm, TCP_METRIC_RTT); |
403 | m = rtt - tp->srtt; | 400 | m = rtt - tp->srtt_us; |
404 | 401 | ||
405 | /* If newly calculated rtt larger than stored one, store new | 402 | /* If newly calculated rtt larger than stored one, store new |
406 | * one. Otherwise, use EWMA. Remember, rtt overestimation is | 403 | * one. Otherwise, use EWMA. Remember, rtt overestimation is |
@@ -408,10 +405,10 @@ void tcp_update_metrics(struct sock *sk) | |||
408 | */ | 405 | */ |
409 | if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { | 406 | if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) { |
410 | if (m <= 0) | 407 | if (m <= 0) |
411 | rtt = tp->srtt; | 408 | rtt = tp->srtt_us; |
412 | else | 409 | else |
413 | rtt -= (m >> 3); | 410 | rtt -= (m >> 3); |
414 | tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt); | 411 | tcp_metric_set(tm, TCP_METRIC_RTT, rtt); |
415 | } | 412 | } |
416 | 413 | ||
417 | if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { | 414 | if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) { |
@@ -422,16 +419,16 @@ void tcp_update_metrics(struct sock *sk) | |||
422 | 419 | ||
423 | /* Scale deviation to rttvar fixed point */ | 420 | /* Scale deviation to rttvar fixed point */ |
424 | m >>= 1; | 421 | m >>= 1; |
425 | if (m < tp->mdev) | 422 | if (m < tp->mdev_us) |
426 | m = tp->mdev; | 423 | m = tp->mdev_us; |
427 | 424 | ||
428 | var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR); | 425 | var = tcp_metric_get(tm, TCP_METRIC_RTTVAR); |
429 | if (m >= var) | 426 | if (m >= var) |
430 | var = m; | 427 | var = m; |
431 | else | 428 | else |
432 | var -= (var - m) >> 2; | 429 | var -= (var - m) >> 2; |
433 | 430 | ||
434 | tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var); | 431 | tcp_metric_set(tm, TCP_METRIC_RTTVAR, var); |
435 | } | 432 | } |
436 | 433 | ||
437 | if (tcp_in_initial_slowstart(tp)) { | 434 | if (tcp_in_initial_slowstart(tp)) { |
@@ -528,7 +525,7 @@ void tcp_init_metrics(struct sock *sk) | |||
528 | tp->reordering = val; | 525 | tp->reordering = val; |
529 | } | 526 | } |
530 | 527 | ||
531 | crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT); | 528 | crtt = tcp_metric_get(tm, TCP_METRIC_RTT); |
532 | rcu_read_unlock(); | 529 | rcu_read_unlock(); |
533 | reset: | 530 | reset: |
534 | /* The initial RTT measurement from the SYN/SYN-ACK is not ideal | 531 | /* The initial RTT measurement from the SYN/SYN-ACK is not ideal |
@@ -551,18 +548,20 @@ reset: | |||
551 | * to low value, and then abruptly stops to do it and starts to delay | 548 | * to low value, and then abruptly stops to do it and starts to delay |
552 | * ACKs, wait for troubles. | 549 | * ACKs, wait for troubles. |
553 | */ | 550 | */ |
554 | if (crtt > tp->srtt) { | 551 | if (crtt > tp->srtt_us) { |
555 | /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ | 552 | /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */ |
556 | crtt >>= 3; | 553 | crtt /= 8 * USEC_PER_MSEC; |
557 | inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); | 554 | inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk)); |
558 | } else if (tp->srtt == 0) { | 555 | } else if (tp->srtt_us == 0) { |
559 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from | 556 | /* RFC6298: 5.7 We've failed to get a valid RTT sample from |
560 | * 3WHS. This is most likely due to retransmission, | 557 | * 3WHS. This is most likely due to retransmission, |
561 | * including spurious one. Reset the RTO back to 3secs | 558 | * including spurious one. Reset the RTO back to 3secs |
562 | * from the more aggressive 1sec to avoid more spurious | 559 | * from the more aggressive 1sec to avoid more spurious |
563 | * retransmission. | 560 | * retransmission. |
564 | */ | 561 | */ |
565 | tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; | 562 | tp->rttvar_us = jiffies_to_usecs(TCP_TIMEOUT_FALLBACK); |
563 | tp->mdev_us = tp->mdev_max_us = tp->rttvar_us; | ||
564 | |||
566 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; | 565 | inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK; |
567 | } | 566 | } |
568 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been | 567 | /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been |
@@ -809,10 +808,26 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, | |||
809 | nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); | 808 | nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS); |
810 | if (!nest) | 809 | if (!nest) |
811 | goto nla_put_failure; | 810 | goto nla_put_failure; |
812 | for (i = 0; i < TCP_METRIC_MAX + 1; i++) { | 811 | for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { |
813 | if (!tm->tcpm_vals[i]) | 812 | u32 val = tm->tcpm_vals[i]; |
813 | |||
814 | if (!val) | ||
814 | continue; | 815 | continue; |
815 | if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0) | 816 | if (i == TCP_METRIC_RTT) { |
817 | if (nla_put_u32(msg, TCP_METRIC_RTT_US + 1, | ||
818 | val) < 0) | ||
819 | goto nla_put_failure; | ||
820 | n++; | ||
821 | val = max(val / 1000, 1U); | ||
822 | } | ||
823 | if (i == TCP_METRIC_RTTVAR) { | ||
824 | if (nla_put_u32(msg, TCP_METRIC_RTTVAR_US + 1, | ||
825 | val) < 0) | ||
826 | goto nla_put_failure; | ||
827 | n++; | ||
828 | val = max(val / 1000, 1U); | ||
829 | } | ||
830 | if (nla_put_u32(msg, i + 1, val) < 0) | ||
816 | goto nla_put_failure; | 831 | goto nla_put_failure; |
817 | n++; | 832 | n++; |
818 | } | 833 | } |