aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuchung Cheng <ycheng@google.com> 2016-09-19 23:39:16 -0400
committer David S. Miller <davem@davemloft.net> 2016-09-21 00:23:00 -0400
commit eb8329e0a04db0061f714f033b4454326ba147f4 (patch)
tree 802808773223d2aeac8d207debf6fe66fac31d39
parent d7722e8570fc0f1e003cee7cf37694041828918b (diff)
tcp: export data delivery rate
This commit exports two new fields in struct tcp_info: tcpi_delivery_rate: The most recent goodput, as measured by tcp_rate_gen(). If the socket is limited by the sending application (e.g., no data to send), it reports the highest measurement instead of the most recent. The unit is bytes per second (like other rate fields in tcp_info). tcpi_delivery_rate_app_limited: A boolean indicating if the goodput was measured when the socket's throughput was limited by the sending application. This delivery rate information can be useful for applications that want to know the current throughput the TCP connection is seeing, e.g. adaptive bitrate video streaming. It can also be very useful for debugging or troubleshooting. Signed-off-by: Van Jacobson <vanj@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Nandita Dukkipati <nanditad@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/tcp.h 5
-rw-r--r-- include/uapi/linux/tcp.h 3
-rw-r--r-- net/ipv4/tcp.c 11
-rw-r--r-- net/ipv4/tcp_rate.c 12
4 files changed, 28 insertions, 3 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fdcd00ffcb66..a17ae7b85218 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -213,7 +213,8 @@ struct tcp_sock {
213 u8 reord; /* reordering detected */ 213 u8 reord; /* reordering detected */
214 } rack; 214 } rack;
215 u16 advmss; /* Advertised MSS */ 215 u16 advmss; /* Advertised MSS */
216 u8 unused; 216 u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
217 unused:7;
217 u8 nonagle : 4,/* Disable Nagle algorithm? */ 218 u8 nonagle : 4,/* Disable Nagle algorithm? */
218 thin_lto : 1,/* Use linear timeouts for thin streams */ 219 thin_lto : 1,/* Use linear timeouts for thin streams */
219 thin_dupack : 1,/* Fast retransmit on first dupack */ 220 thin_dupack : 1,/* Fast retransmit on first dupack */
@@ -271,6 +272,8 @@ struct tcp_sock {
271 u32 app_limited; /* limited until "delivered" reaches this val */ 272 u32 app_limited; /* limited until "delivered" reaches this val */
272 struct skb_mstamp first_tx_mstamp; /* start of window send phase */ 273 struct skb_mstamp first_tx_mstamp; /* start of window send phase */
273 struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ 274 struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
275 u32 rate_delivered; /* saved rate sample: packets delivered */
276 u32 rate_interval_us; /* saved rate sample: time elapsed */
274 277
275 u32 rcv_wnd; /* Current receiver window */ 278 u32 rcv_wnd; /* Current receiver window */
276 u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ 279 u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 482898fc433a..73ac0db487f8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -167,6 +167,7 @@ struct tcp_info {
167 __u8 tcpi_backoff; 167 __u8 tcpi_backoff;
168 __u8 tcpi_options; 168 __u8 tcpi_options;
169 __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; 169 __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
170 __u8 tcpi_delivery_rate_app_limited:1;
170 171
171 __u32 tcpi_rto; 172 __u32 tcpi_rto;
172 __u32 tcpi_ato; 173 __u32 tcpi_ato;
@@ -211,6 +212,8 @@ struct tcp_info {
211 __u32 tcpi_min_rtt; 212 __u32 tcpi_min_rtt;
212 __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ 213 __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
213 __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ 214 __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
215
216 __u64 tcpi_delivery_rate;
214}; 217};
215 218
216/* for TCP_MD5SIG socket option */ 219/* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2250f891f931..f253e5019d22 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2712{ 2712{
2713 const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ 2713 const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
2714 const struct inet_connection_sock *icsk = inet_csk(sk); 2714 const struct inet_connection_sock *icsk = inet_csk(sk);
2715 u32 now = tcp_time_stamp; 2715 u32 now = tcp_time_stamp, intv;
2716 unsigned int start; 2716 unsigned int start;
2717 int notsent_bytes; 2717 int notsent_bytes;
2718 u64 rate64; 2718 u64 rate64;
@@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2802 info->tcpi_min_rtt = tcp_min_rtt(tp); 2802 info->tcpi_min_rtt = tcp_min_rtt(tp);
2803 info->tcpi_data_segs_in = tp->data_segs_in; 2803 info->tcpi_data_segs_in = tp->data_segs_in;
2804 info->tcpi_data_segs_out = tp->data_segs_out; 2804 info->tcpi_data_segs_out = tp->data_segs_out;
2805
2806 info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
2807 rate = READ_ONCE(tp->rate_delivered);
2808 intv = READ_ONCE(tp->rate_interval_us);
2809 if (rate && intv) {
2810 rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
2811 do_div(rate64, intv);
2812 put_unaligned(rate64, &info->tcpi_delivery_rate);
2813 }
2805} 2814}
2806EXPORT_SYMBOL_GPL(tcp_get_info); 2815EXPORT_SYMBOL_GPL(tcp_get_info);
2807 2816
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 52ff84be59ab..9be1581a5a08 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
149 * for connections suffer heavy or prolonged losses. 149 * for connections suffer heavy or prolonged losses.
150 */ 150 */
151 if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { 151 if (unlikely(rs->interval_us < tcp_min_rtt(tp))) {
152 rs->interval_us = -1;
153 if (!rs->is_retrans) 152 if (!rs->is_retrans)
154 pr_debug("tcp rate: %ld %d %u %u %u\n", 153 pr_debug("tcp rate: %ld %d %u %u %u\n",
155 rs->interval_us, rs->delivered, 154 rs->interval_us, rs->delivered,
156 inet_csk(sk)->icsk_ca_state, 155 inet_csk(sk)->icsk_ca_state,
157 tp->rx_opt.sack_ok, tcp_min_rtt(tp)); 156 tp->rx_opt.sack_ok, tcp_min_rtt(tp));
157 rs->interval_us = -1;
158 return;
159 }
160
161 /* Record the last non-app-limited or the highest app-limited bw */
162 if (!rs->is_app_limited ||
163 ((u64)rs->delivered * tp->rate_interval_us >=
164 (u64)tp->rate_delivered * rs->interval_us)) {
165 tp->rate_delivered = rs->delivered;
166 tp->rate_interval_us = rs->interval_us;
167 tp->rate_app_limited = rs->is_app_limited;
158 } 168 }
159} 169}
160 170