summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorFrancis Yan <francisyyan@gmail.com>2016-11-28 02:07:16 -0500
committerDavid S. Miller <davem@davemloft.net>2016-11-30 10:04:24 -0500
commitb0f71bd3e190df827d25d7f19bf09037567f14b7 (patch)
tree4f65ddefad045bc8a14d438a7ea6252212e51d3c /net
parent5615f88614a47d2b802e1d14d31b623696109276 (diff)
tcp: instrument how long TCP is limited by insufficient send buffer
This patch measures the amount of time when TCP runs out of new data to send to the network due to an insufficient send buffer, while TCP is still busy delivering (i.e. the write queue is not empty). The goal is to indicate that either send-buffer autotuning or the user's SO_SNDBUF setting has resulted in network under-utilization.

The measurement starts conservatively by checking various conditions to minimize false claims (i.e. under-estimation is more likely). The measurement stops when the SOCK_NOSPACE flag is cleared, but it does not account for the time elapsed until the next application write. Also, the measurement only starts if the sender is still busy sending data, such that the limit accounted for is part of the total busy time.

Signed-off-by: Francis Yan <francisyyan@gmail.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_output.c12
3 files changed, 24 insertions, 3 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 913f9bbfc030..259ffb50e429 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -996,8 +996,11 @@ do_error:
 	goto out;
 out_err:
 	/* make sure we wake any epoll edge trigger waiter */
-	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+		     err == -EAGAIN)) {
 		sk->sk_write_space(sk);
+		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+	}
 	return sk_stream_error(sk, flags, err);
 }
 
@@ -1331,8 +1334,11 @@ do_error:
 out_err:
 	err = sk_stream_error(sk, flags, err);
 	/* make sure we wake any epoll edge trigger waiter */
-	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+	if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+		     err == -EAGAIN)) {
 		sk->sk_write_space(sk);
+		tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+	}
 	release_sock(sk);
 	return err;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a5d172761610..56fe736fd64d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5059,8 +5059,11 @@ static void tcp_check_space(struct sock *sk)
 		/* pairs with tcp_poll() */
 		smp_mb__after_atomic();
 		if (sk->sk_socket &&
-		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
 			tcp_new_space(sk);
+			if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+				tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+		}
 	}
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b74444cee24d..d3545d0cff75 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1514,6 +1514,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 		if (sysctl_tcp_slow_start_after_idle &&
 		    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
 			tcp_cwnd_application_limited(sk);
+
+		/* The following conditions together indicate the starvation
+		 * is caused by insufficient sender buffer:
+		 * 1) just sent some data (see tcp_write_xmit)
+		 * 2) not cwnd limited (this else condition)
+		 * 3) no more data to send (null tcp_send_head )
+		 * 4) application is hitting buffer limit (SOCK_NOSPACE)
+		 */
+		if (!tcp_send_head(sk) && sk->sk_socket &&
+		    test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+		    (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+			tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
 	}
 }
 