diff options
author | Eric Dumazet <edumazet@google.com> | 2018-04-16 13:33:35 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-04-16 18:26:37 -0400 |
commit | d1361840f8c519eaee9a78ffe09e4f0a1b586846 (patch) | |
tree | 86a904ade99a93544e0817cda7dc842b12f9b833 | |
parent | 10b19aeac1700c3ba94fb50583a766d9cdaf1e9e (diff) |
tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on a TCP socket, hoping to receive
one [E]POLLIN event only when a given number of bytes is ready in the socket
receive queue.
The problem is that receive autotuning is not aware of this constraint,
meaning sk_rcvbuf might be too small to allow all of those bytes to be stored.
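Add a new (struct proto_ops)->set_rcvlowat method so that a protocol
can override the default setsockopt(SO_RCVLOWAT) behavior.
TCP implements it as tcp_set_rcvlowat(): unless the application has locked
the receive buffer size (SOCK_RCVBUF_LOCK), sk_rcvbuf is raised to twice the
requested value, capped at tcp_rmem[2], and window_clamp is updated to match.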
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/net.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 1 | ||||
-rw-r--r-- | net/core/sock.c | 5 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 21 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 1 |
6 files changed, 29 insertions, 1 deletions
diff --git a/include/linux/net.h b/include/linux/net.h index 2248a052061d..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h | |||
@@ -197,6 +197,7 @@ struct proto_ops { | |||
197 | int offset, size_t size, int flags); | 197 | int offset, size_t size, int flags); |
198 | int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, | 198 | int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, |
199 | size_t size); | 199 | size_t size); |
200 | int (*set_rcvlowat)(struct sock *sk, int val); | ||
200 | }; | 201 | }; |
201 | 202 | ||
202 | #define DECLARE_SOCKADDR(type, dst, src) \ | 203 | #define DECLARE_SOCKADDR(type, dst, src) \ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9c9b3768b350..b2318242cad8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -402,6 +402,7 @@ void tcp_set_keepalive(struct sock *sk, int val); | |||
402 | void tcp_syn_ack_timeout(const struct request_sock *req); | 402 | void tcp_syn_ack_timeout(const struct request_sock *req); |
403 | int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, | 403 | int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, |
404 | int flags, int *addr_len); | 404 | int flags, int *addr_len); |
405 | int tcp_set_rcvlowat(struct sock *sk, int val); | ||
405 | void tcp_parse_options(const struct net *net, const struct sk_buff *skb, | 406 | void tcp_parse_options(const struct net *net, const struct sk_buff *skb, |
406 | struct tcp_options_received *opt_rx, | 407 | struct tcp_options_received *opt_rx, |
407 | int estab, struct tcp_fastopen_cookie *foc); | 408 | int estab, struct tcp_fastopen_cookie *foc); |
diff --git a/net/core/sock.c b/net/core/sock.c index 6444525f610c..b2c3db169ca1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -905,7 +905,10 @@ set_rcvbuf: | |||
905 | case SO_RCVLOWAT: | 905 | case SO_RCVLOWAT: |
906 | if (val < 0) | 906 | if (val < 0) |
907 | val = INT_MAX; | 907 | val = INT_MAX; |
908 | sk->sk_rcvlowat = val ? : 1; | 908 | if (sock->ops->set_rcvlowat) |
909 | ret = sock->ops->set_rcvlowat(sk, val); | ||
910 | else | ||
911 | sk->sk_rcvlowat = val ? : 1; | ||
909 | break; | 912 | break; |
910 | 913 | ||
911 | case SO_RCVTIMEO: | 914 | case SO_RCVTIMEO: |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eaed0367e669..f5c562aaef35 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = { | |||
1006 | .compat_getsockopt = compat_sock_common_getsockopt, | 1006 | .compat_getsockopt = compat_sock_common_getsockopt, |
1007 | .compat_ioctl = inet_compat_ioctl, | 1007 | .compat_ioctl = inet_compat_ioctl, |
1008 | #endif | 1008 | #endif |
1009 | .set_rcvlowat = tcp_set_rcvlowat, | ||
1009 | }; | 1010 | }; |
1010 | EXPORT_SYMBOL(inet_stream_ops); | 1011 | EXPORT_SYMBOL(inet_stream_ops); |
1011 | 1012 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bccc4c270087..0abd8d1d3d1d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock) | |||
1701 | } | 1701 | } |
1702 | EXPORT_SYMBOL(tcp_peek_len); | 1702 | EXPORT_SYMBOL(tcp_peek_len); |
1703 | 1703 | ||
1704 | /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ | ||
1705 | int tcp_set_rcvlowat(struct sock *sk, int val) | ||
1706 | { | ||
1707 | sk->sk_rcvlowat = val ? : 1; | ||
1708 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) | ||
1709 | return 0; | ||
1710 | |||
1711 | /* val comes from user space and might be close to INT_MAX */ | ||
1712 | val <<= 1; | ||
1713 | if (val < 0) | ||
1714 | val = INT_MAX; | ||
1715 | |||
1716 | val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); | ||
1717 | if (val > sk->sk_rcvbuf) { | ||
1718 | sk->sk_rcvbuf = val; | ||
1719 | tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); | ||
1720 | } | ||
1721 | return 0; | ||
1722 | } | ||
1723 | EXPORT_SYMBOL(tcp_set_rcvlowat); | ||
1724 | |||
1704 | static void tcp_update_recv_tstamps(struct sk_buff *skb, | 1725 | static void tcp_update_recv_tstamps(struct sk_buff *skb, |
1705 | struct scm_timestamping *tss) | 1726 | struct scm_timestamping *tss) |
1706 | { | 1727 | { |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8da0b513f188..e70d59fb26e1 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -590,6 +590,7 @@ const struct proto_ops inet6_stream_ops = { | |||
590 | .compat_setsockopt = compat_sock_common_setsockopt, | 590 | .compat_setsockopt = compat_sock_common_setsockopt, |
591 | .compat_getsockopt = compat_sock_common_getsockopt, | 591 | .compat_getsockopt = compat_sock_common_getsockopt, |
592 | #endif | 592 | #endif |
593 | .set_rcvlowat = tcp_set_rcvlowat, | ||
593 | }; | 594 | }; |
594 | 595 | ||
595 | const struct proto_ops inet6_dgram_ops = { | 596 | const struct proto_ops inet6_dgram_ops = { |