summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com>, 2018-04-16 13:33:35 -0400
committer: David S. Miller <davem@davemloft.net>, 2018-04-16 18:26:37 -0400
commit: d1361840f8c519eaee9a78ffe09e4f0a1b586846 (patch)
tree: 86a904ade99a93544e0817cda7dc842b12f9b833
parent: 10b19aeac1700c3ba94fb50583a766d9cdaf1e9e (diff)
tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on a TCP socket hoping to receive one [E]POLLIN event only when a given number of bytes is ready in the socket receive queue.

The problem is that receive autotuning is not aware of this constraint, meaning sk_rcvbuf might be too small to allow all bytes to be stored.

Add a new (struct proto_ops)->set_rcvlowat method so that a protocol can override the default setsockopt(SO_RCVLOWAT) behavior.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/net.h1
-rw-r--r--include/net/tcp.h1
-rw-r--r--net/core/sock.c5
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/tcp.c21
-rw-r--r--net/ipv6/af_inet6.c1
6 files changed, 29 insertions, 1 deletions
diff --git a/include/linux/net.h b/include/linux/net.h
index 2248a052061d..6554d3ba4396 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -197,6 +197,7 @@ struct proto_ops {
197 int offset, size_t size, int flags); 197 int offset, size_t size, int flags);
198 int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, 198 int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
199 size_t size); 199 size_t size);
200 int (*set_rcvlowat)(struct sock *sk, int val);
200}; 201};
201 202
202#define DECLARE_SOCKADDR(type, dst, src) \ 203#define DECLARE_SOCKADDR(type, dst, src) \
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9c9b3768b350..b2318242cad8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -402,6 +402,7 @@ void tcp_set_keepalive(struct sock *sk, int val);
402void tcp_syn_ack_timeout(const struct request_sock *req); 402void tcp_syn_ack_timeout(const struct request_sock *req);
403int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, 403int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
404 int flags, int *addr_len); 404 int flags, int *addr_len);
405int tcp_set_rcvlowat(struct sock *sk, int val);
405void tcp_parse_options(const struct net *net, const struct sk_buff *skb, 406void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
406 struct tcp_options_received *opt_rx, 407 struct tcp_options_received *opt_rx,
407 int estab, struct tcp_fastopen_cookie *foc); 408 int estab, struct tcp_fastopen_cookie *foc);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6444525f610c..b2c3db169ca1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -905,7 +905,10 @@ set_rcvbuf:
905 case SO_RCVLOWAT: 905 case SO_RCVLOWAT:
906 if (val < 0) 906 if (val < 0)
907 val = INT_MAX; 907 val = INT_MAX;
908 sk->sk_rcvlowat = val ? : 1; 908 if (sock->ops->set_rcvlowat)
909 ret = sock->ops->set_rcvlowat(sk, val);
910 else
911 sk->sk_rcvlowat = val ? : 1;
909 break; 912 break;
910 913
911 case SO_RCVTIMEO: 914 case SO_RCVTIMEO:
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed0367e669..f5c562aaef35 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = {
1006 .compat_getsockopt = compat_sock_common_getsockopt, 1006 .compat_getsockopt = compat_sock_common_getsockopt,
1007 .compat_ioctl = inet_compat_ioctl, 1007 .compat_ioctl = inet_compat_ioctl,
1008#endif 1008#endif
1009 .set_rcvlowat = tcp_set_rcvlowat,
1009}; 1010};
1010EXPORT_SYMBOL(inet_stream_ops); 1011EXPORT_SYMBOL(inet_stream_ops);
1011 1012
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bccc4c270087..0abd8d1d3d1d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock)
1701} 1701}
1702EXPORT_SYMBOL(tcp_peek_len); 1702EXPORT_SYMBOL(tcp_peek_len);
1703 1703
1704/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
1705int tcp_set_rcvlowat(struct sock *sk, int val)
1706{
1707 sk->sk_rcvlowat = val ? : 1;
1708 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
1709 return 0;
1710
1711 /* val comes from user space and might be close to INT_MAX */
1712 val <<= 1;
1713 if (val < 0)
1714 val = INT_MAX;
1715
1716 val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
1717 if (val > sk->sk_rcvbuf) {
1718 sk->sk_rcvbuf = val;
1719 tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
1720 }
1721 return 0;
1722}
1723EXPORT_SYMBOL(tcp_set_rcvlowat);
1724
1704static void tcp_update_recv_tstamps(struct sk_buff *skb, 1725static void tcp_update_recv_tstamps(struct sk_buff *skb,
1705 struct scm_timestamping *tss) 1726 struct scm_timestamping *tss)
1706{ 1727{
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8da0b513f188..e70d59fb26e1 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -590,6 +590,7 @@ const struct proto_ops inet6_stream_ops = {
590 .compat_setsockopt = compat_sock_common_setsockopt, 590 .compat_setsockopt = compat_sock_common_setsockopt,
591 .compat_getsockopt = compat_sock_common_getsockopt, 591 .compat_getsockopt = compat_sock_common_getsockopt,
592#endif 592#endif
593 .set_rcvlowat = tcp_set_rcvlowat,
593}; 594};
594 595
595const struct proto_ops inet6_dgram_ops = { 596const struct proto_ops inet6_dgram_ops = {