aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorRainer Weikusat <rweikusat@mssgmbh.com>2008-06-18 01:28:05 -0400
committerDavid S. Miller <davem@davemloft.net>2008-06-18 01:28:05 -0400
commit3c73419c09a5ef73d56472dbfdade9e311496e9b (patch)
tree19dc2714a4649445bc7cbcd06c0d1851962d41fc /net
parent4552e1198a08198ce0b42e856845b5394c82c59c (diff)
af_unix: fix 'poll for write'/ connected DGRAM sockets
The unix_dgram_sendmsg routine implements a (somewhat crude) form of receiver-imposed flow control by comparing the length of the receive queue of the 'peer socket' with the max_ack_backlog value stored in the corresponding sock structure, either blocking the thread which caused the send-routine to be called or returning EAGAIN. This routine is used by both SOCK_DGRAM and SOCK_SEQPACKET sockets. The poll-implementation for these socket types is datagram_poll from core/datagram.c. A socket is deemed to be writeable by this routine when the memory presently consumed by datagrams owned by it is less than the configured socket send buffer size. This is always wrong for connected PF_UNIX non-stream sockets when the abovementioned receive queue is currently considered to be full. 'poll' will then return, indicating that the socket is writeable, but a subsequent write will result in EAGAIN, effectively causing a (typical) application to 'poll for writeability by repeated send requests with O_NONBLOCK set' until it has consumed its time quantum. The change below uses a suitably modified variant of the datagram_poll routine for both types of PF_UNIX sockets, which tests if the recv-queue of the peer a socket is connected to is presently considered to be 'full' as part of the 'is this socket writeable'-checking code. The socket being polled is additionally put onto the peer_wait wait queue associated with its peer, because the unix_dgram_sendmsg routine does a wake-up on this queue after a datagram was received and the 'other wakeup call' is done implicitly as part of skb destruction, meaning that a process blocked in poll because of a full peer receive queue could otherwise sleep forever if no datagram owned by its socket was already sitting on this queue. Included in this change is a small (inline) helper routine named 'unix_recvq_full', which consolidates the actual testing code (in three different places) into a single location.
Signed-off-by: Rainer Weikusat <rweikusat@mssgmbh.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/unix/af_unix.c79
1 files changed, 70 insertions, 9 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e18cd3628db4..657835f227d3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -169,6 +169,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
169 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); 169 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
170} 170}
171 171
172static inline int unix_recvq_full(struct sock const *sk)
173{
174 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
175}
176
172static struct sock *unix_peer_get(struct sock *s) 177static struct sock *unix_peer_get(struct sock *s)
173{ 178{
174 struct sock *peer; 179 struct sock *peer;
@@ -482,6 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *);
482static int unix_accept(struct socket *, struct socket *, int); 487static int unix_accept(struct socket *, struct socket *, int);
483static int unix_getname(struct socket *, struct sockaddr *, int *, int); 488static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484static unsigned int unix_poll(struct file *, struct socket *, poll_table *); 489static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
490static unsigned int unix_datagram_poll(struct file *, struct socket *,
491 poll_table *);
485static int unix_ioctl(struct socket *, unsigned int, unsigned long); 492static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486static int unix_shutdown(struct socket *, int); 493static int unix_shutdown(struct socket *, int);
487static int unix_stream_sendmsg(struct kiocb *, struct socket *, 494static int unix_stream_sendmsg(struct kiocb *, struct socket *,
@@ -527,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = {
527 .socketpair = unix_socketpair, 534 .socketpair = unix_socketpair,
528 .accept = sock_no_accept, 535 .accept = sock_no_accept,
529 .getname = unix_getname, 536 .getname = unix_getname,
530 .poll = datagram_poll, 537 .poll = unix_datagram_poll,
531 .ioctl = unix_ioctl, 538 .ioctl = unix_ioctl,
532 .listen = sock_no_listen, 539 .listen = sock_no_listen,
533 .shutdown = unix_shutdown, 540 .shutdown = unix_shutdown,
@@ -548,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = {
548 .socketpair = unix_socketpair, 555 .socketpair = unix_socketpair,
549 .accept = unix_accept, 556 .accept = unix_accept,
550 .getname = unix_getname, 557 .getname = unix_getname,
551 .poll = datagram_poll, 558 .poll = unix_datagram_poll,
552 .ioctl = unix_ioctl, 559 .ioctl = unix_ioctl,
553 .listen = unix_listen, 560 .listen = unix_listen,
554 .shutdown = unix_shutdown, 561 .shutdown = unix_shutdown,
@@ -983,8 +990,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
983 990
984 sched = !sock_flag(other, SOCK_DEAD) && 991 sched = !sock_flag(other, SOCK_DEAD) &&
985 !(other->sk_shutdown & RCV_SHUTDOWN) && 992 !(other->sk_shutdown & RCV_SHUTDOWN) &&
986 (skb_queue_len(&other->sk_receive_queue) > 993 unix_recvq_full(other);
987 other->sk_max_ack_backlog);
988 994
989 unix_state_unlock(other); 995 unix_state_unlock(other);
990 996
@@ -1058,8 +1064,7 @@ restart:
1058 if (other->sk_state != TCP_LISTEN) 1064 if (other->sk_state != TCP_LISTEN)
1059 goto out_unlock; 1065 goto out_unlock;
1060 1066
1061 if (skb_queue_len(&other->sk_receive_queue) > 1067 if (unix_recvq_full(other)) {
1062 other->sk_max_ack_backlog) {
1063 err = -EAGAIN; 1068 err = -EAGAIN;
1064 if (!timeo) 1069 if (!timeo)
1065 goto out_unlock; 1070 goto out_unlock;
@@ -1428,9 +1433,7 @@ restart:
1428 goto out_unlock; 1433 goto out_unlock;
1429 } 1434 }
1430 1435
1431 if (unix_peer(other) != sk && 1436 if (unix_peer(other) != sk && unix_recvq_full(other)) {
1432 (skb_queue_len(&other->sk_receive_queue) >
1433 other->sk_max_ack_backlog)) {
1434 if (!timeo) { 1437 if (!timeo) {
1435 err = -EAGAIN; 1438 err = -EAGAIN;
1436 goto out_unlock; 1439 goto out_unlock;
@@ -1991,6 +1994,64 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl
1991 return mask; 1994 return mask;
1992} 1995}
1993 1996
1997static unsigned int unix_datagram_poll(struct file *file, struct socket *sock,
1998 poll_table *wait)
1999{
2000 struct sock *sk = sock->sk, *peer;
2001 unsigned int mask;
2002
2003 poll_wait(file, sk->sk_sleep, wait);
2004
2005 peer = unix_peer_get(sk);
2006 if (peer) {
2007 if (peer != sk) {
2008 /*
2009 * Writability of a connected socket additionally
2010 * depends on the state of the receive queue of the
2011 * peer.
2012 */
2013 poll_wait(file, &unix_sk(peer)->peer_wait, wait);
2014 } else {
2015 sock_put(peer);
2016 peer = NULL;
2017 }
2018 }
2019
2020 mask = 0;
2021
2022 /* exceptional events? */
2023 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2024 mask |= POLLERR;
2025 if (sk->sk_shutdown & RCV_SHUTDOWN)
2026 mask |= POLLRDHUP;
2027 if (sk->sk_shutdown == SHUTDOWN_MASK)
2028 mask |= POLLHUP;
2029
2030 /* readable? */
2031 if (!skb_queue_empty(&sk->sk_receive_queue) ||
2032 (sk->sk_shutdown & RCV_SHUTDOWN))
2033 mask |= POLLIN | POLLRDNORM;
2034
2035 /* Connection-based need to check for termination and startup */
2036 if (sk->sk_type == SOCK_SEQPACKET) {
2037 if (sk->sk_state == TCP_CLOSE)
2038 mask |= POLLHUP;
2039 /* connection hasn't started yet? */
2040 if (sk->sk_state == TCP_SYN_SENT)
2041 return mask;
2042 }
2043
2044 /* writable? */
2045 if (unix_writable(sk) && !(peer && unix_recvq_full(peer)))
2046 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2047 else
2048 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2049
2050 if (peer)
2051 sock_put(peer);
2052
2053 return mask;
2054}
1994 2055
1995#ifdef CONFIG_PROC_FS 2056#ifdef CONFIG_PROC_FS
1996static struct sock *first_unix_socket(int *i) 2057static struct sock *first_unix_socket(int *i)