diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2012-01-28 11:11:03 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2012-01-30 12:45:07 -0500 |
| commit | 6f01fd6e6f6809061b56e78f1e8d143099716d70 (patch) | |
| tree | bd6b09a01a5f1af65fc12c6ac533f2b1f4caa777 /net/unix | |
| parent | 5b35e1e6e9ca651e6b291c96d1106043c9af314a (diff) | |
af_unix: fix EPOLLET regression for stream sockets
Commit 0884d7aa24 (AF_UNIX: Fix poll blocking problem when reading from
a stream socket) introduced a regression for epoll() in Edge Triggered mode
(EPOLLET).
The appropriate fix is to use skb_peek()/skb_unlink() instead of
skb_dequeue(), and only call skb_unlink() when the skb is fully consumed.
This removes the need to requeue a partial skb into sk_receive_queue head
and the extra sk->sk_data_ready() calls that caused the regression.
This is safe because once skb is given to sk_receive_queue, it is not
modified by a writer, and readers are serialized by u->readlock mutex.
This also reduces the number of spinlock acquisitions for small reads or
MSG_PEEK users, so it should improve overall performance.
Reported-by: Nick Mathewson <nickm@freehaven.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Alexey Moiseytsev <himeraster@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
| -rw-r--r-- | net/unix/af_unix.c | 19 |
1 files changed, 4 insertions, 15 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aad8fb699989..85d3bb7490aa 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
| @@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1918 | struct sk_buff *skb; | 1918 | struct sk_buff *skb; |
| 1919 | 1919 | ||
| 1920 | unix_state_lock(sk); | 1920 | unix_state_lock(sk); |
| 1921 | skb = skb_dequeue(&sk->sk_receive_queue); | 1921 | skb = skb_peek(&sk->sk_receive_queue); |
| 1922 | if (skb == NULL) { | 1922 | if (skb == NULL) { |
| 1923 | unix_sk(sk)->recursion_level = 0; | 1923 | unix_sk(sk)->recursion_level = 0; |
| 1924 | if (copied >= target) | 1924 | if (copied >= target) |
| @@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1958 | if (check_creds) { | 1958 | if (check_creds) { |
| 1959 | /* Never glue messages from different writers */ | 1959 | /* Never glue messages from different writers */ |
| 1960 | if ((UNIXCB(skb).pid != siocb->scm->pid) || | 1960 | if ((UNIXCB(skb).pid != siocb->scm->pid) || |
| 1961 | (UNIXCB(skb).cred != siocb->scm->cred)) { | 1961 | (UNIXCB(skb).cred != siocb->scm->cred)) |
| 1962 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
| 1963 | sk->sk_data_ready(sk, skb->len); | ||
| 1964 | break; | 1962 | break; |
| 1965 | } | ||
| 1966 | } else { | 1963 | } else { |
| 1967 | /* Copy credentials */ | 1964 | /* Copy credentials */ |
| 1968 | scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); | 1965 | scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); |
| @@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1977 | 1974 | ||
| 1978 | chunk = min_t(unsigned int, skb->len, size); | 1975 | chunk = min_t(unsigned int, skb->len, size); |
| 1979 | if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { | 1976 | if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { |
| 1980 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
| 1981 | sk->sk_data_ready(sk, skb->len); | ||
| 1982 | if (copied == 0) | 1977 | if (copied == 0) |
| 1983 | copied = -EFAULT; | 1978 | copied = -EFAULT; |
| 1984 | break; | 1979 | break; |
| @@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1993 | if (UNIXCB(skb).fp) | 1988 | if (UNIXCB(skb).fp) |
| 1994 | unix_detach_fds(siocb->scm, skb); | 1989 | unix_detach_fds(siocb->scm, skb); |
| 1995 | 1990 | ||
| 1996 | /* put the skb back if we didn't use it up.. */ | 1991 | if (skb->len) |
| 1997 | if (skb->len) { | ||
| 1998 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
| 1999 | sk->sk_data_ready(sk, skb->len); | ||
| 2000 | break; | 1992 | break; |
| 2001 | } | ||
| 2002 | 1993 | ||
| 1994 | skb_unlink(skb, &sk->sk_receive_queue); | ||
| 2003 | consume_skb(skb); | 1995 | consume_skb(skb); |
| 2004 | 1996 | ||
| 2005 | if (siocb->scm->fp) | 1997 | if (siocb->scm->fp) |
| @@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 2010 | if (UNIXCB(skb).fp) | 2002 | if (UNIXCB(skb).fp) |
| 2011 | siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); | 2003 | siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); |
| 2012 | 2004 | ||
| 2013 | /* put message back and return */ | ||
| 2014 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
| 2015 | sk->sk_data_ready(sk, skb->len); | ||
| 2016 | break; | 2005 | break; |
| 2017 | } | 2006 | } |
| 2018 | } while (size); | 2007 | } while (size); |
