diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2012-01-28 11:11:03 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-01-30 12:45:07 -0500 |
commit | 6f01fd6e6f6809061b56e78f1e8d143099716d70 (patch) | |
tree | bd6b09a01a5f1af65fc12c6ac533f2b1f4caa777 /net | |
parent | 5b35e1e6e9ca651e6b291c96d1106043c9af314a (diff) |
af_unix: fix EPOLLET regression for stream sockets
Commit 0884d7aa24 (AF_UNIX: Fix poll blocking problem when reading from
a stream socket) introduced a regression for epoll() in Edge Triggered
mode (EPOLLET).
The appropriate fix is to use skb_peek()/skb_unlink() instead of
skb_dequeue(), and only call skb_unlink() when the skb is fully consumed.
This removes the need to requeue a partial skb at the head of
sk_receive_queue, and with it the extra sk->sk_data_ready() calls that
caused the regression.
This is safe because once skb is given to sk_receive_queue, it is not
modified by a writer, and readers are serialized by u->readlock mutex.
This also reduces the number of spinlock acquisitions for small reads or
MSG_PEEK users, so it should improve overall performance.
Reported-by: Nick Mathewson <nickm@freehaven.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Alexey Moiseytsev <himeraster@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/unix/af_unix.c | 19 |
1 files changed, 4 insertions, 15 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aad8fb699989..85d3bb7490aa 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1918 | struct sk_buff *skb; | 1918 | struct sk_buff *skb; |
1919 | 1919 | ||
1920 | unix_state_lock(sk); | 1920 | unix_state_lock(sk); |
1921 | skb = skb_dequeue(&sk->sk_receive_queue); | 1921 | skb = skb_peek(&sk->sk_receive_queue); |
1922 | if (skb == NULL) { | 1922 | if (skb == NULL) { |
1923 | unix_sk(sk)->recursion_level = 0; | 1923 | unix_sk(sk)->recursion_level = 0; |
1924 | if (copied >= target) | 1924 | if (copied >= target) |
@@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1958 | if (check_creds) { | 1958 | if (check_creds) { |
1959 | /* Never glue messages from different writers */ | 1959 | /* Never glue messages from different writers */ |
1960 | if ((UNIXCB(skb).pid != siocb->scm->pid) || | 1960 | if ((UNIXCB(skb).pid != siocb->scm->pid) || |
1961 | (UNIXCB(skb).cred != siocb->scm->cred)) { | 1961 | (UNIXCB(skb).cred != siocb->scm->cred)) |
1962 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
1963 | sk->sk_data_ready(sk, skb->len); | ||
1964 | break; | 1962 | break; |
1965 | } | ||
1966 | } else { | 1963 | } else { |
1967 | /* Copy credentials */ | 1964 | /* Copy credentials */ |
1968 | scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); | 1965 | scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); |
@@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1977 | 1974 | ||
1978 | chunk = min_t(unsigned int, skb->len, size); | 1975 | chunk = min_t(unsigned int, skb->len, size); |
1979 | if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { | 1976 | if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { |
1980 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
1981 | sk->sk_data_ready(sk, skb->len); | ||
1982 | if (copied == 0) | 1977 | if (copied == 0) |
1983 | copied = -EFAULT; | 1978 | copied = -EFAULT; |
1984 | break; | 1979 | break; |
@@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1993 | if (UNIXCB(skb).fp) | 1988 | if (UNIXCB(skb).fp) |
1994 | unix_detach_fds(siocb->scm, skb); | 1989 | unix_detach_fds(siocb->scm, skb); |
1995 | 1990 | ||
1996 | /* put the skb back if we didn't use it up.. */ | 1991 | if (skb->len) |
1997 | if (skb->len) { | ||
1998 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
1999 | sk->sk_data_ready(sk, skb->len); | ||
2000 | break; | 1992 | break; |
2001 | } | ||
2002 | 1993 | ||
1994 | skb_unlink(skb, &sk->sk_receive_queue); | ||
2003 | consume_skb(skb); | 1995 | consume_skb(skb); |
2004 | 1996 | ||
2005 | if (siocb->scm->fp) | 1997 | if (siocb->scm->fp) |
@@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
2010 | if (UNIXCB(skb).fp) | 2002 | if (UNIXCB(skb).fp) |
2011 | siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); | 2003 | siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); |
2012 | 2004 | ||
2013 | /* put message back and return */ | ||
2014 | skb_queue_head(&sk->sk_receive_queue, skb); | ||
2015 | sk->sk_data_ready(sk, skb->len); | ||
2016 | break; | 2005 | break; |
2017 | } | 2006 | } |
2018 | } while (size); | 2007 | } while (size); |