aboutsummaryrefslogtreecommitdiffstats
path: root/net/unix
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2012-01-28 11:11:03 -0500
committerDavid S. Miller <davem@davemloft.net>2012-01-30 12:45:07 -0500
commit6f01fd6e6f6809061b56e78f1e8d143099716d70 (patch)
treebd6b09a01a5f1af65fc12c6ac533f2b1f4caa777 /net/unix
parent5b35e1e6e9ca651e6b291c96d1106043c9af314a (diff)
af_unix: fix EPOLLET regression for stream sockets
Commit 0884d7aa24 (AF_UNIX: Fix poll blocking problem when reading from a stream socket) added a regression for epoll() in Edge Triggered mode (EPOLLET) Appropriate fix is to use skb_peek()/skb_unlink() instead of skb_dequeue(), and only call skb_unlink() when skb is fully consumed. This remove the need to requeue a partial skb into sk_receive_queue head and the extra sk->sk_data_ready() calls that added the regression. This is safe because once skb is given to sk_receive_queue, it is not modified by a writer, and readers are serialized by u->readlock mutex. This also reduce number of spinlock acquisition for small reads or MSG_PEEK users so should improve overall performance. Reported-by: Nick Mathewson <nickm@freehaven.net> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Cc: Alexey Moiseytsev <himeraster@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
-rw-r--r--net/unix/af_unix.c19
1 files changed, 4 insertions, 15 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index aad8fb699989..85d3bb7490aa 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1918,7 +1918,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1918 struct sk_buff *skb; 1918 struct sk_buff *skb;
1919 1919
1920 unix_state_lock(sk); 1920 unix_state_lock(sk);
1921 skb = skb_dequeue(&sk->sk_receive_queue); 1921 skb = skb_peek(&sk->sk_receive_queue);
1922 if (skb == NULL) { 1922 if (skb == NULL) {
1923 unix_sk(sk)->recursion_level = 0; 1923 unix_sk(sk)->recursion_level = 0;
1924 if (copied >= target) 1924 if (copied >= target)
@@ -1958,11 +1958,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1958 if (check_creds) { 1958 if (check_creds) {
1959 /* Never glue messages from different writers */ 1959 /* Never glue messages from different writers */
1960 if ((UNIXCB(skb).pid != siocb->scm->pid) || 1960 if ((UNIXCB(skb).pid != siocb->scm->pid) ||
1961 (UNIXCB(skb).cred != siocb->scm->cred)) { 1961 (UNIXCB(skb).cred != siocb->scm->cred))
1962 skb_queue_head(&sk->sk_receive_queue, skb);
1963 sk->sk_data_ready(sk, skb->len);
1964 break; 1962 break;
1965 }
1966 } else { 1963 } else {
1967 /* Copy credentials */ 1964 /* Copy credentials */
1968 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); 1965 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
@@ -1977,8 +1974,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1977 1974
1978 chunk = min_t(unsigned int, skb->len, size); 1975 chunk = min_t(unsigned int, skb->len, size);
1979 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { 1976 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1980 skb_queue_head(&sk->sk_receive_queue, skb);
1981 sk->sk_data_ready(sk, skb->len);
1982 if (copied == 0) 1977 if (copied == 0)
1983 copied = -EFAULT; 1978 copied = -EFAULT;
1984 break; 1979 break;
@@ -1993,13 +1988,10 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1993 if (UNIXCB(skb).fp) 1988 if (UNIXCB(skb).fp)
1994 unix_detach_fds(siocb->scm, skb); 1989 unix_detach_fds(siocb->scm, skb);
1995 1990
1996 /* put the skb back if we didn't use it up.. */ 1991 if (skb->len)
1997 if (skb->len) {
1998 skb_queue_head(&sk->sk_receive_queue, skb);
1999 sk->sk_data_ready(sk, skb->len);
2000 break; 1992 break;
2001 }
2002 1993
1994 skb_unlink(skb, &sk->sk_receive_queue);
2003 consume_skb(skb); 1995 consume_skb(skb);
2004 1996
2005 if (siocb->scm->fp) 1997 if (siocb->scm->fp)
@@ -2010,9 +2002,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
2010 if (UNIXCB(skb).fp) 2002 if (UNIXCB(skb).fp)
2011 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); 2003 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2012 2004
2013 /* put message back and return */
2014 skb_queue_head(&sk->sk_receive_queue, skb);
2015 sk->sk_data_ready(sk, skb->len);
2016 break; 2005 break;
2017 } 2006 }
2018 } while (size); 2007 } while (size);