From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163ec..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.2.2 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL SCM_SCREDENTIALS should apply to write() syscalls only either source or destination socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong, and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Origionally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..971282b6f6a3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.2 From 7d4c04fc170087119727119074e72445f2bb192b Mon Sep 17 00:00:00 2001 From: "Keller, Jacob E" Date: Thu, 28 Mar 2013 11:19:25 +0000 Subject: net: add option to enable error queue packets waking select Currently, when a socket receives something on the error queue it only wakes up the socket on select if it is in the "read" list, that is the socket has something to read. It is useful also to wake the socket if it is in the error list, which would enable software to wait on error queue packets without waking up for regular data on the socket. The main use case is for receiving timestamped transmit packets which return the timestamp to the socket via the error queue. This enables an application to select on the socket for the error queue only instead of for the regular traffic. -v2- * Added the SO_SELECT_ERR_QUEUE socket option to every architechture specific file * Modified every socket poll function that checks error queue Signed-off-by: Jacob Keller Cc: Jeffrey Kirsher Cc: Richard Cochran Cc: Matthew Vick Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a3..fb7a63ff71a5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) -- cgit v1.2.2 From 8facd5fb73c6e960555e5913743dfbb6c3d984a5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 2 Apr 2013 13:55:40 -0700 Subject: net: fix smatch warnings inside datagram_poll Commit 7d4c04fc170087119727119074e72445f2bb192b ("net: add option to enable error queue packets waking select") has an issue due to operator precedence causing the bit-wise OR to bind to the sock_flags call instead of the result of the terniary conditional. This fixes the *_poll functions to work properly. The old code results in "mask |= POLLPRI" instead of what was intended, which is to only include POLLPRI when the socket option is enabled. Signed-off-by: Jacob Keller Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fb7a63ff71a5..2e4d90044a52 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2197,7 +2197,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR | - sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; -- cgit v1.2.2 From 25da0e3e9d3fb2b522bc2a598076735850310eb1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:13:35 +0000 Subject: Revert "af_unix: dont send SCM_CREDENTIAL when dest socket is NULL" This reverts commit 14134f6584212d585b310ce95428014b653dfaf6. The problem that the above patch was meant to address is that af_unix messages are not being coallesced because we are sending unnecesarry credentials. Not sending credentials in maybe_add_creds totally breaks unconnected unix domain sockets that wish to send credentails to other sockets. In practice this break some versions of udev because they receive a message and the sending uid is bogus so they drop the message. Reported-by: Sven Joachim Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b6f6a3..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - (other->sk_socket && - test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.2 From 0e82e7f6dfeec1013339612f74abc2cdd29d43d2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 16:14:47 +0000 Subject: af_unix: If we don't care about credentials coallesce all messages It was reported that the following LSB test case failed https://lsbbugs.linuxfoundation.org/attachment.cgi?id=2144 because we were not coallescing unix stream messages when the application was expecting us to. The problem was that the first send was before the socket was accepted and thus sock->sk_socket was NULL in maybe_add_creds, and the second send after the socket was accepted had a non-NULL value for sk->socket and thus we could tell the credentials were not needed so we did not bother. The unnecessary credentials on the first message cause unix_stream_recvmsg to start verifying that all messages had the same credentials before coallescing and then the coallescing failed because the second message had no credentials. Ignoring credentials when we don't care in unix_stream_recvmsg fixes a long standing pessimization which would fail to coallesce messages when reading from a unix stream socket if the senders were different even if we did not care about their credentials. I have tested this and verified that the in the LSB test case mentioned above that the messages do coallesce now, while the were failing to coallesce without this change. Reported-by: Karel Srot Reported-by: Ding Tianhong Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..2db702d82e7d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1993,7 +1993,7 @@ again: if ((UNIXCB(skb).pid != siocb->scm->pid) || (UNIXCB(skb).cred != siocb->scm->cred)) break; - } else { + } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; -- cgit v1.2.2 From 6b0ee8c036ecb3ac92e18e6ca0dca7bff88beaf0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 3 Apr 2013 17:28:16 +0000 Subject: scm: Stop passing struct cred Now that uids and gids are completely encapsulated in kuid_t and kgid_t we no longer need to pass struct cred which allowed us to test both the uid and the user namespace for equality. Passing struct cred potentially allows us to pass the entire group list as BSD does but I don't believe the cost of cache line misses justifies retaining code for a future potential application. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 824eaf2c3afa..5ca1631de7ef 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb) struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); scm.pid = UNIXCB(skb).pid; - scm.cred = UNIXCB(skb).cred; if (UNIXCB(skb).fp) unix_detach_fds(&scm, skb); @@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen int err = 0; UNIXCB(skb).pid = get_pid(scm->pid); - if (scm->cred) - UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).uid = scm->creds.uid; + UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, const struct sock *other) { - if (UNIXCB(skb).cred) + if (UNIXCB(skb).pid) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || !other->sk_socket || test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); - UNIXCB(skb).cred = get_current_cred(); + current_euid_egid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } @@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1991,11 +1990,12 @@ again: if (check_creds) { /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != siocb->scm->pid) || - (UNIXCB(skb).cred != siocb->scm->cred)) + !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || + !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } -- cgit v1.2.2 From 79f632c71bea0d0864d84d6a4ce78da5a9430f5b Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Mon, 29 Apr 2013 11:42:14 +0000 Subject: unix/stream: fix peeking with an offset larger than data in queue Currently, peeking on a unix stream socket with an offset larger than len of the data in the sk receive queue returns immediately with bogus data. This patch fixes this so that the behavior is the same as peeking with no offset on an empty queue: the caller blocks. Signed-off-by: Benjamin Poirier Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2db702d82e7d..1a02af0e3049 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1859,10 +1859,10 @@ out: } /* - * Sleep until data has arrive. But check for races.. + * Sleep until more data has arrived. But check for races.. */ - -static long unix_stream_data_wait(struct sock *sk, long timeo) +static long unix_stream_data_wait(struct sock *sk, long timeo, + struct sk_buff *last) { DEFINE_WAIT(wait); @@ -1871,7 +1871,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (skb_peek_tail(&sk->sk_receive_queue) != last || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -1890,8 +1890,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) return timeo; } - - static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -1936,14 +1934,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - skip = sk_peek_offset(sk, flags); - do { int chunk; - struct sk_buff *skb; + struct sk_buff *skb, *last; unix_state_lock(sk); - skb = skb_peek(&sk->sk_receive_queue); + last = skb = skb_peek(&sk->sk_receive_queue); again: if (skb == NULL) { unix_sk(sk)->recursion_level = 0; @@ -1966,7 +1962,7 @@ again: break; mutex_unlock(&u->readlock); - timeo = unix_stream_data_wait(sk, timeo); + timeo = unix_stream_data_wait(sk, timeo, last); if (signal_pending(current) || mutex_lock_interruptible(&u->readlock)) { @@ -1980,10 +1976,13 @@ again: break; } - if (skip >= skb->len) { + skip = sk_peek_offset(sk, flags); + while (skip >= skb->len) { skip -= skb->len; + last = skb; skb = skb_peek_next(skb, &sk->sk_receive_queue); - goto again; + if (!skb) + goto again; } unix_state_unlock(sk); -- cgit v1.2.2