From d6ae3bae3d1bf7a8bf367e29f2cac0788dcd0db5 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Tue, 18 Jan 2011 06:39:15 +0000 Subject: af_unix: implement socket filter Linux Socket Filters can already be successfully attached and detached on unix sockets with setsockopt(sockfd, SOL_SOCKET, SO_{ATTACH,DETACH}_FILTER, ...). See: Documentation/networking/filter.txt But the filter was never used in the unix socket code so it did not work. This patch uses sk_filter() to filter buffers before delivery. This short program demonstrates the problem on SOCK_DGRAM. int main(void) { int i, j, ret; int sv[2]; struct pollfd fds[2]; char *message = "Hello world!"; char buffer[64]; struct sock_filter ins[32] = {{0,},}; struct sock_fprog filter; socketpair(AF_UNIX, SOCK_DGRAM, 0, sv); for (i = 0 ; i < 2 ; i++) { fds[i].fd = sv[i]; fds[i].events = POLLIN; fds[i].revents = 0; } for(j = 1 ; j < 13 ; j++) { /* Set a socket filter to truncate the message */ memset(ins, 0, sizeof(ins)); ins[0].code = BPF_RET|BPF_K; ins[0].k = j; filter.len = 1; filter.filter = ins; setsockopt(sv[1], SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)); /* send a message */ send(sv[0], message, strlen(message) + 1, 0); /* The filter should let the message pass but truncated. */ poll(fds, 2, 0); /* Receive the truncated message*/ ret = recv(sv[1], buffer, 64, 0); printf("received %d bytes, expected %d\n", ret, j); } for (i = 0 ; i < 2 ; i++) close(sv[i]); return 0; } Signed-off-by: Alban Crequy Reviewed-by: Ian Molton Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d286204..8d9bbba345a4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1475,6 +1475,12 @@ restart: goto out_free; } + if (sk_filter(other, skb) < 0) { + /* Toss the packet but do not return any error to the sender */ + err = len; + goto out_free; + } + unix_state_lock(other); err = -EPERM; if (!unix_may_send(sk, other)) -- cgit v1.2.2 From 7180a03118cac7256fb04f929fe34d0aeee92c40 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Wed, 19 Jan 2011 04:56:36 +0000 Subject: af_unix: coding style: remove one level of indentation in unix_shutdown() Signed-off-by: Alban Crequy Reviewed-by: Ian Molton Signed-off-by: David S. Miller --- net/unix/af_unix.c | 60 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8d9bbba345a4..d8d98d5b508c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1984,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode) mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); - if (mode) { - unix_state_lock(sk); - sk->sk_shutdown |= mode; - other = unix_peer(sk); - if (other) - sock_hold(other); - unix_state_unlock(sk); - sk->sk_state_change(sk); - - if (other && - (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { - - int peer_mode = 0; - - if (mode&RCV_SHUTDOWN) - peer_mode |= SEND_SHUTDOWN; - if (mode&SEND_SHUTDOWN) - peer_mode |= RCV_SHUTDOWN; - unix_state_lock(other); - other->sk_shutdown |= peer_mode; - unix_state_unlock(other); - other->sk_state_change(other); - if (peer_mode == SHUTDOWN_MASK) - sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); - else if (peer_mode & RCV_SHUTDOWN) - sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); - } - if (other) - sock_put(other); + if (!mode) + return 0; + 
+ unix_state_lock(sk); + sk->sk_shutdown |= mode; + other = unix_peer(sk); + if (other) + sock_hold(other); + unix_state_unlock(sk); + sk->sk_state_change(sk); + + if (other && + (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) { + + int peer_mode = 0; + + if (mode&RCV_SHUTDOWN) + peer_mode |= SEND_SHUTDOWN; + if (mode&SEND_SHUTDOWN) + peer_mode |= RCV_SHUTDOWN; + unix_state_lock(other); + other->sk_shutdown |= peer_mode; + unix_state_unlock(other); + other->sk_state_change(other); + if (peer_mode == SHUTDOWN_MASK) + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); + else if (peer_mode & RCV_SHUTDOWN) + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); } + if (other) + sock_put(other); + return 0; } -- cgit v1.2.2 From eaefd1105bc431ef329599e307a07f2a36ae7872 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 18 Feb 2011 03:26:36 +0000 Subject: net: add __rcu annotations to sk_wq and wq Add proper RCU annotations/verbs to sk_wq and wq members Fix __sctp_write_space() sk_sleep() abuse (and sock->wq access) Fix sunrpc sk_sleep() abuse too Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d8d98d5b508c..217fb7f34d52 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1171,7 +1171,7 @@ restart: newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); - newsk->sk_wq = &newu->peer_wq; + RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); /* copy address information from listening to new sock*/ -- cgit v1.2.2 From b3ca9b02b00704053a38bfe4c31dbbb9c13595d0 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Mon, 28 Feb 2011 04:50:55 +0000 Subject: net: fix multithreaded signal handling in unix recv routines The unix_dgram_recvmsg and unix_stream_recvmsg routines in net/unix/af_unix.c utilize mutex_lock(&u->readlock) calls in order to serialize read operations of multiple threads on a single socket. This implies that, if all n threads of a process block in an AF_UNIX recv call trying to read data from the same socket, one of these threads will be sleeping in state TASK_INTERRUPTIBLE and all others in state TASK_UNINTERRUPTIBLE. Provided that a particular signal is supposed to be handled by a signal handler defined by the process and that none of these threads is blocking the signal, the complete_signal routine in kernel/signal.c will select the 'first' such thread it happens to encounter when deciding which thread to notify that a signal is supposed to be handled and if this is one of the TASK_UNINTERRUPTIBLE threads, the signal won't be handled until the one thread not blocking on the u->readlock mutex is woken up because some data to process has arrived (if this ever happens). The included patch fixes this by changing mutex_lock to mutex_lock_interruptible and handling possible error returns in the same way interruptions are handled by the actual receive-code. Signed-off-by: Rainer Weikusat Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d286204..437a99e560e1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + goto out; + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { @@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, memset(&tmp_scm, 0, sizeof(tmp_scm)); } - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) { + err = sock_intr_errno(timeo); + goto out; + } do { int chunk; @@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, timeo = unix_stream_data_wait(sk, timeo); - if (signal_pending(current)) { + if (signal_pending(current) + || mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } - mutex_lock(&u->readlock); + continue; unlock: unix_state_unlock(sk); -- cgit v1.2.2 From 6118e35a7126c1062b1a0f6737b84b4fe4d5c8d4 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 4 Mar 2011 11:45:06 +0000 Subject: af_unix: remove unused struct sockaddr_un cruft Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 217fb7f34d52..df5997d25826 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1567,7 +1567,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; - struct sockaddr_un *sunaddr = msg->msg_name; int err, size; struct sk_buff *skb; int sent = 0; @@ -1590,7 +1589,6 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { - sunaddr = NULL; err = -ENOTCONN; other = unix_peer(sk); if (!other) -- cgit v1.2.2 From c9c6cac0c2bdbda42e7b804838648d0bc60ddb13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Feb 2011 15:15:47 -0500 Subject: kill path_lookup() all remaining callers pass LOOKUP_PARENT to it, so flags argument can die; renamed to kern_path_parent() Signed-off-by: Al Viro --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dd419d286204..d8c04a602cf1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Get the parent directory, calculate the hash for last * component. */ - err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); + err = kern_path_parent(sunaddr->sun_path, &nd); if (err) goto out_mknod_parent; -- cgit v1.2.2 From e5537bfc98f01561fbdfbd8a78f0dc3e2360491d Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Mon, 14 Mar 2011 15:25:33 -0700 Subject: af_unix: update locking comment We latch our state using a spinlock not a r/w kind of lock. Signed-off-by: Daniel Baluta Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 437a99e560e1..b213ce668c98 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1124,7 +1124,7 @@ restart: /* Latch our state. - It is tricky place. We need to grab write lock and cannot + It is tricky place. We need to grab our state lock and cannot drop lock on peer. It is dangerous because deadlock is possible. Connect to self case and simultaneous attempt to connect are eliminated by checking socket -- cgit v1.2.2 From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1663e1a2efdd..3a43a8304768 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -207,7 +207,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) /* * This may look like an off by one error but it is a bit more * subtle. 108 is the longest valid AF_UNIX path for a binding. - * sun_path[108] doesnt as such exist. However in kernel space + * sun_path[108] doesn't as such exist. However in kernel space * we are guaranteed that it is a valid memory location in our * kernel address buffer. */ -- cgit v1.2.2 From a05d2ad1c1f391c7f514a1d1e09b5417968a7d07 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 24 Apr 2011 01:54:57 +0000 Subject: af_unix: Only allow recv on connected seqpacket sockets. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following oops discovered by Dan Aloni: > Anyway, the following is the output of the Oops that I got on the > Ubuntu kernel on which I first detected the problem > (2.6.37-12-generic). The Oops that followed will be more useful, I > guess. >[ 5594.669852] BUG: unable to handle kernel NULL pointer dereference > at           (null) > [ 5594.681606] IP: [] unix_dgram_recvmsg+0x1fb/0x420 > [ 5594.687576] PGD 2a05d067 PUD 2b951067 PMD 0 > [ 5594.693720] Oops: 0002 [#1] SMP > [ 5594.699888] last sysfs file: The bug was that unix domain sockets use a pseudo packet for connecting and accept uses that pseudo packet to get the socket. In the buggy seqpacket case we were allowing unconnected sockets to call recvmsg and try to receive the pseudo packet. That is always wrong and as of commit 7361c36c5 the pseudo packet had become different enough from a normal packet that the kernel started oopsing. Do for seqpacket_recv what was done for seqpacket_send in 2.5 and only allow it on connected seqpacket sockets. Cc: stable@kernel.org Tested-by: Dan Aloni Signed-off-by: Eric W. Biederman Signed-off-by: David S. 
Miller --- net/unix/af_unix.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3a43a8304768..b1d75beb7e20 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -524,6 +524,8 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *, int, int); static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); +static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, + struct msghdr *, size_t, int); static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, @@ -583,7 +585,7 @@ static const struct proto_ops unix_seqpacket_ops = { .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = unix_seqpacket_sendmsg, - .recvmsg = unix_dgram_recvmsg, + .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, }; @@ -1699,6 +1701,18 @@ static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock, return unix_dgram_sendmsg(kiocb, sock, msg, len); } +static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, + int flags) +{ + struct sock *sk = sock->sk; + + if (sk->sk_state != TCP_ESTABLISHED) + return -ENOTCONN; + + return unix_dgram_recvmsg(iocb, sock, msg, size, flags); +} + static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); -- cgit v1.2.2