aboutsummaryrefslogtreecommitdiffstats
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c414
1 files changed, 319 insertions, 95 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 955ec152cb71..c5bf5ef2bf89 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
326 return s; 326 return s;
327} 327}
328 328
329/* Support code for asymmetrically connected dgram sockets
330 *
331 * If a datagram socket is connected to a socket not itself connected
332 * to the first socket (e.g., /dev/log), clients may only enqueue more
333 * messages if the present receive queue of the server socket is not
334 * "too large". This means there's a second writeability condition
335 * poll and sendmsg need to test. The dgram recv code will do a wake
336 * up on the peer_wait wait queue of a socket upon reception of a
337 * datagram which needs to be propagated to sleeping would-be writers
338 * since these might not have sent anything so far. This can't be
339 * accomplished via poll_wait because the lifetime of the server
340 * socket might be less than that of its clients if these break their
341 * association with it or if the server socket is closed while clients
342 * are still connected to it and there's no way to inform "a polling
343 * implementation" that it should let go of a certain wait queue.
344 *
345 * In order to propagate a wake up, a wait_queue_t of the client
346 * socket is enqueued on the peer_wait queue of the server socket
347 * whose wake function does a wake_up on the ordinary client socket
348 * wait queue. This connection is established whenever a write (or
349 * poll for write) hits the flow control condition and is broken when the
350 * association to the server socket is dissolved or after a wake up
351 * was relayed.
352 */
353
354static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355 void *key)
356{
357 struct unix_sock *u;
358 wait_queue_head_t *u_sleep;
359
360 u = container_of(q, struct unix_sock, peer_wake);
361
362 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363 q);
364 u->peer_wake.private = NULL;
365
366 /* relaying can only happen while the wq still exists */
367 u_sleep = sk_sleep(&u->sk);
368 if (u_sleep)
369 wake_up_interruptible_poll(u_sleep, key);
370
371 return 0;
372}
373
374static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375{
376 struct unix_sock *u, *u_other;
377 int rc;
378
379 u = unix_sk(sk);
380 u_other = unix_sk(other);
381 rc = 0;
382 spin_lock(&u_other->peer_wait.lock);
383
384 if (!u->peer_wake.private) {
385 u->peer_wake.private = other;
386 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387
388 rc = 1;
389 }
390
391 spin_unlock(&u_other->peer_wait.lock);
392 return rc;
393}
394
395static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396 struct sock *other)
397{
398 struct unix_sock *u, *u_other;
399
400 u = unix_sk(sk);
401 u_other = unix_sk(other);
402 spin_lock(&u_other->peer_wait.lock);
403
404 if (u->peer_wake.private == other) {
405 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406 u->peer_wake.private = NULL;
407 }
408
409 spin_unlock(&u_other->peer_wait.lock);
410}
411
412static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413 struct sock *other)
414{
415 unix_dgram_peer_wake_disconnect(sk, other);
416 wake_up_interruptible_poll(sk_sleep(sk),
417 POLLOUT |
418 POLLWRNORM |
419 POLLWRBAND);
420}
421
422/* preconditions:
423 * - unix_peer(sk) == other
424 * - association is stable
425 */
426static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427{
428 int connected;
429
430 connected = unix_dgram_peer_wake_connect(sk, other);
431
432 if (unix_recvq_full(other))
433 return 1;
434
435 if (connected)
436 unix_dgram_peer_wake_disconnect(sk, other);
437
438 return 0;
439}
440
329static int unix_writable(const struct sock *sk) 441static int unix_writable(const struct sock *sk)
330{ 442{
331 return sk->sk_state != TCP_LISTEN && 443 return sk->sk_state != TCP_LISTEN &&
@@ -339,7 +451,7 @@ static void unix_write_space(struct sock *sk)
339 rcu_read_lock(); 451 rcu_read_lock();
340 if (unix_writable(sk)) { 452 if (unix_writable(sk)) {
341 wq = rcu_dereference(sk->sk_wq); 453 wq = rcu_dereference(sk->sk_wq);
342 if (wq_has_sleeper(wq)) 454 if (skwq_has_sleeper(wq))
343 wake_up_interruptible_sync_poll(&wq->wait, 455 wake_up_interruptible_sync_poll(&wq->wait,
344 POLLOUT | POLLWRNORM | POLLWRBAND); 456 POLLOUT | POLLWRNORM | POLLWRBAND);
345 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 457 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
431 skpair->sk_state_change(skpair); 543 skpair->sk_state_change(skpair);
432 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); 544 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
433 } 545 }
546
547 unix_dgram_peer_wake_disconnect(sk, skpair);
434 sock_put(skpair); /* It may now die */ 548 sock_put(skpair); /* It may now die */
435 unix_peer(sk) = NULL; 549 unix_peer(sk) = NULL;
436 } 550 }
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
666 INIT_LIST_HEAD(&u->link); 780 INIT_LIST_HEAD(&u->link);
667 mutex_init(&u->readlock); /* single task reading lock */ 781 mutex_init(&u->readlock); /* single task reading lock */
668 init_waitqueue_head(&u->peer_wait); 782 init_waitqueue_head(&u->peer_wait);
783 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
669 unix_insert_socket(unix_sockets_unbound(sk), sk); 784 unix_insert_socket(unix_sockets_unbound(sk), sk);
670out: 785out:
671 if (sk == NULL) 786 if (sk == NULL)
@@ -838,32 +953,20 @@ fail:
838 return NULL; 953 return NULL;
839} 954}
840 955
841static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) 956static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
957 struct path *res)
842{ 958{
843 struct dentry *dentry; 959 int err;
844 struct path path;
845 int err = 0;
846 /*
847 * Get the parent directory, calculate the hash for last
848 * component.
849 */
850 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
851 err = PTR_ERR(dentry);
852 if (IS_ERR(dentry))
853 return err;
854 960
855 /* 961 err = security_path_mknod(path, dentry, mode, 0);
856 * All right, let's create it.
857 */
858 err = security_path_mknod(&path, dentry, mode, 0);
859 if (!err) { 962 if (!err) {
860 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); 963 err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
861 if (!err) { 964 if (!err) {
862 res->mnt = mntget(path.mnt); 965 res->mnt = mntget(path->mnt);
863 res->dentry = dget(dentry); 966 res->dentry = dget(dentry);
864 } 967 }
865 } 968 }
866 done_path_create(&path, dentry); 969
867 return err; 970 return err;
868} 971}
869 972
@@ -874,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
874 struct unix_sock *u = unix_sk(sk); 977 struct unix_sock *u = unix_sk(sk);
875 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 978 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
876 char *sun_path = sunaddr->sun_path; 979 char *sun_path = sunaddr->sun_path;
877 int err; 980 int err, name_err;
878 unsigned int hash; 981 unsigned int hash;
879 struct unix_address *addr; 982 struct unix_address *addr;
880 struct hlist_head *list; 983 struct hlist_head *list;
984 struct path path;
985 struct dentry *dentry;
881 986
882 err = -EINVAL; 987 err = -EINVAL;
883 if (sunaddr->sun_family != AF_UNIX) 988 if (sunaddr->sun_family != AF_UNIX)
@@ -893,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
893 goto out; 998 goto out;
894 addr_len = err; 999 addr_len = err;
895 1000
1001 name_err = 0;
1002 dentry = NULL;
1003 if (sun_path[0]) {
1004 /* Get the parent directory, calculate the hash for last
1005 * component.
1006 */
1007 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
1008
1009 if (IS_ERR(dentry)) {
1010 /* delay report until after 'already bound' check */
1011 name_err = PTR_ERR(dentry);
1012 dentry = NULL;
1013 }
1014 }
1015
896 err = mutex_lock_interruptible(&u->readlock); 1016 err = mutex_lock_interruptible(&u->readlock);
897 if (err) 1017 if (err)
898 goto out; 1018 goto out_path;
899 1019
900 err = -EINVAL; 1020 err = -EINVAL;
901 if (u->addr) 1021 if (u->addr)
902 goto out_up; 1022 goto out_up;
903 1023
1024 if (name_err) {
1025 err = name_err == -EEXIST ? -EADDRINUSE : name_err;
1026 goto out_up;
1027 }
1028
904 err = -ENOMEM; 1029 err = -ENOMEM;
905 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); 1030 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
906 if (!addr) 1031 if (!addr)
@@ -911,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
911 addr->hash = hash ^ sk->sk_type; 1036 addr->hash = hash ^ sk->sk_type;
912 atomic_set(&addr->refcnt, 1); 1037 atomic_set(&addr->refcnt, 1);
913 1038
914 if (sun_path[0]) { 1039 if (dentry) {
915 struct path path; 1040 struct path u_path;
916 umode_t mode = S_IFSOCK | 1041 umode_t mode = S_IFSOCK |
917 (SOCK_INODE(sock)->i_mode & ~current_umask()); 1042 (SOCK_INODE(sock)->i_mode & ~current_umask());
918 err = unix_mknod(sun_path, mode, &path); 1043 err = unix_mknod(dentry, &path, mode, &u_path);
919 if (err) { 1044 if (err) {
920 if (err == -EEXIST) 1045 if (err == -EEXIST)
921 err = -EADDRINUSE; 1046 err = -EADDRINUSE;
@@ -923,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
923 goto out_up; 1048 goto out_up;
924 } 1049 }
925 addr->hash = UNIX_HASH_SIZE; 1050 addr->hash = UNIX_HASH_SIZE;
926 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); 1051 hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
927 spin_lock(&unix_table_lock); 1052 spin_lock(&unix_table_lock);
928 u->path = path; 1053 u->path = u_path;
929 list = &unix_socket_table[hash]; 1054 list = &unix_socket_table[hash];
930 } else { 1055 } else {
931 spin_lock(&unix_table_lock); 1056 spin_lock(&unix_table_lock);
@@ -948,6 +1073,10 @@ out_unlock:
948 spin_unlock(&unix_table_lock); 1073 spin_unlock(&unix_table_lock);
949out_up: 1074out_up:
950 mutex_unlock(&u->readlock); 1075 mutex_unlock(&u->readlock);
1076out_path:
1077 if (dentry)
1078 done_path_create(&path, dentry);
1079
951out: 1080out:
952 return err; 1081 return err;
953} 1082}
@@ -1033,6 +1162,8 @@ restart:
1033 if (unix_peer(sk)) { 1162 if (unix_peer(sk)) {
1034 struct sock *old_peer = unix_peer(sk); 1163 struct sock *old_peer = unix_peer(sk);
1035 unix_peer(sk) = other; 1164 unix_peer(sk) = other;
1165 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1166
1036 unix_state_double_unlock(sk, other); 1167 unix_state_double_unlock(sk, other);
1037 1168
1038 if (other != old_peer) 1169 if (other != old_peer)
@@ -1382,6 +1513,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
1382 sock_wfree(skb); 1513 sock_wfree(skb);
1383} 1514}
1384 1515
1516/*
1517 * The "user->unix_inflight" variable is protected by the garbage
1518 * collection lock, and we just read it locklessly here. If you go
1519 * over the limit, there might be a tiny race in actually noticing
1520 * it across threads. Tough.
1521 */
1522static inline bool too_many_unix_fds(struct task_struct *p)
1523{
1524 struct user_struct *user = current_user();
1525
1526 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528 return false;
1529}
1530
1385#define MAX_RECURSION_LEVEL 4 1531#define MAX_RECURSION_LEVEL 4
1386 1532
1387static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) 1533static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1390,6 +1536,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1390 unsigned char max_level = 0; 1536 unsigned char max_level = 0;
1391 int unix_sock_count = 0; 1537 int unix_sock_count = 0;
1392 1538
1539 if (too_many_unix_fds(current))
1540 return -ETOOMANYREFS;
1541
1393 for (i = scm->fp->count - 1; i >= 0; i--) { 1542 for (i = scm->fp->count - 1; i >= 0; i--) {
1394 struct sock *sk = unix_get_socket(scm->fp->fp[i]); 1543 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1395 1544
@@ -1411,10 +1560,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1411 if (!UNIXCB(skb).fp) 1560 if (!UNIXCB(skb).fp)
1412 return -ENOMEM; 1561 return -ENOMEM;
1413 1562
1414 if (unix_sock_count) { 1563 for (i = scm->fp->count - 1; i >= 0; i--)
1415 for (i = scm->fp->count - 1; i >= 0; i--) 1564 unix_inflight(scm->fp->fp[i]);
1416 unix_inflight(scm->fp->fp[i]);
1417 }
1418 return max_level; 1565 return max_level;
1419} 1566}
1420 1567
@@ -1434,6 +1581,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1434 return err; 1581 return err;
1435} 1582}
1436 1583
1584static bool unix_passcred_enabled(const struct socket *sock,
1585 const struct sock *other)
1586{
1587 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1588 !other->sk_socket ||
1589 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1590}
1591
1437/* 1592/*
1438 * Some apps rely on write() giving SCM_CREDENTIALS 1593 * Some apps rely on write() giving SCM_CREDENTIALS
1439 * We include credentials if source or destination socket 1594 * We include credentials if source or destination socket
@@ -1444,14 +1599,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1444{ 1599{
1445 if (UNIXCB(skb).pid) 1600 if (UNIXCB(skb).pid)
1446 return; 1601 return;
1447 if (test_bit(SOCK_PASSCRED, &sock->flags) || 1602 if (unix_passcred_enabled(sock, other)) {
1448 !other->sk_socket ||
1449 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1450 UNIXCB(skb).pid = get_pid(task_tgid(current)); 1603 UNIXCB(skb).pid = get_pid(task_tgid(current));
1451 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); 1604 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1452 } 1605 }
1453} 1606}
1454 1607
1608static int maybe_init_creds(struct scm_cookie *scm,
1609 struct socket *socket,
1610 const struct sock *other)
1611{
1612 int err;
1613 struct msghdr msg = { .msg_controllen = 0 };
1614
1615 err = scm_send(socket, &msg, scm, false);
1616 if (err)
1617 return err;
1618
1619 if (unix_passcred_enabled(socket, other)) {
1620 scm->pid = get_pid(task_tgid(current));
1621 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1622 }
1623 return err;
1624}
1625
1626static bool unix_skb_scm_eq(struct sk_buff *skb,
1627 struct scm_cookie *scm)
1628{
1629 const struct unix_skb_parms *u = &UNIXCB(skb);
1630
1631 return u->pid == scm->pid &&
1632 uid_eq(u->uid, scm->creds.uid) &&
1633 gid_eq(u->gid, scm->creds.gid) &&
1634 unix_secdata_eq(scm, skb);
1635}
1636
1455/* 1637/*
1456 * Send AF_UNIX data. 1638 * Send AF_UNIX data.
1457 */ 1639 */
@@ -1472,6 +1654,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1472 struct scm_cookie scm; 1654 struct scm_cookie scm;
1473 int max_level; 1655 int max_level;
1474 int data_len = 0; 1656 int data_len = 0;
1657 int sk_locked;
1475 1658
1476 wait_for_unix_gc(); 1659 wait_for_unix_gc();
1477 err = scm_send(sock, msg, &scm, false); 1660 err = scm_send(sock, msg, &scm, false);
@@ -1550,12 +1733,14 @@ restart:
1550 goto out_free; 1733 goto out_free;
1551 } 1734 }
1552 1735
1736 sk_locked = 0;
1553 unix_state_lock(other); 1737 unix_state_lock(other);
1738restart_locked:
1554 err = -EPERM; 1739 err = -EPERM;
1555 if (!unix_may_send(sk, other)) 1740 if (!unix_may_send(sk, other))
1556 goto out_unlock; 1741 goto out_unlock;
1557 1742
1558 if (sock_flag(other, SOCK_DEAD)) { 1743 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1559 /* 1744 /*
1560 * Check with 1003.1g - what should 1745 * Check with 1003.1g - what should
1561 * datagram error 1746 * datagram error
@@ -1563,10 +1748,14 @@ restart:
1563 unix_state_unlock(other); 1748 unix_state_unlock(other);
1564 sock_put(other); 1749 sock_put(other);
1565 1750
1751 if (!sk_locked)
1752 unix_state_lock(sk);
1753
1566 err = 0; 1754 err = 0;
1567 unix_state_lock(sk);
1568 if (unix_peer(sk) == other) { 1755 if (unix_peer(sk) == other) {
1569 unix_peer(sk) = NULL; 1756 unix_peer(sk) = NULL;
1757 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1758
1570 unix_state_unlock(sk); 1759 unix_state_unlock(sk);
1571 1760
1572 unix_dgram_disconnected(sk, other); 1761 unix_dgram_disconnected(sk, other);
@@ -1592,21 +1781,38 @@ restart:
1592 goto out_unlock; 1781 goto out_unlock;
1593 } 1782 }
1594 1783
1595 if (unix_peer(other) != sk && unix_recvq_full(other)) { 1784 if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1596 if (!timeo) { 1785 if (timeo) {
1597 err = -EAGAIN; 1786 timeo = unix_wait_for_peer(other, timeo);
1598 goto out_unlock; 1787
1788 err = sock_intr_errno(timeo);
1789 if (signal_pending(current))
1790 goto out_free;
1791
1792 goto restart;
1599 } 1793 }
1600 1794
1601 timeo = unix_wait_for_peer(other, timeo); 1795 if (!sk_locked) {
1796 unix_state_unlock(other);
1797 unix_state_double_lock(sk, other);
1798 }
1602 1799
1603 err = sock_intr_errno(timeo); 1800 if (unix_peer(sk) != other ||
1604 if (signal_pending(current)) 1801 unix_dgram_peer_wake_me(sk, other)) {
1605 goto out_free; 1802 err = -EAGAIN;
1803 sk_locked = 1;
1804 goto out_unlock;
1805 }
1606 1806
1607 goto restart; 1807 if (!sk_locked) {
1808 sk_locked = 1;
1809 goto restart_locked;
1810 }
1608 } 1811 }
1609 1812
1813 if (unlikely(sk_locked))
1814 unix_state_unlock(sk);
1815
1610 if (sock_flag(other, SOCK_RCVTSTAMP)) 1816 if (sock_flag(other, SOCK_RCVTSTAMP))
1611 __net_timestamp(skb); 1817 __net_timestamp(skb);
1612 maybe_add_creds(skb, sock, other); 1818 maybe_add_creds(skb, sock, other);
@@ -1620,6 +1826,8 @@ restart:
1620 return len; 1826 return len;
1621 1827
1622out_unlock: 1828out_unlock:
1829 if (sk_locked)
1830 unix_state_unlock(sk);
1623 unix_state_unlock(other); 1831 unix_state_unlock(other);
1624out_free: 1832out_free:
1625 kfree_skb(skb); 1833 kfree_skb(skb);
@@ -1741,8 +1949,10 @@ out_err:
1741static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, 1949static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1742 int offset, size_t size, int flags) 1950 int offset, size_t size, int flags)
1743{ 1951{
1744 int err = 0; 1952 int err;
1745 bool send_sigpipe = true; 1953 bool send_sigpipe = false;
1954 bool init_scm = true;
1955 struct scm_cookie scm;
1746 struct sock *other, *sk = socket->sk; 1956 struct sock *other, *sk = socket->sk;
1747 struct sk_buff *skb, *newskb = NULL, *tail = NULL; 1957 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1748 1958
@@ -1760,7 +1970,7 @@ alloc_skb:
1760 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, 1970 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1761 &err, 0); 1971 &err, 0);
1762 if (!newskb) 1972 if (!newskb)
1763 return err; 1973 goto err;
1764 } 1974 }
1765 1975
1766 /* we must acquire readlock as we modify already present 1976 /* we must acquire readlock as we modify already present
@@ -1769,12 +1979,12 @@ alloc_skb:
1769 err = mutex_lock_interruptible(&unix_sk(other)->readlock); 1979 err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1770 if (err) { 1980 if (err) {
1771 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; 1981 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1772 send_sigpipe = false;
1773 goto err; 1982 goto err;
1774 } 1983 }
1775 1984
1776 if (sk->sk_shutdown & SEND_SHUTDOWN) { 1985 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1777 err = -EPIPE; 1986 err = -EPIPE;
1987 send_sigpipe = true;
1778 goto err_unlock; 1988 goto err_unlock;
1779 } 1989 }
1780 1990
@@ -1783,17 +1993,27 @@ alloc_skb:
1783 if (sock_flag(other, SOCK_DEAD) || 1993 if (sock_flag(other, SOCK_DEAD) ||
1784 other->sk_shutdown & RCV_SHUTDOWN) { 1994 other->sk_shutdown & RCV_SHUTDOWN) {
1785 err = -EPIPE; 1995 err = -EPIPE;
1996 send_sigpipe = true;
1786 goto err_state_unlock; 1997 goto err_state_unlock;
1787 } 1998 }
1788 1999
2000 if (init_scm) {
2001 err = maybe_init_creds(&scm, socket, other);
2002 if (err)
2003 goto err_state_unlock;
2004 init_scm = false;
2005 }
2006
1789 skb = skb_peek_tail(&other->sk_receive_queue); 2007 skb = skb_peek_tail(&other->sk_receive_queue);
1790 if (tail && tail == skb) { 2008 if (tail && tail == skb) {
1791 skb = newskb; 2009 skb = newskb;
1792 } else if (!skb) { 2010 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1793 if (newskb) 2011 if (newskb) {
1794 skb = newskb; 2012 skb = newskb;
1795 else 2013 } else {
2014 tail = skb;
1796 goto alloc_skb; 2015 goto alloc_skb;
2016 }
1797 } else if (newskb) { 2017 } else if (newskb) {
1798 /* this is fast path, we don't necessarily need to 2018 /* this is fast path, we don't necessarily need to
1799 * call to kfree_skb even though with newskb == NULL 2019 * call to kfree_skb even though with newskb == NULL
@@ -1814,6 +2034,9 @@ alloc_skb:
1814 atomic_add(size, &sk->sk_wmem_alloc); 2034 atomic_add(size, &sk->sk_wmem_alloc);
1815 2035
1816 if (newskb) { 2036 if (newskb) {
2037 err = unix_scm_to_skb(&scm, skb, false);
2038 if (err)
2039 goto err_state_unlock;
1817 spin_lock(&other->sk_receive_queue.lock); 2040 spin_lock(&other->sk_receive_queue.lock);
1818 __skb_queue_tail(&other->sk_receive_queue, newskb); 2041 __skb_queue_tail(&other->sk_receive_queue, newskb);
1819 spin_unlock(&other->sk_receive_queue.lock); 2042 spin_unlock(&other->sk_receive_queue.lock);
@@ -1823,7 +2046,7 @@ alloc_skb:
1823 mutex_unlock(&unix_sk(other)->readlock); 2046 mutex_unlock(&unix_sk(other)->readlock);
1824 2047
1825 other->sk_data_ready(other); 2048 other->sk_data_ready(other);
1826 2049 scm_destroy(&scm);
1827 return size; 2050 return size;
1828 2051
1829err_state_unlock: 2052err_state_unlock:
@@ -1834,6 +2057,8 @@ err:
1834 kfree_skb(newskb); 2057 kfree_skb(newskb);
1835 if (send_sigpipe && !(flags & MSG_NOSIGNAL)) 2058 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
1836 send_sig(SIGPIPE, current, 0); 2059 send_sig(SIGPIPE, current, 0);
2060 if (!init_scm)
2061 scm_destroy(&scm);
1837 return err; 2062 return err;
1838} 2063}
1839 2064
@@ -1883,8 +2108,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1883 struct scm_cookie scm; 2108 struct scm_cookie scm;
1884 struct sock *sk = sock->sk; 2109 struct sock *sk = sock->sk;
1885 struct unix_sock *u = unix_sk(sk); 2110 struct unix_sock *u = unix_sk(sk);
1886 int noblock = flags & MSG_DONTWAIT; 2111 struct sk_buff *skb, *last;
1887 struct sk_buff *skb; 2112 long timeo;
1888 int err; 2113 int err;
1889 int peeked, skip; 2114 int peeked, skip;
1890 2115
@@ -1892,30 +2117,38 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1892 if (flags&MSG_OOB) 2117 if (flags&MSG_OOB)
1893 goto out; 2118 goto out;
1894 2119
1895 err = mutex_lock_interruptible(&u->readlock); 2120 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1896 if (unlikely(err)) {
1897 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
1898 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
1899 */
1900 err = noblock ? -EAGAIN : -ERESTARTSYS;
1901 goto out;
1902 }
1903 2121
1904 skip = sk_peek_offset(sk, flags); 2122 do {
2123 mutex_lock(&u->readlock);
1905 2124
1906 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); 2125 skip = sk_peek_offset(sk, flags);
1907 if (!skb) { 2126 skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
2127 &last);
2128 if (skb)
2129 break;
2130
2131 mutex_unlock(&u->readlock);
2132
2133 if (err != -EAGAIN)
2134 break;
2135 } while (timeo &&
2136 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2137
2138 if (!skb) { /* implies readlock unlocked */
1908 unix_state_lock(sk); 2139 unix_state_lock(sk);
1909 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ 2140 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1910 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN && 2141 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1911 (sk->sk_shutdown & RCV_SHUTDOWN)) 2142 (sk->sk_shutdown & RCV_SHUTDOWN))
1912 err = 0; 2143 err = 0;
1913 unix_state_unlock(sk); 2144 unix_state_unlock(sk);
1914 goto out_unlock; 2145 goto out;
1915 } 2146 }
1916 2147
1917 wake_up_interruptible_sync_poll(&u->peer_wait, 2148 if (wq_has_sleeper(&u->peer_wait))
1918 POLLOUT | POLLWRNORM | POLLWRBAND); 2149 wake_up_interruptible_sync_poll(&u->peer_wait,
2150 POLLOUT | POLLWRNORM |
2151 POLLWRBAND);
1919 2152
1920 if (msg->msg_name) 2153 if (msg->msg_name)
1921 unix_copy_addr(msg, skb->sk); 2154 unix_copy_addr(msg, skb->sk);
@@ -1967,7 +2200,6 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
1967 2200
1968out_free: 2201out_free:
1969 skb_free_datagram(sk, skb); 2202 skb_free_datagram(sk, skb);
1970out_unlock:
1971 mutex_unlock(&u->readlock); 2203 mutex_unlock(&u->readlock);
1972out: 2204out:
1973 return err; 2205 return err;
@@ -1996,7 +2228,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1996 !timeo) 2228 !timeo)
1997 break; 2229 break;
1998 2230
1999 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2231 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2000 unix_state_unlock(sk); 2232 unix_state_unlock(sk);
2001 timeo = freezable_schedule_timeout(timeo); 2233 timeo = freezable_schedule_timeout(timeo);
2002 unix_state_lock(sk); 2234 unix_state_lock(sk);
@@ -2004,7 +2236,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
2004 if (sock_flag(sk, SOCK_DEAD)) 2236 if (sock_flag(sk, SOCK_DEAD))
2005 break; 2237 break;
2006 2238
2007 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2239 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2008 } 2240 }
2009 2241
2010 finish_wait(sk_sleep(sk), &wait); 2242 finish_wait(sk_sleep(sk), &wait);
@@ -2061,14 +2293,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
2061 /* Lock the socket to prevent queue disordering 2293 /* Lock the socket to prevent queue disordering
2062 * while sleeps in memcpy_tomsg 2294 * while sleeps in memcpy_tomsg
2063 */ 2295 */
2064 err = mutex_lock_interruptible(&u->readlock); 2296 mutex_lock(&u->readlock);
2065 if (unlikely(err)) {
2066 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2067 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2068 */
2069 err = noblock ? -EAGAIN : -ERESTARTSYS;
2070 goto out;
2071 }
2072 2297
2073 if (flags & MSG_PEEK) 2298 if (flags & MSG_PEEK)
2074 skip = sk_peek_offset(sk, flags); 2299 skip = sk_peek_offset(sk, flags);
@@ -2112,12 +2337,12 @@ again:
2112 timeo = unix_stream_data_wait(sk, timeo, last, 2337 timeo = unix_stream_data_wait(sk, timeo, last,
2113 last_len); 2338 last_len);
2114 2339
2115 if (signal_pending(current) || 2340 if (signal_pending(current)) {
2116 mutex_lock_interruptible(&u->readlock)) {
2117 err = sock_intr_errno(timeo); 2341 err = sock_intr_errno(timeo);
2118 goto out; 2342 goto out;
2119 } 2343 }
2120 2344
2345 mutex_lock(&u->readlock);
2121 continue; 2346 continue;
2122unlock: 2347unlock:
2123 unix_state_unlock(sk); 2348 unix_state_unlock(sk);
@@ -2137,10 +2362,7 @@ unlock:
2137 2362
2138 if (check_creds) { 2363 if (check_creds) {
2139 /* Never glue messages from different writers */ 2364 /* Never glue messages from different writers */
2140 if ((UNIXCB(skb).pid != scm.pid) || 2365 if (!unix_skb_scm_eq(skb, &scm))
2141 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2142 !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
2143 !unix_secdata_eq(&scm, skb))
2144 break; 2366 break;
2145 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { 2367 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2146 /* Copy credentials */ 2368 /* Copy credentials */
@@ -2476,20 +2698,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2476 return mask; 2698 return mask;
2477 2699
2478 writable = unix_writable(sk); 2700 writable = unix_writable(sk);
2479 other = unix_peer_get(sk); 2701 if (writable) {
2480 if (other) { 2702 unix_state_lock(sk);
2481 if (unix_peer(other) != sk) { 2703
2482 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); 2704 other = unix_peer(sk);
2483 if (unix_recvq_full(other)) 2705 if (other && unix_peer(other) != sk &&
2484 writable = 0; 2706 unix_recvq_full(other) &&
2485 } 2707 unix_dgram_peer_wake_me(sk, other))
2486 sock_put(other); 2708 writable = 0;
2709
2710 unix_state_unlock(sk);
2487 } 2711 }
2488 2712
2489 if (writable) 2713 if (writable)
2490 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 2714 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2491 else 2715 else
2492 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 2716 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2493 2717
2494 return mask; 2718 return mask;
2495} 2719}