author    Rainer Weikusat <rweikusat@mobileactivedefense.com>    2015-11-20 17:07:23 -0500
committer David S. Miller <davem@davemloft.net>                  2015-11-23 12:29:58 -0500
commit    7d267278a9ece963d77eefec61630223fce08c6c (patch)
tree      307652e3130a7a5d9537ac1d53f98754af8bb0a0
parent    3b13758f51de30618d9c7f3fc174d8d1a3cb13cd (diff)
unix: avoid use-after-free in ep_remove_wait_queue
Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:

An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of the server socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the messages presently enqueued on the server receive queue having been sent by them. In order to ensure that these will be woken up once space becomes available again, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received).

This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed even though "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There is no way to forcibly deregister a wait queue with epoll.

Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer-receive-queue-full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via the wake function. The connection to the peer wait queue is dissolved again if a wake up is about to be relayed, the client socket reconnects, a dead peer is detected, or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever.

Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
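For reference, the sequence of userspace calls that sets up the problematic wait-queue registration looks roughly like the sketch below. It is illustrative only and not part of the patch: the socket path and message size are arbitrary, error handling is omitted, and whether the final close() actually dereferences freed memory depends on the kernel in use; on kernels without this fix it is the access in ep_remove_wait_queue that tools such as KASAN would flag.

/*
 * Hypothetical sketch of the scenario described above: an n:1 AF_UNIX
 * datagram client is polled for writability while its peer's receive
 * queue is full, and the peer then goes away.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/epoll.h>

int main(void)
{
        struct sockaddr_un addr = { .sun_family = AF_UNIX };
        struct epoll_event ev = { .events = EPOLLOUT };
        char buf[16] = "x";
        int server, client, epfd;

        strcpy(addr.sun_path, "/tmp/peer_wait_demo.sock");  /* arbitrary path */
        unlink(addr.sun_path);

        /* n:1 association: the server never connects back to the client */
        server = socket(AF_UNIX, SOCK_DGRAM, 0);
        bind(server, (struct sockaddr *)&addr, sizeof(addr));

        client = socket(AF_UNIX, SOCK_DGRAM, 0);
        connect(client, (struct sockaddr *)&addr, sizeof(addr));

        /* Fill the server's receive queue until the flow-control limit hits */
        while (send(client, buf, sizeof(buf), MSG_DONTWAIT) > 0)
                ;

        /*
         * Polling the blocked client for writability makes unix_dgram_poll
         * register a wakeup that can only come from the server side; before
         * this fix, epoll was registered directly on the server's peer_wait
         * queue.
         */
        epfd = epoll_create1(0);
        ev.data.fd = client;
        epoll_ctl(epfd, EPOLL_CTL_ADD, client, &ev);

        /*
         * Kill the server and let the dead-peer detection in
         * unix_dgram_sendmsg drop the client's reference to it; the server
         * socket may now be freed.
         */
        close(server);
        unlink(addr.sun_path);
        send(client, buf, sizeof(buf), MSG_DONTWAIT);

        /*
         * On kernels without this patch, tearing down the epoll instance
         * (ep_remove_wait_queue) can now touch the freed peer_wait queue.
         */
        close(epfd);
        close(client);
        return 0;
}

With the patch applied, the registration made on the client's behalf lives in the client's own unix_sock (peer_wake) and is torn down when the peer association is dissolved, so the send step above leaves epoll with nothing dangling.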
-rw-r--r--  include/net/af_unix.h |   1
-rw-r--r--  net/unix/af_unix.c    | 183
2 files changed, 165 insertions(+), 19 deletions(-)
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b36d837c701e..2a91a0561a47 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE       0
 #define UNIX_GC_MAYBE_CYCLE     1
         struct socket_wq        peer_wq;
+        wait_queue_t            peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 955ec152cb71..4e95bdf973d9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ found:
         return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+                                      void *key)
+{
+        struct unix_sock *u;
+        wait_queue_head_t *u_sleep;
+
+        u = container_of(q, struct unix_sock, peer_wake);
+
+        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+                            q);
+        u->peer_wake.private = NULL;
+
+        /* relaying can only happen while the wq still exists */
+        u_sleep = sk_sleep(&u->sk);
+        if (u_sleep)
+                wake_up_interruptible_poll(u_sleep, key);
+
+        return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+        struct unix_sock *u, *u_other;
+        int rc;
+
+        u = unix_sk(sk);
+        u_other = unix_sk(other);
+        rc = 0;
+        spin_lock(&u_other->peer_wait.lock);
+
+        if (!u->peer_wake.private) {
+                u->peer_wake.private = other;
+                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+                rc = 1;
+        }
+
+        spin_unlock(&u_other->peer_wait.lock);
+        return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+                                            struct sock *other)
+{
+        struct unix_sock *u, *u_other;
+
+        u = unix_sk(sk);
+        u_other = unix_sk(other);
+        spin_lock(&u_other->peer_wait.lock);
+
+        if (u->peer_wake.private == other) {
+                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+                u->peer_wake.private = NULL;
+        }
+
+        spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+                                                   struct sock *other)
+{
+        unix_dgram_peer_wake_disconnect(sk, other);
+        wake_up_interruptible_poll(sk_sleep(sk),
+                                   POLLOUT |
+                                   POLLWRNORM |
+                                   POLLWRBAND);
+}
+
+/* preconditions:
+ *      - unix_peer(sk) == other
+ *      - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+        int connected;
+
+        connected = unix_dgram_peer_wake_connect(sk, other);
+
+        if (unix_recvq_full(other))
+                return 1;
+
+        if (connected)
+                unix_dgram_peer_wake_disconnect(sk, other);
+
+        return 0;
+}
+
 static int unix_writable(const struct sock *sk)
 {
         return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
                         skpair->sk_state_change(skpair);
                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                 }
+
+                unix_dgram_peer_wake_disconnect(sk, skpair);
                 sock_put(skpair); /* It may now die */
                 unix_peer(sk) = NULL;
         }
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
         INIT_LIST_HEAD(&u->link);
         mutex_init(&u->readlock); /* single task reading lock */
         init_waitqueue_head(&u->peer_wait);
+        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
         unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
         if (sk == NULL)
@@ -1033,6 +1148,8 @@ restart:
         if (unix_peer(sk)) {
                 struct sock *old_peer = unix_peer(sk);
                 unix_peer(sk) = other;
+                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
                 unix_state_double_unlock(sk, other);
 
                 if (other != old_peer)
@@ -1472,6 +1589,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
         struct scm_cookie scm;
         int max_level;
         int data_len = 0;
+        int sk_locked;
 
         wait_for_unix_gc();
         err = scm_send(sock, msg, &scm, false);
@@ -1550,12 +1668,14 @@ restart:
                 goto out_free;
         }
 
+        sk_locked = 0;
         unix_state_lock(other);
+restart_locked:
         err = -EPERM;
         if (!unix_may_send(sk, other))
                 goto out_unlock;
 
-        if (sock_flag(other, SOCK_DEAD)) {
+        if (unlikely(sock_flag(other, SOCK_DEAD))) {
                 /*
                  * Check with 1003.1g - what should
                  * datagram error
@@ -1563,10 +1683,14 @@ restart:
                 unix_state_unlock(other);
                 sock_put(other);
 
+                if (!sk_locked)
+                        unix_state_lock(sk);
+
                 err = 0;
-                unix_state_lock(sk);
                 if (unix_peer(sk) == other) {
                         unix_peer(sk) = NULL;
+                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
                         unix_state_unlock(sk);
 
                         unix_dgram_disconnected(sk, other);
@@ -1592,21 +1716,38 @@ restart:
                 goto out_unlock;
         }
 
-        if (unix_peer(other) != sk && unix_recvq_full(other)) {
-                if (!timeo) {
-                        err = -EAGAIN;
-                        goto out_unlock;
+        if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+                if (timeo) {
+                        timeo = unix_wait_for_peer(other, timeo);
+
+                        err = sock_intr_errno(timeo);
+                        if (signal_pending(current))
+                                goto out_free;
+
+                        goto restart;
                 }
 
-                timeo = unix_wait_for_peer(other, timeo);
+                if (!sk_locked) {
+                        unix_state_unlock(other);
+                        unix_state_double_lock(sk, other);
+                }
 
-                err = sock_intr_errno(timeo);
-                if (signal_pending(current))
-                        goto out_free;
+                if (unix_peer(sk) != other ||
+                    unix_dgram_peer_wake_me(sk, other)) {
+                        err = -EAGAIN;
+                        sk_locked = 1;
+                        goto out_unlock;
+                }
 
-                goto restart;
+                if (!sk_locked) {
+                        sk_locked = 1;
+                        goto restart_locked;
+                }
         }
 
+        if (unlikely(sk_locked))
+                unix_state_unlock(sk);
+
         if (sock_flag(other, SOCK_RCVTSTAMP))
                 __net_timestamp(skb);
         maybe_add_creds(skb, sock, other);
@@ -1620,6 +1761,8 @@ restart:
         return len;
 
 out_unlock:
+        if (sk_locked)
+                unix_state_unlock(sk);
         unix_state_unlock(other);
 out_free:
         kfree_skb(skb);
@@ -2476,14 +2619,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
                 return mask;
 
         writable = unix_writable(sk);
-        other = unix_peer_get(sk);
-        if (other) {
-                if (unix_peer(other) != sk) {
-                        sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-                        if (unix_recvq_full(other))
-                                writable = 0;
-                }
-                sock_put(other);
+        if (writable) {
+                unix_state_lock(sk);
+
+                other = unix_peer(sk);
+                if (other && unix_peer(other) != sk &&
+                    unix_recvq_full(other) &&
+                    unix_dgram_peer_wake_me(sk, other))
+                        writable = 0;
+
+                unix_state_unlock(sk);
         }
 
         if (writable)