aboutsummaryrefslogtreecommitdiffstats
path: root/net/unix
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2013-08-08 17:37:32 -0400
committerDavid S. Miller <davem@davemloft.net>2013-08-10 04:16:44 -0400
commite370a7236321773245c5522d8bb299380830d3b2 (patch)
treee374e13b5fcccdd9aa28fcb5ab0dd3df05b4d384 /net/unix
parent149479d019e06df5a7f4096f95c00cfb1380309c (diff)
af_unix: improve STREAM behavior with fragmented memory
unix_stream_sendmsg() currently uses order-2 allocations, and we had numerous reports this can fail. The __GFP_REPEAT flag present in sock_alloc_send_pskb() is not helping. This patch extends the work done in commit eb6a24816b247c ("af_unix: reduce high order page allocations") for datagram sockets. This opens the possibility of zero copy IO (splice() and friends). The trick is to not use skb_pull() anymore in recvmsg() path, and instead add a @consumed field in UNIXCB() to track amount of already read payload in the skb. There is a performance regression for large sends because of extra page allocations that will be addressed in a follow-up patch, allowing sock_alloc_send_pskb() to attempt high order page allocations. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: David Rientjes <rientjes@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
-rw-r--r--net/unix/af_unix.c65
1 file changed, 30 insertions, 35 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c4ce243824bb..99dc760cdd95 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1596,6 +1596,10 @@ out:
1596 return err; 1596 return err;
1597} 1597}
1598 1598
1599/* We use paged skbs for stream sockets, and limit occupancy to 32768
1600 * bytes, and a minimun of a full page.
1601 */
1602#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1599 1603
1600static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 1604static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1601 struct msghdr *msg, size_t len) 1605 struct msghdr *msg, size_t len)
@@ -1609,6 +1613,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1609 struct scm_cookie tmp_scm; 1613 struct scm_cookie tmp_scm;
1610 bool fds_sent = false; 1614 bool fds_sent = false;
1611 int max_level; 1615 int max_level;
1616 int data_len;
1612 1617
1613 if (NULL == siocb->scm) 1618 if (NULL == siocb->scm)
1614 siocb->scm = &tmp_scm; 1619 siocb->scm = &tmp_scm;
@@ -1635,40 +1640,21 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1635 goto pipe_err; 1640 goto pipe_err;
1636 1641
1637 while (sent < len) { 1642 while (sent < len) {
1638 /* 1643 size = len - sent;
1639 * Optimisation for the fact that under 0.01% of X
1640 * messages typically need breaking up.
1641 */
1642
1643 size = len-sent;
1644 1644
1645 /* Keep two messages in the pipe so it schedules better */ 1645 /* Keep two messages in the pipe so it schedules better */
1646 if (size > ((sk->sk_sndbuf >> 1) - 64)) 1646 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1647 size = (sk->sk_sndbuf >> 1) - 64;
1648 1647
1649 if (size > SKB_MAX_ALLOC) 1648 /* allow fallback to order-0 allocations */
1650 size = SKB_MAX_ALLOC; 1649 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1651
1652 /*
1653 * Grab a buffer
1654 */
1655 1650
1656 skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT, 1651 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1657 &err);
1658 1652
1659 if (skb == NULL) 1653 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1654 msg->msg_flags & MSG_DONTWAIT, &err);
1655 if (!skb)
1660 goto out_err; 1656 goto out_err;
1661 1657
1662 /*
1663 * If you pass two values to the sock_alloc_send_skb
1664 * it tries to grab the large buffer with GFP_NOFS
1665 * (which can fail easily), and if it fails grab the
1666 * fallback size buffer which is under a page and will
1667 * succeed. [Alan]
1668 */
1669 size = min_t(int, size, skb_tailroom(skb));
1670
1671
1672 /* Only send the fds in the first buffer */ 1658 /* Only send the fds in the first buffer */
1673 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 1659 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1674 if (err < 0) { 1660 if (err < 0) {
@@ -1678,7 +1664,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1678 max_level = err + 1; 1664 max_level = err + 1;
1679 fds_sent = true; 1665 fds_sent = true;
1680 1666
1681 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1667 skb_put(skb, size - data_len);
1668 skb->data_len = data_len;
1669 skb->len = size;
1670 err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, size);
1682 if (err) { 1671 if (err) {
1683 kfree_skb(skb); 1672 kfree_skb(skb);
1684 goto out_err; 1673 goto out_err;
@@ -1890,6 +1879,11 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
1890 return timeo; 1879 return timeo;
1891} 1880}
1892 1881
1882static unsigned int unix_skb_len(const struct sk_buff *skb)
1883{
1884 return skb->len - UNIXCB(skb).consumed;
1885}
1886
1893static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 1887static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1894 struct msghdr *msg, size_t size, 1888 struct msghdr *msg, size_t size,
1895 int flags) 1889 int flags)
@@ -1977,8 +1971,8 @@ again:
1977 } 1971 }
1978 1972
1979 skip = sk_peek_offset(sk, flags); 1973 skip = sk_peek_offset(sk, flags);
1980 while (skip >= skb->len) { 1974 while (skip >= unix_skb_len(skb)) {
1981 skip -= skb->len; 1975 skip -= unix_skb_len(skb);
1982 last = skb; 1976 last = skb;
1983 skb = skb_peek_next(skb, &sk->sk_receive_queue); 1977 skb = skb_peek_next(skb, &sk->sk_receive_queue);
1984 if (!skb) 1978 if (!skb)
@@ -2005,8 +1999,9 @@ again:
2005 sunaddr = NULL; 1999 sunaddr = NULL;
2006 } 2000 }
2007 2001
2008 chunk = min_t(unsigned int, skb->len - skip, size); 2002 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2009 if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) { 2003 if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
2004 msg->msg_iov, chunk)) {
2010 if (copied == 0) 2005 if (copied == 0)
2011 copied = -EFAULT; 2006 copied = -EFAULT;
2012 break; 2007 break;
@@ -2016,14 +2011,14 @@ again:
2016 2011
2017 /* Mark read part of skb as used */ 2012 /* Mark read part of skb as used */
2018 if (!(flags & MSG_PEEK)) { 2013 if (!(flags & MSG_PEEK)) {
2019 skb_pull(skb, chunk); 2014 UNIXCB(skb).consumed += chunk;
2020 2015
2021 sk_peek_offset_bwd(sk, chunk); 2016 sk_peek_offset_bwd(sk, chunk);
2022 2017
2023 if (UNIXCB(skb).fp) 2018 if (UNIXCB(skb).fp)
2024 unix_detach_fds(siocb->scm, skb); 2019 unix_detach_fds(siocb->scm, skb);
2025 2020
2026 if (skb->len) 2021 if (unix_skb_len(skb))
2027 break; 2022 break;
2028 2023
2029 skb_unlink(skb, &sk->sk_receive_queue); 2024 skb_unlink(skb, &sk->sk_receive_queue);
@@ -2107,7 +2102,7 @@ long unix_inq_len(struct sock *sk)
2107 if (sk->sk_type == SOCK_STREAM || 2102 if (sk->sk_type == SOCK_STREAM ||
2108 sk->sk_type == SOCK_SEQPACKET) { 2103 sk->sk_type == SOCK_SEQPACKET) {
2109 skb_queue_walk(&sk->sk_receive_queue, skb) 2104 skb_queue_walk(&sk->sk_receive_queue, skb)
2110 amount += skb->len; 2105 amount += unix_skb_len(skb);
2111 } else { 2106 } else {
2112 skb = skb_peek(&sk->sk_receive_queue); 2107 skb = skb_peek(&sk->sk_receive_queue);
2113 if (skb) 2108 if (skb)