aboutsummaryrefslogtreecommitdiffstats
path: root/net/unix
diff options
context:
space:
mode:
author: Tim Chen <tim.c.chen@linux.intel.com> 2011-08-22 10:57:26 -0400
committer: David S. Miller <davem@davemloft.net> 2011-08-24 22:41:13 -0400
commit: 0856a304091b33a8e8f9f9c98e776f425af2b625 (patch)
tree: cc521e8e13db01813e0e1b4b67ecc20a18551212 /net/unix
parent: 6af29ccc223b0feb6fc6112281c3fa3cdb1afddf (diff)
Scm: Remove unnecessary pid & credential references in Unix socket's send and receive path
Patch series 109f6e39..7361c36c back in 2.6.36 added functionality to allow credentials to work across pid namespaces for packets sent via UNIX sockets. However, the atomic reference counts on pid and credentials caused plenty of cache bouncing when there are numerous threads of the same pid sharing a UNIX socket. This patch mitigates the problem by eliminating extraneous reference counts on pid and credentials on both the send and receive paths of UNIX sockets. I found a 2x improvement in hackbench's threaded case. On the receive path in unix_dgram_recvmsg, there is currently an increment of the reference count on pid and credentials in scm_set_cred. Then there are two decrements of the reference counts: once in scm_recv, and once when skb_free_datagram calls the skb->destructor function unix_destruct_scm. One pair of increment and decrement of the ref count on pid and credentials can be eliminated from the receive path, because the reference taken when the skb was created on the send side is still held until the skb is destroyed. On the send path, there are two increments of the ref count on pid and credentials, once in scm_send and once in unix_scm_to_skb. Then there is a decrement of the reference counts in scm_destroy's call to scm_destroy_cred at the end of the unix_dgram_sendmsg function. One pair of increment and decrement of the reference counts can be removed, so we only need to increment the ref counts once. By incorporating these changes, for hackbench running on a 4 socket NHM-EX machine with 40 cores, the execution of hackbench on 50 groups of 20 threads sped up by a factor of 2. Hackbench command used for testing: ./hackbench 50 thread 2000 Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix')
-rw-r--r-- net/unix/af_unix.c 45
1 files changed, 29 insertions, 16 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ec68e1c05b85..e6d9d1014ed2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1378,11 +1378,17 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1378 return max_level; 1378 return max_level;
1379} 1379}
1380 1380
1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb,
1382 bool send_fds, bool ref)
1382{ 1383{
1383 int err = 0; 1384 int err = 0;
1384 UNIXCB(skb).pid = get_pid(scm->pid); 1385 if (ref) {
1385 UNIXCB(skb).cred = get_cred(scm->cred); 1386 UNIXCB(skb).pid = get_pid(scm->pid);
1387 UNIXCB(skb).cred = get_cred(scm->cred);
1388 } else {
1389 UNIXCB(skb).pid = scm->pid;
1390 UNIXCB(skb).cred = scm->cred;
1391 }
1386 UNIXCB(skb).fp = NULL; 1392 UNIXCB(skb).fp = NULL;
1387 if (scm->fp && send_fds) 1393 if (scm->fp && send_fds)
1388 err = unix_attach_fds(scm, skb); 1394 err = unix_attach_fds(scm, skb);
@@ -1407,7 +1413,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1407 int namelen = 0; /* fake GCC */ 1413 int namelen = 0; /* fake GCC */
1408 int err; 1414 int err;
1409 unsigned hash; 1415 unsigned hash;
1410 struct sk_buff *skb; 1416 struct sk_buff *skb = NULL;
1411 long timeo; 1417 long timeo;
1412 struct scm_cookie tmp_scm; 1418 struct scm_cookie tmp_scm;
1413 int max_level; 1419 int max_level;
@@ -1448,7 +1454,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1448 if (skb == NULL) 1454 if (skb == NULL)
1449 goto out; 1455 goto out;
1450 1456
1451 err = unix_scm_to_skb(siocb->scm, skb, true); 1457 err = unix_scm_to_skb(siocb->scm, skb, true, false);
1452 if (err < 0) 1458 if (err < 0)
1453 goto out_free; 1459 goto out_free;
1454 max_level = err + 1; 1460 max_level = err + 1;
@@ -1544,7 +1550,7 @@ restart:
1544 unix_state_unlock(other); 1550 unix_state_unlock(other);
1545 other->sk_data_ready(other, len); 1551 other->sk_data_ready(other, len);
1546 sock_put(other); 1552 sock_put(other);
1547 scm_destroy(siocb->scm); 1553 scm_release(siocb->scm);
1548 return len; 1554 return len;
1549 1555
1550out_unlock: 1556out_unlock:
@@ -1554,7 +1560,8 @@ out_free:
1554out: 1560out:
1555 if (other) 1561 if (other)
1556 sock_put(other); 1562 sock_put(other);
1557 scm_destroy(siocb->scm); 1563 if (skb == NULL)
1564 scm_destroy(siocb->scm);
1558 return err; 1565 return err;
1559} 1566}
1560 1567
@@ -1566,7 +1573,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1566 struct sock *sk = sock->sk; 1573 struct sock *sk = sock->sk;
1567 struct sock *other = NULL; 1574 struct sock *other = NULL;
1568 int err, size; 1575 int err, size;
1569 struct sk_buff *skb; 1576 struct sk_buff *skb = NULL;
1570 int sent = 0; 1577 int sent = 0;
1571 struct scm_cookie tmp_scm; 1578 struct scm_cookie tmp_scm;
1572 bool fds_sent = false; 1579 bool fds_sent = false;
@@ -1631,11 +1638,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1631 size = min_t(int, size, skb_tailroom(skb)); 1638 size = min_t(int, size, skb_tailroom(skb));
1632 1639
1633 1640
1634 /* Only send the fds in the first buffer */ 1641 /* Only send the fds and no ref to pid in the first buffer */
1635 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 1642 err = unix_scm_to_skb(siocb->scm, skb, !fds_sent, fds_sent);
1636 if (err < 0) { 1643 if (err < 0) {
1637 kfree_skb(skb); 1644 kfree_skb(skb);
1638 goto out_err; 1645 goto out;
1639 } 1646 }
1640 max_level = err + 1; 1647 max_level = err + 1;
1641 fds_sent = true; 1648 fds_sent = true;
@@ -1643,7 +1650,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1643 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 1650 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1644 if (err) { 1651 if (err) {
1645 kfree_skb(skb); 1652 kfree_skb(skb);
1646 goto out_err; 1653 goto out;
1647 } 1654 }
1648 1655
1649 unix_state_lock(other); 1656 unix_state_lock(other);
@@ -1660,7 +1667,10 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1660 sent += size; 1667 sent += size;
1661 } 1668 }
1662 1669
1663 scm_destroy(siocb->scm); 1670 if (skb)
1671 scm_release(siocb->scm);
1672 else
1673 scm_destroy(siocb->scm);
1664 siocb->scm = NULL; 1674 siocb->scm = NULL;
1665 1675
1666 return sent; 1676 return sent;
@@ -1673,7 +1683,9 @@ pipe_err:
1673 send_sig(SIGPIPE, current, 0); 1683 send_sig(SIGPIPE, current, 0);
1674 err = -EPIPE; 1684 err = -EPIPE;
1675out_err: 1685out_err:
1676 scm_destroy(siocb->scm); 1686 if (skb == NULL)
1687 scm_destroy(siocb->scm);
1688out:
1677 siocb->scm = NULL; 1689 siocb->scm = NULL;
1678 return sent ? : err; 1690 return sent ? : err;
1679} 1691}
@@ -1777,7 +1789,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1777 siocb->scm = &tmp_scm; 1789 siocb->scm = &tmp_scm;
1778 memset(&tmp_scm, 0, sizeof(tmp_scm)); 1790 memset(&tmp_scm, 0, sizeof(tmp_scm));
1779 } 1791 }
1780 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); 1792 scm_set_cred_noref(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1781 unix_set_secdata(siocb->scm, skb); 1793 unix_set_secdata(siocb->scm, skb);
1782 1794
1783 if (!(flags & MSG_PEEK)) { 1795 if (!(flags & MSG_PEEK)) {
@@ -1939,7 +1951,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1939 } 1951 }
1940 } else { 1952 } else {
1941 /* Copy credentials */ 1953 /* Copy credentials */
1942 scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); 1954 scm_set_cred_noref(siocb->scm, UNIXCB(skb).pid,
1955 UNIXCB(skb).cred);
1943 check_creds = 1; 1956 check_creds = 1;
1944 } 1957 }
1945 1958