diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-09-19 01:52:27 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2011-09-28 13:29:50 -0400 |
| commit | 16e5726269611b71c930054ffe9b858c1cea88eb (patch) | |
| tree | 50d25ec97d639b418964bad2f45774d657468c74 | |
| parent | a9e9fd7182332d0cf5f3e601df3e71dd431b70d7 (diff) | |
af_unix: don't send SCM_CREDENTIALS by default
Since commit 7361c36c5224 (af_unix: Allow credentials to work across
user and pid namespaces) af_unix performance dropped a lot.
This is because we now take a reference on pid and cred in each write(),
and release them in read(), usually done from another process,
possibly on another CPU. This triggers false sharing.
# Events: 154K cycles
#
# Overhead Command Shared Object Symbol
# ........ ....... .................. .........................
#
10.40% hackbench [kernel.kallsyms] [k] put_pid
8.60% hackbench [kernel.kallsyms] [k] unix_stream_recvmsg
7.87% hackbench [kernel.kallsyms] [k] unix_stream_sendmsg
6.11% hackbench [kernel.kallsyms] [k] do_raw_spin_lock
4.95% hackbench [kernel.kallsyms] [k] unix_scm_to_skb
4.87% hackbench [kernel.kallsyms] [k] pid_nr_ns
4.34% hackbench [kernel.kallsyms] [k] cred_to_ucred
2.39% hackbench [kernel.kallsyms] [k] unix_destruct_scm
2.24% hackbench [kernel.kallsyms] [k] sub_preempt_count
1.75% hackbench [kernel.kallsyms] [k] fget_light
1.51% hackbench [kernel.kallsyms] [k] __mutex_lock_interruptible_slowpath
1.42% hackbench [kernel.kallsyms] [k] sock_alloc_send_pskb
This patch includes SCM_CREDENTIALS information in an af_unix message/skb
only if requested by the sender (see man 7 unix for details on how to include
ancillary data using the sendmsg() system call).
Note: This might break buggy applications that expected SCM_CREDENTIALS
from an unaware write() system call, with the receiver not using the
SO_PASSCRED socket option.
If SOCK_PASSCRED is set on the source or destination socket, we still
include credentials for plain write() syscalls.
Performance boost in hackbench: more than 50% gain on a 16-thread
machine (2 quad-core cpus, 2 threads per core)
hackbench 20 thread 2000
4.228 sec instead of 9.102 sec
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/net/scm.h | 5 | ||||
| -rw-r--r-- | net/core/scm.c | 10 | ||||
| -rw-r--r-- | net/netlink/af_netlink.c | 5 | ||||
| -rw-r--r-- | net/unix/af_unix.c | 24 |
4 files changed, 33 insertions, 11 deletions
diff --git a/include/net/scm.h b/include/net/scm.h index 745460fa2f02..d456f4c71a32 100644 --- a/include/net/scm.h +++ b/include/net/scm.h | |||
| @@ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, | |||
| 49 | struct pid *pid, const struct cred *cred) | 49 | struct pid *pid, const struct cred *cred) |
| 50 | { | 50 | { |
| 51 | scm->pid = get_pid(pid); | 51 | scm->pid = get_pid(pid); |
| 52 | scm->cred = get_cred(cred); | 52 | scm->cred = cred ? get_cred(cred) : NULL; |
| 53 | cred_to_ucred(pid, cred, &scm->creds); | 53 | cred_to_ucred(pid, cred, &scm->creds); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| @@ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm) | |||
| 73 | static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, | 73 | static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, |
| 74 | struct scm_cookie *scm) | 74 | struct scm_cookie *scm) |
| 75 | { | 75 | { |
| 76 | scm_set_cred(scm, task_tgid(current), current_cred()); | 76 | memset(scm, 0, sizeof(*scm)); |
| 77 | scm->fp = NULL; | ||
| 78 | unix_get_peersec_dgram(sock, scm); | 77 | unix_get_peersec_dgram(sock, scm); |
| 79 | if (msg->msg_controllen <= 0) | 78 | if (msg->msg_controllen <= 0) |
| 80 | return 0; | 79 | return 0; |
diff --git a/net/core/scm.c b/net/core/scm.c index 811b53fb330e..ff52ad0a5150 100644 --- a/net/core/scm.c +++ b/net/core/scm.c | |||
| @@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | |||
| 173 | if (err) | 173 | if (err) |
| 174 | goto error; | 174 | goto error; |
| 175 | 175 | ||
| 176 | if (pid_vnr(p->pid) != p->creds.pid) { | 176 | if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { |
| 177 | struct pid *pid; | 177 | struct pid *pid; |
| 178 | err = -ESRCH; | 178 | err = -ESRCH; |
| 179 | pid = find_get_pid(p->creds.pid); | 179 | pid = find_get_pid(p->creds.pid); |
| @@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | |||
| 183 | p->pid = pid; | 183 | p->pid = pid; |
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | if ((p->cred->euid != p->creds.uid) || | 186 | if (!p->cred || |
| 187 | (p->cred->egid != p->creds.gid)) { | 187 | (p->cred->euid != p->creds.uid) || |
| 188 | (p->cred->egid != p->creds.gid)) { | ||
| 188 | struct cred *cred; | 189 | struct cred *cred; |
| 189 | err = -ENOMEM; | 190 | err = -ENOMEM; |
| 190 | cred = prepare_creds(); | 191 | cred = prepare_creds(); |
| @@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) | |||
| 193 | 194 | ||
| 194 | cred->uid = cred->euid = p->creds.uid; | 195 | cred->uid = cred->euid = p->creds.uid; |
| 195 | cred->gid = cred->egid = p->creds.gid; | 196 | cred->gid = cred->egid = p->creds.gid; |
| 196 | put_cred(p->cred); | 197 | if (p->cred) |
| 198 | put_cred(p->cred); | ||
| 197 | p->cred = cred; | 199 | p->cred = cred; |
| 198 | } | 200 | } |
| 199 | break; | 201 | break; |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4330db99fabf..1201b6d4183d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
| @@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1324 | if (msg->msg_flags&MSG_OOB) | 1324 | if (msg->msg_flags&MSG_OOB) |
| 1325 | return -EOPNOTSUPP; | 1325 | return -EOPNOTSUPP; |
| 1326 | 1326 | ||
| 1327 | if (NULL == siocb->scm) { | 1327 | if (NULL == siocb->scm) |
| 1328 | siocb->scm = &scm; | 1328 | siocb->scm = &scm; |
| 1329 | memset(&scm, 0, sizeof(scm)); | 1329 | |
| 1330 | } | ||
| 1331 | err = scm_send(sock, msg, siocb->scm); | 1330 | err = scm_send(sock, msg, siocb->scm); |
| 1332 | if (err < 0) | 1331 | if (err < 0) |
| 1333 | return err; | 1332 | return err; |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec68e1c05b85..466fbcc5cf77 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
| @@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) | |||
| 1381 | static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) | 1381 | static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) |
| 1382 | { | 1382 | { |
| 1383 | int err = 0; | 1383 | int err = 0; |
| 1384 | |||
| 1384 | UNIXCB(skb).pid = get_pid(scm->pid); | 1385 | UNIXCB(skb).pid = get_pid(scm->pid); |
| 1385 | UNIXCB(skb).cred = get_cred(scm->cred); | 1386 | if (scm->cred) |
| 1387 | UNIXCB(skb).cred = get_cred(scm->cred); | ||
| 1386 | UNIXCB(skb).fp = NULL; | 1388 | UNIXCB(skb).fp = NULL; |
| 1387 | if (scm->fp && send_fds) | 1389 | if (scm->fp && send_fds) |
| 1388 | err = unix_attach_fds(scm, skb); | 1390 | err = unix_attach_fds(scm, skb); |
| @@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen | |||
| 1392 | } | 1394 | } |
| 1393 | 1395 | ||
| 1394 | /* | 1396 | /* |
| 1397 | * Some apps rely on write() giving SCM_CREDENTIALS | ||
| 1398 | * We include credentials if source or destination socket | ||
| 1399 | * asserted SOCK_PASSCRED. | ||
| 1400 | */ | ||
| 1401 | static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, | ||
| 1402 | const struct sock *other) | ||
| 1403 | { | ||
| 1404 | if (UNIXCB(skb).cred) | ||
| 1405 | return; | ||
| 1406 | if (test_bit(SOCK_PASSCRED, &sock->flags) || | ||
| 1407 | !other->sk_socket || | ||
| 1408 | test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { | ||
| 1409 | UNIXCB(skb).pid = get_pid(task_tgid(current)); | ||
| 1410 | UNIXCB(skb).cred = get_current_cred(); | ||
| 1411 | } | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | /* | ||
| 1395 | * Send AF_UNIX data. | 1415 | * Send AF_UNIX data. |
| 1396 | */ | 1416 | */ |
| 1397 | 1417 | ||
| @@ -1538,6 +1558,7 @@ restart: | |||
| 1538 | 1558 | ||
| 1539 | if (sock_flag(other, SOCK_RCVTSTAMP)) | 1559 | if (sock_flag(other, SOCK_RCVTSTAMP)) |
| 1540 | __net_timestamp(skb); | 1560 | __net_timestamp(skb); |
| 1561 | maybe_add_creds(skb, sock, other); | ||
| 1541 | skb_queue_tail(&other->sk_receive_queue, skb); | 1562 | skb_queue_tail(&other->sk_receive_queue, skb); |
| 1542 | if (max_level > unix_sk(other)->recursion_level) | 1563 | if (max_level > unix_sk(other)->recursion_level) |
| 1543 | unix_sk(other)->recursion_level = max_level; | 1564 | unix_sk(other)->recursion_level = max_level; |
| @@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1652 | (other->sk_shutdown & RCV_SHUTDOWN)) | 1673 | (other->sk_shutdown & RCV_SHUTDOWN)) |
| 1653 | goto pipe_err_free; | 1674 | goto pipe_err_free; |
| 1654 | 1675 | ||
| 1676 | maybe_add_creds(skb, sock, other); | ||
| 1655 | skb_queue_tail(&other->sk_receive_queue, skb); | 1677 | skb_queue_tail(&other->sk_receive_queue, skb); |
| 1656 | if (max_level > unix_sk(other)->recursion_level) | 1678 | if (max_level > unix_sk(other)->recursion_level) |
| 1657 | unix_sk(other)->recursion_level = max_level; | 1679 | unix_sk(other)->recursion_level = max_level; |
