aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <eric.dumazet@gmail.com>2011-09-19 01:52:27 -0400
committerDavid S. Miller <davem@davemloft.net>2011-09-28 13:29:50 -0400
commit16e5726269611b71c930054ffe9b858c1cea88eb (patch)
tree50d25ec97d639b418964bad2f45774d657468c74
parenta9e9fd7182332d0cf5f3e601df3e71dd431b70d7 (diff)
af_unix: don't send SCM_CREDENTIALS by default
Since commit 7361c36c5224 (af_unix: Allow credentials to work across user and pid namespaces), af_unix performance has dropped a lot.

This is because we now take a reference on pid and cred in each write(), and release them in read(), which is usually done from another process, possibly on another CPU. This triggers false sharing.

# Events: 154K cycles
#
# Overhead  Command    Shared Object       Symbol
# ........  .......    ..................  .........................
#
    10.40%  hackbench  [kernel.kallsyms]  [k] put_pid
     8.60%  hackbench  [kernel.kallsyms]  [k] unix_stream_recvmsg
     7.87%  hackbench  [kernel.kallsyms]  [k] unix_stream_sendmsg
     6.11%  hackbench  [kernel.kallsyms]  [k] do_raw_spin_lock
     4.95%  hackbench  [kernel.kallsyms]  [k] unix_scm_to_skb
     4.87%  hackbench  [kernel.kallsyms]  [k] pid_nr_ns
     4.34%  hackbench  [kernel.kallsyms]  [k] cred_to_ucred
     2.39%  hackbench  [kernel.kallsyms]  [k] unix_destruct_scm
     2.24%  hackbench  [kernel.kallsyms]  [k] sub_preempt_count
     1.75%  hackbench  [kernel.kallsyms]  [k] fget_light
     1.51%  hackbench  [kernel.kallsyms]  [k] __mutex_lock_interruptible_slowpath
     1.42%  hackbench  [kernel.kallsyms]  [k] sock_alloc_send_pskb

This patch includes SCM_CREDENTIALS information in an af_unix message/skb only if requested by the sender (see man 7 unix for details on how to include ancillary data using the sendmsg() system call).

Note: This might break buggy applications that expected SCM_CREDENTIALS from an unaware write() system call, with the receiver not using the SO_PASSCRED socket option.

If SOCK_PASSCRED is set on the source or destination socket, we still include credentials for mere write() syscalls.

Performance boost in hackbench: more than 50% gain on a 16-thread machine (2 quad-core CPUs, 2 threads per core).

hackbench 20 thread 2000

4.228 sec instead of 9.102 sec

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/scm.h5
-rw-r--r--net/core/scm.c10
-rw-r--r--net/netlink/af_netlink.c5
-rw-r--r--net/unix/af_unix.c24
4 files changed, 33 insertions, 11 deletions
diff --git a/include/net/scm.h b/include/net/scm.h
index 745460fa2f02..d456f4c71a32 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm,
49 struct pid *pid, const struct cred *cred) 49 struct pid *pid, const struct cred *cred)
50{ 50{
51 scm->pid = get_pid(pid); 51 scm->pid = get_pid(pid);
52 scm->cred = get_cred(cred); 52 scm->cred = cred ? get_cred(cred) : NULL;
53 cred_to_ucred(pid, cred, &scm->creds); 53 cred_to_ucred(pid, cred, &scm->creds);
54} 54}
55 55
@@ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
73static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, 73static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
74 struct scm_cookie *scm) 74 struct scm_cookie *scm)
75{ 75{
76 scm_set_cred(scm, task_tgid(current), current_cred()); 76 memset(scm, 0, sizeof(*scm));
77 scm->fp = NULL;
78 unix_get_peersec_dgram(sock, scm); 77 unix_get_peersec_dgram(sock, scm);
79 if (msg->msg_controllen <= 0) 78 if (msg->msg_controllen <= 0)
80 return 0; 79 return 0;
diff --git a/net/core/scm.c b/net/core/scm.c
index 811b53fb330e..ff52ad0a5150 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
173 if (err) 173 if (err)
174 goto error; 174 goto error;
175 175
176 if (pid_vnr(p->pid) != p->creds.pid) { 176 if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
177 struct pid *pid; 177 struct pid *pid;
178 err = -ESRCH; 178 err = -ESRCH;
179 pid = find_get_pid(p->creds.pid); 179 pid = find_get_pid(p->creds.pid);
@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
183 p->pid = pid; 183 p->pid = pid;
184 } 184 }
185 185
186 if ((p->cred->euid != p->creds.uid) || 186 if (!p->cred ||
187 (p->cred->egid != p->creds.gid)) { 187 (p->cred->euid != p->creds.uid) ||
188 (p->cred->egid != p->creds.gid)) {
188 struct cred *cred; 189 struct cred *cred;
189 err = -ENOMEM; 190 err = -ENOMEM;
190 cred = prepare_creds(); 191 cred = prepare_creds();
@@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
193 194
194 cred->uid = cred->euid = p->creds.uid; 195 cred->uid = cred->euid = p->creds.uid;
195 cred->gid = cred->egid = p->creds.gid; 196 cred->gid = cred->egid = p->creds.gid;
196 put_cred(p->cred); 197 if (p->cred)
198 put_cred(p->cred);
197 p->cred = cred; 199 p->cred = cred;
198 } 200 }
199 break; 201 break;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4330db99fabf..1201b6d4183d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1324 if (msg->msg_flags&MSG_OOB) 1324 if (msg->msg_flags&MSG_OOB)
1325 return -EOPNOTSUPP; 1325 return -EOPNOTSUPP;
1326 1326
1327 if (NULL == siocb->scm) { 1327 if (NULL == siocb->scm)
1328 siocb->scm = &scm; 1328 siocb->scm = &scm;
1329 memset(&scm, 0, sizeof(scm)); 1329
1330 }
1331 err = scm_send(sock, msg, siocb->scm); 1330 err = scm_send(sock, msg, siocb->scm);
1332 if (err < 0) 1331 if (err < 0)
1333 return err; 1332 return err;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ec68e1c05b85..466fbcc5cf77 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) 1381static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1382{ 1382{
1383 int err = 0; 1383 int err = 0;
1384
1384 UNIXCB(skb).pid = get_pid(scm->pid); 1385 UNIXCB(skb).pid = get_pid(scm->pid);
1385 UNIXCB(skb).cred = get_cred(scm->cred); 1386 if (scm->cred)
1387 UNIXCB(skb).cred = get_cred(scm->cred);
1386 UNIXCB(skb).fp = NULL; 1388 UNIXCB(skb).fp = NULL;
1387 if (scm->fp && send_fds) 1389 if (scm->fp && send_fds)
1388 err = unix_attach_fds(scm, skb); 1390 err = unix_attach_fds(scm, skb);
@@ -1392,6 +1394,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
1392} 1394}
1393 1395
1394/* 1396/*
1397 * Some apps rely on write() giving SCM_CREDENTIALS
1398 * We include credentials if source or destination socket
1399 * asserted SOCK_PASSCRED.
1400 */
1401static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1402 const struct sock *other)
1403{
1404 if (UNIXCB(skb).cred)
1405 return;
1406 if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1407 !other->sk_socket ||
1408 test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1409 UNIXCB(skb).pid = get_pid(task_tgid(current));
1410 UNIXCB(skb).cred = get_current_cred();
1411 }
1412}
1413
1414/*
1395 * Send AF_UNIX data. 1415 * Send AF_UNIX data.
1396 */ 1416 */
1397 1417
@@ -1538,6 +1558,7 @@ restart:
1538 1558
1539 if (sock_flag(other, SOCK_RCVTSTAMP)) 1559 if (sock_flag(other, SOCK_RCVTSTAMP))
1540 __net_timestamp(skb); 1560 __net_timestamp(skb);
1561 maybe_add_creds(skb, sock, other);
1541 skb_queue_tail(&other->sk_receive_queue, skb); 1562 skb_queue_tail(&other->sk_receive_queue, skb);
1542 if (max_level > unix_sk(other)->recursion_level) 1563 if (max_level > unix_sk(other)->recursion_level)
1543 unix_sk(other)->recursion_level = max_level; 1564 unix_sk(other)->recursion_level = max_level;
@@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1652 (other->sk_shutdown & RCV_SHUTDOWN)) 1673 (other->sk_shutdown & RCV_SHUTDOWN))
1653 goto pipe_err_free; 1674 goto pipe_err_free;
1654 1675
1676 maybe_add_creds(skb, sock, other);
1655 skb_queue_tail(&other->sk_receive_queue, skb); 1677 skb_queue_tail(&other->sk_receive_queue, skb);
1656 if (max_level > unix_sk(other)->recursion_level) 1678 if (max_level > unix_sk(other)->recursion_level)
1657 unix_sk(other)->recursion_level = max_level; 1679 unix_sk(other)->recursion_level = max_level;