diff options
author | Rainer Weikusat <rweikusat@mobileactivedefense.com> | 2016-01-03 13:56:38 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-01-04 23:22:49 -0500 |
commit | c845acb324aa85a39650a14e7696982ceea75dc1 (patch) | |
tree | 59ae338a572730c8a519eb69f50be9783b3a8e7d /net/unix/af_unix.c | |
parent | b5bdacf3bb027ba0af4d61b38ec289bfc8b64372 (diff) |
af_unix: Fix splice-bind deadlock
On 2015/11/06, Dmitry Vyukov reported a deadlock involving the splice
system call and AF_UNIX sockets,
http://lists.openwall.net/netdev/2015/11/06/24
The situation was analyzed as
    (a while ago) A: socketpair()
    B: splice() from a pipe to /mnt/regular_file
        does sb_start_write() on /mnt
    C: try to freeze /mnt
        wait for B to finish with /mnt
    A: bind() try to bind our socket to /mnt/new_socket_name
        lock our socket, see it not bound yet
        decide that it needs to create something in /mnt
        try to do sb_start_write() on /mnt, block (it's
        waiting for C).
    D: splice() from the same pipe to our socket
        lock the pipe, see that socket is connected
        try to lock the socket, block waiting for A
    B: get around to actually feeding a chunk from
        pipe to file, try to lock the pipe. Deadlock.
on 2015/11/10 by Al Viro,
http://lists.openwall.net/netdev/2015/11/10/4
The patch fixes this by removing the kern_path_create-related code from
unix_mknod and executing it as part of unix_bind prior to acquiring the
readlock of the socket in question. This means that A (as used above)
will sb_start_write on /mnt before it acquires the readlock, hence, it
won't indirectly block B which first did a sb_start_write and then
waited for a thread trying to acquire the readlock. Consequently, A
being blocked by C waiting for B won't cause a deadlock anymore
(effectively, both A and B acquire two locks in opposite order in the
situation described above).
Dmitry Vyukov (<dvyukov@google.com>) tested the original patch.
Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 66 |
1 files changed, 40 insertions, 26 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a4631477cedf..ef05cd9403d4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -953,32 +953,20 @@ fail: | |||
953 | return NULL; | 953 | return NULL; |
954 | } | 954 | } |
955 | 955 | ||
956 | static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) | 956 | static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode, |
957 | struct path *res) | ||
957 | { | 958 | { |
958 | struct dentry *dentry; | 959 | int err; |
959 | struct path path; | ||
960 | int err = 0; | ||
961 | /* | ||
962 | * Get the parent directory, calculate the hash for last | ||
963 | * component. | ||
964 | */ | ||
965 | dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | ||
966 | err = PTR_ERR(dentry); | ||
967 | if (IS_ERR(dentry)) | ||
968 | return err; | ||
969 | 960 | ||
970 | /* | 961 | err = security_path_mknod(path, dentry, mode, 0); |
971 | * All right, let's create it. | ||
972 | */ | ||
973 | err = security_path_mknod(&path, dentry, mode, 0); | ||
974 | if (!err) { | 962 | if (!err) { |
975 | err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0); | 963 | err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0); |
976 | if (!err) { | 964 | if (!err) { |
977 | res->mnt = mntget(path.mnt); | 965 | res->mnt = mntget(path->mnt); |
978 | res->dentry = dget(dentry); | 966 | res->dentry = dget(dentry); |
979 | } | 967 | } |
980 | } | 968 | } |
981 | done_path_create(&path, dentry); | 969 | |
982 | return err; | 970 | return err; |
983 | } | 971 | } |
984 | 972 | ||
@@ -989,10 +977,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
989 | struct unix_sock *u = unix_sk(sk); | 977 | struct unix_sock *u = unix_sk(sk); |
990 | struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; | 978 | struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; |
991 | char *sun_path = sunaddr->sun_path; | 979 | char *sun_path = sunaddr->sun_path; |
992 | int err; | 980 | int err, name_err; |
993 | unsigned int hash; | 981 | unsigned int hash; |
994 | struct unix_address *addr; | 982 | struct unix_address *addr; |
995 | struct hlist_head *list; | 983 | struct hlist_head *list; |
984 | struct path path; | ||
985 | struct dentry *dentry; | ||
996 | 986 | ||
997 | err = -EINVAL; | 987 | err = -EINVAL; |
998 | if (sunaddr->sun_family != AF_UNIX) | 988 | if (sunaddr->sun_family != AF_UNIX) |
@@ -1008,14 +998,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
1008 | goto out; | 998 | goto out; |
1009 | addr_len = err; | 999 | addr_len = err; |
1010 | 1000 | ||
1001 | name_err = 0; | ||
1002 | dentry = NULL; | ||
1003 | if (sun_path[0]) { | ||
1004 | /* Get the parent directory, calculate the hash for last | ||
1005 | * component. | ||
1006 | */ | ||
1007 | dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | ||
1008 | |||
1009 | if (IS_ERR(dentry)) { | ||
1010 | /* delay report until after 'already bound' check */ | ||
1011 | name_err = PTR_ERR(dentry); | ||
1012 | dentry = NULL; | ||
1013 | } | ||
1014 | } | ||
1015 | |||
1011 | err = mutex_lock_interruptible(&u->readlock); | 1016 | err = mutex_lock_interruptible(&u->readlock); |
1012 | if (err) | 1017 | if (err) |
1013 | goto out; | 1018 | goto out_path; |
1014 | 1019 | ||
1015 | err = -EINVAL; | 1020 | err = -EINVAL; |
1016 | if (u->addr) | 1021 | if (u->addr) |
1017 | goto out_up; | 1022 | goto out_up; |
1018 | 1023 | ||
1024 | if (name_err) { | ||
1025 | err = name_err == -EEXIST ? -EADDRINUSE : name_err; | ||
1026 | goto out_up; | ||
1027 | } | ||
1028 | |||
1019 | err = -ENOMEM; | 1029 | err = -ENOMEM; |
1020 | addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); | 1030 | addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL); |
1021 | if (!addr) | 1031 | if (!addr) |
@@ -1026,11 +1036,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
1026 | addr->hash = hash ^ sk->sk_type; | 1036 | addr->hash = hash ^ sk->sk_type; |
1027 | atomic_set(&addr->refcnt, 1); | 1037 | atomic_set(&addr->refcnt, 1); |
1028 | 1038 | ||
1029 | if (sun_path[0]) { | 1039 | if (dentry) { |
1030 | struct path path; | 1040 | struct path u_path; |
1031 | umode_t mode = S_IFSOCK | | 1041 | umode_t mode = S_IFSOCK | |
1032 | (SOCK_INODE(sock)->i_mode & ~current_umask()); | 1042 | (SOCK_INODE(sock)->i_mode & ~current_umask()); |
1033 | err = unix_mknod(sun_path, mode, &path); | 1043 | err = unix_mknod(dentry, &path, mode, &u_path); |
1034 | if (err) { | 1044 | if (err) { |
1035 | if (err == -EEXIST) | 1045 | if (err == -EEXIST) |
1036 | err = -EADDRINUSE; | 1046 | err = -EADDRINUSE; |
@@ -1038,9 +1048,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
1038 | goto out_up; | 1048 | goto out_up; |
1039 | } | 1049 | } |
1040 | addr->hash = UNIX_HASH_SIZE; | 1050 | addr->hash = UNIX_HASH_SIZE; |
1041 | hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1); | 1051 | hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1); |
1042 | spin_lock(&unix_table_lock); | 1052 | spin_lock(&unix_table_lock); |
1043 | u->path = path; | 1053 | u->path = u_path; |
1044 | list = &unix_socket_table[hash]; | 1054 | list = &unix_socket_table[hash]; |
1045 | } else { | 1055 | } else { |
1046 | spin_lock(&unix_table_lock); | 1056 | spin_lock(&unix_table_lock); |
@@ -1063,6 +1073,10 @@ out_unlock: | |||
1063 | spin_unlock(&unix_table_lock); | 1073 | spin_unlock(&unix_table_lock); |
1064 | out_up: | 1074 | out_up: |
1065 | mutex_unlock(&u->readlock); | 1075 | mutex_unlock(&u->readlock); |
1076 | out_path: | ||
1077 | if (dentry) | ||
1078 | done_path_create(&path, dentry); | ||
1079 | |||
1066 | out: | 1080 | out: |
1067 | return err; | 1081 | return err; |
1068 | } | 1082 | } |