aboutsummaryrefslogtreecommitdiffstats
path: root/net/tipc/socket.c
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2017-03-09 03:09:05 -0500
committerDavid S. Miller <davem@davemloft.net>2017-03-09 21:23:27 -0500
commitcdfbabfb2f0ce983fdaa42f20e5f7842178fc01e (patch)
tree30aae04d074592571b8cb36d001d5d10c7181652 /net/tipc/socket.c
parent81dca07b3be974e81cff250a19282473d67a870b (diff)
net: Work around lockdep limitation in sockets that use sockets
Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r--net/tipc/socket.c8
1 files changed, 5 insertions, 3 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 43e4045e72bc..7130e73bd42c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk);
115static void tipc_write_space(struct sock *sk); 115static void tipc_write_space(struct sock *sk);
116static void tipc_sock_destruct(struct sock *sk); 116static void tipc_sock_destruct(struct sock *sk);
117static int tipc_release(struct socket *sock); 117static int tipc_release(struct socket *sock);
118static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); 118static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
119 bool kern);
119static void tipc_sk_timeout(unsigned long data); 120static void tipc_sk_timeout(unsigned long data);
120static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 121static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
121 struct tipc_name_seq const *seq); 122 struct tipc_name_seq const *seq);
@@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
2029 * 2030 *
2030 * Returns 0 on success, errno otherwise 2031 * Returns 0 on success, errno otherwise
2031 */ 2032 */
2032static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) 2033static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
2034 bool kern)
2033{ 2035{
2034 struct sock *new_sk, *sk = sock->sk; 2036 struct sock *new_sk, *sk = sock->sk;
2035 struct sk_buff *buf; 2037 struct sk_buff *buf;
@@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
2051 2053
2052 buf = skb_peek(&sk->sk_receive_queue); 2054 buf = skb_peek(&sk->sk_receive_queue);
2053 2055
2054 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); 2056 res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern);
2055 if (res) 2057 if (res)
2056 goto exit; 2058 goto exit;
2057 security_sk_clone(sock->sk, new_sock->sk); 2059 security_sk_clone(sock->sk, new_sock->sk);