aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2017-03-09 03:09:05 -0500
committer: David S. Miller <davem@davemloft.net> 2017-03-09 21:23:27 -0500
commit: cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e (patch)
tree: 30aae04d074592571b8cb36d001d5d10c7181652 /include
parent: 81dca07b3be974e81cff250a19282473d67a870b (diff)
net: Work around lockdep limitation in sockets that use sockets
Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem.

The theory lockdep comes up with is as follows:

 (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock:

        mmap_sem must be taken before sk_lock-AF_RXRPC

 (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock:

        sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET

 (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this:

        sk_lock-AF_INET must be taken before mmap_sem

However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep.

Fix the general case by:

 (1) Double up all the locking keys used in sockets so that one set is used if the socket is created by userspace and the other set is used if the socket is created by the kernel.

 (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used.

     Note that the child created by sk_clone_lock() inherits the parent's kern setting.

 (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc().

     Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter.

     Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based:

        irda_accept()
        rds_tcp_accept_one()
        tcp_accept_from_sock()

     because they follow a sock_create_kern() and accept off of that.

Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- include/crypto/if_alg.h | 2
-rw-r--r-- include/linux/net.h | 2
-rw-r--r-- include/net/inet_common.h | 3
-rw-r--r-- include/net/inet_connection_sock.h | 2
-rw-r--r-- include/net/sctp/structs.h | 3
-rw-r--r-- include/net/sock.h | 9
6 files changed, 13 insertions, 8 deletions
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index a2bfd7843f18..e2b9c6fe2714 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type);
73 73
74int af_alg_release(struct socket *sock); 74int af_alg_release(struct socket *sock);
75void af_alg_release_parent(struct sock *sk); 75void af_alg_release_parent(struct sock *sk);
76int af_alg_accept(struct sock *sk, struct socket *newsock); 76int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
77 77
78int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); 78int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
79void af_alg_free_sg(struct af_alg_sgl *sgl); 79void af_alg_free_sg(struct af_alg_sgl *sgl);
diff --git a/include/linux/net.h b/include/linux/net.h
index cd0c8bd0a1de..0620f5e18c96 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -146,7 +146,7 @@ struct proto_ops {
146 int (*socketpair)(struct socket *sock1, 146 int (*socketpair)(struct socket *sock1,
147 struct socket *sock2); 147 struct socket *sock2);
148 int (*accept) (struct socket *sock, 148 int (*accept) (struct socket *sock,
149 struct socket *newsock, int flags); 149 struct socket *newsock, int flags, bool kern);
150 int (*getname) (struct socket *sock, 150 int (*getname) (struct socket *sock,
151 struct sockaddr *addr, 151 struct sockaddr *addr,
152 int *sockaddr_len, int peer); 152 int *sockaddr_len, int peer);
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index b7952d55b9c0..f39ae697347f 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
20 int addr_len, int flags, int is_sendmsg); 20 int addr_len, int flags, int is_sendmsg);
21int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, 21int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
22 int addr_len, int flags); 22 int addr_len, int flags);
23int inet_accept(struct socket *sock, struct socket *newsock, int flags); 23int inet_accept(struct socket *sock, struct socket *newsock, int flags,
24 bool kern);
24int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); 25int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size);
25ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 26ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
26 size_t size, int flags); 27 size_t size, int flags);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 826f198374f8..c7a577976bec 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
258 return (unsigned long)min_t(u64, when, max_when); 258 return (unsigned long)min_t(u64, when, max_when);
259} 259}
260 260
261struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); 261struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
262 262
263int inet_csk_get_port(struct sock *sk, unsigned short snum); 263int inet_csk_get_port(struct sock *sk, unsigned short snum);
264 264
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a244db5e5ff7..07a0b128625a 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -476,7 +476,8 @@ struct sctp_pf {
476 int (*send_verify) (struct sctp_sock *, union sctp_addr *); 476 int (*send_verify) (struct sctp_sock *, union sctp_addr *);
477 int (*supported_addrs)(const struct sctp_sock *, __be16 *); 477 int (*supported_addrs)(const struct sctp_sock *, __be16 *);
478 struct sock *(*create_accept_sk) (struct sock *sk, 478 struct sock *(*create_accept_sk) (struct sock *sk,
479 struct sctp_association *asoc); 479 struct sctp_association *asoc,
480 bool kern);
480 int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); 481 int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
481 void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); 482 void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
482 void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); 483 void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 5e5997654db6..03252d53975d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -236,6 +236,7 @@ struct sock_common {
236 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN 236 * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
237 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings 237 * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
238 * @sk_lock: synchronizer 238 * @sk_lock: synchronizer
239 * @sk_kern_sock: True if sock is using kernel lock classes
239 * @sk_rcvbuf: size of receive buffer in bytes 240 * @sk_rcvbuf: size of receive buffer in bytes
240 * @sk_wq: sock wait queue and async head 241 * @sk_wq: sock wait queue and async head
241 * @sk_rx_dst: receive input route used by early demux 242 * @sk_rx_dst: receive input route used by early demux
@@ -430,7 +431,8 @@ struct sock {
430#endif 431#endif
431 432
432 kmemcheck_bitfield_begin(flags); 433 kmemcheck_bitfield_begin(flags);
433 unsigned int sk_padding : 2, 434 unsigned int sk_padding : 1,
435 sk_kern_sock : 1,
434 sk_no_check_tx : 1, 436 sk_no_check_tx : 1,
435 sk_no_check_rx : 1, 437 sk_no_check_rx : 1,
436 sk_userlocks : 4, 438 sk_userlocks : 4,
@@ -1015,7 +1017,8 @@ struct proto {
1015 int addr_len); 1017 int addr_len);
1016 int (*disconnect)(struct sock *sk, int flags); 1018 int (*disconnect)(struct sock *sk, int flags);
1017 1019
1018 struct sock * (*accept)(struct sock *sk, int flags, int *err); 1020 struct sock * (*accept)(struct sock *sk, int flags, int *err,
1021 bool kern);
1019 1022
1020 int (*ioctl)(struct sock *sk, int cmd, 1023 int (*ioctl)(struct sock *sk, int cmd,
1021 unsigned long arg); 1024 unsigned long arg);
@@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
1573int sock_no_bind(struct socket *, struct sockaddr *, int); 1576int sock_no_bind(struct socket *, struct sockaddr *, int);
1574int sock_no_connect(struct socket *, struct sockaddr *, int, int); 1577int sock_no_connect(struct socket *, struct sockaddr *, int, int);
1575int sock_no_socketpair(struct socket *, struct socket *); 1578int sock_no_socketpair(struct socket *, struct socket *);
1576int sock_no_accept(struct socket *, struct socket *, int); 1579int sock_no_accept(struct socket *, struct socket *, int, bool);
1577int sock_no_getname(struct socket *, struct sockaddr *, int *, int); 1580int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
1578unsigned int sock_no_poll(struct file *, struct socket *, 1581unsigned int sock_no_poll(struct file *, struct socket *,
1579 struct poll_table_struct *); 1582 struct poll_table_struct *);