aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2018-01-30 13:01:40 -0500
committerPablo Neira Ayuso <pablo@netfilter.org>2018-01-31 10:37:47 -0500
commit3f34cfae1238848fd53f25e5c8fd59da57901f4b (patch)
tree5e4fbb38e28d3c593756dceff02cf6dc23b6504a
parent1a38956cce5eabd7b74f94bab70265e4df83165e (diff)
netfilter: on sockopt() acquire sock lock only in the required scope
Syzbot reported several deadlocks in the netfilter area caused by rtnl lock and socket lock being acquired with a different order on different code paths, leading to backtraces like the following one: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc9+ #212 Not tainted ------------------------------------------------------ syzkaller041579/3682 is trying to acquire lock: (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] lock_sock include/net/sock.h:1463 [inline] (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 but task is already holding lock: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 register_netdevice_notifier+0xad/0x860 net/core/dev.c:1607 tee_tg_check+0x1a0/0x280 net/netfilter/xt_TEE.c:106 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:845 check_target net/ipv6/netfilter/ip6_tables.c:538 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:580 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:749 do_replace net/ipv6/netfilter/ip6_tables.c:1165 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1691 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x115/0x150 net/ipv6/ipv6_sockglue.c:928 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 -> #0 (sk_lock-AF_INET6){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3914 lock_sock_nested+0xc2/0x110 net/core/sock.c:2780 lock_sock include/net/sock.h:1463 [inline] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 ipv6_setsockopt+0xd7/0x150 net/ipv6/ipv6_sockglue.c:922 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(sk_lock-AF_INET6); lock(rtnl_mutex); lock(sk_lock-AF_INET6); *** DEADLOCK *** 1 lock held by syzkaller041579/3682: #0: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 The problem, as Florian noted, is that nf_setsockopt() is always called with the socket held, even if the lock itself is required only for very tight scopes and only for some operation. This patch addresses the issues moving the lock_sock() call only where really needed, namely in ipv*_getorigdst(), so that nf_setsockopt() does not need anymore to acquire both locks. Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Reported-by: syzbot+a4c2dc980ac1af699b36@syzkaller.appspotmail.com Suggested-by: Florian Westphal <fw@strlen.de> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r--net/ipv4/ip_sockglue.c14
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv6/ipv6_sockglue.c17
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c18
4 files changed, 26 insertions, 29 deletions
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb7d7d8..c7df4969f80a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1251,11 +1251,8 @@ int ip_setsockopt(struct sock *sk, int level,
1251 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1251 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1252 optname != IP_IPSEC_POLICY && 1252 optname != IP_IPSEC_POLICY &&
1253 optname != IP_XFRM_POLICY && 1253 optname != IP_XFRM_POLICY &&
1254 !ip_mroute_opt(optname)) { 1254 !ip_mroute_opt(optname))
1255 lock_sock(sk);
1256 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); 1255 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1257 release_sock(sk);
1258 }
1259#endif 1256#endif
1260 return err; 1257 return err;
1261} 1258}
@@ -1280,12 +1277,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1280 if (err == -ENOPROTOOPT && optname != IP_HDRINCL && 1277 if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1281 optname != IP_IPSEC_POLICY && 1278 optname != IP_IPSEC_POLICY &&
1282 optname != IP_XFRM_POLICY && 1279 optname != IP_XFRM_POLICY &&
1283 !ip_mroute_opt(optname)) { 1280 !ip_mroute_opt(optname))
1284 lock_sock(sk); 1281 err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
1285 err = compat_nf_setsockopt(sk, PF_INET, optname, 1282 optlen);
1286 optval, optlen);
1287 release_sock(sk);
1288 }
1289#endif 1283#endif
1290 return err; 1284 return err;
1291} 1285}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 89af9d88ca21..a5727036a8a8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -218,15 +218,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
218 struct nf_conntrack_tuple tuple; 218 struct nf_conntrack_tuple tuple;
219 219
220 memset(&tuple, 0, sizeof(tuple)); 220 memset(&tuple, 0, sizeof(tuple));
221
222 lock_sock(sk);
221 tuple.src.u3.ip = inet->inet_rcv_saddr; 223 tuple.src.u3.ip = inet->inet_rcv_saddr;
222 tuple.src.u.tcp.port = inet->inet_sport; 224 tuple.src.u.tcp.port = inet->inet_sport;
223 tuple.dst.u3.ip = inet->inet_daddr; 225 tuple.dst.u3.ip = inet->inet_daddr;
224 tuple.dst.u.tcp.port = inet->inet_dport; 226 tuple.dst.u.tcp.port = inet->inet_dport;
225 tuple.src.l3num = PF_INET; 227 tuple.src.l3num = PF_INET;
226 tuple.dst.protonum = sk->sk_protocol; 228 tuple.dst.protonum = sk->sk_protocol;
229 release_sock(sk);
227 230
228 /* We only do TCP and SCTP at the moment: is there a better way? */ 231 /* We only do TCP and SCTP at the moment: is there a better way? */
229 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { 232 if (tuple.dst.protonum != IPPROTO_TCP &&
233 tuple.dst.protonum != IPPROTO_SCTP) {
230 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 234 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
231 return -ENOPROTOOPT; 235 return -ENOPROTOOPT;
232 } 236 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2d4680e0376f..4b16c6dede4f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -923,12 +923,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
923#ifdef CONFIG_NETFILTER 923#ifdef CONFIG_NETFILTER
924 /* we need to exclude all possible ENOPROTOOPTs except default case */ 924 /* we need to exclude all possible ENOPROTOOPTs except default case */
925 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && 925 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
926 optname != IPV6_XFRM_POLICY) { 926 optname != IPV6_XFRM_POLICY)
927 lock_sock(sk); 927 err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen);
928 err = nf_setsockopt(sk, PF_INET6, optname, optval,
929 optlen);
930 release_sock(sk);
931 }
932#endif 928#endif
933 return err; 929 return err;
934} 930}
@@ -958,12 +954,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
958#ifdef CONFIG_NETFILTER 954#ifdef CONFIG_NETFILTER
959 /* we need to exclude all possible ENOPROTOOPTs except default case */ 955 /* we need to exclude all possible ENOPROTOOPTs except default case */
960 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY && 956 if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
961 optname != IPV6_XFRM_POLICY) { 957 optname != IPV6_XFRM_POLICY)
962 lock_sock(sk); 958 err = compat_nf_setsockopt(sk, PF_INET6, optname, optval,
963 err = compat_nf_setsockopt(sk, PF_INET6, optname, 959 optlen);
964 optval, optlen);
965 release_sock(sk);
966 }
967#endif 960#endif
968 return err; 961 return err;
969} 962}
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..5863579800c1 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -226,20 +226,27 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = {
226static int 226static int
227ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 227ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
228{ 228{
229 const struct inet_sock *inet = inet_sk(sk); 229 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
230 const struct ipv6_pinfo *inet6 = inet6_sk(sk); 230 const struct ipv6_pinfo *inet6 = inet6_sk(sk);
231 const struct inet_sock *inet = inet_sk(sk);
231 const struct nf_conntrack_tuple_hash *h; 232 const struct nf_conntrack_tuple_hash *h;
232 struct sockaddr_in6 sin6; 233 struct sockaddr_in6 sin6;
233 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
234 struct nf_conn *ct; 234 struct nf_conn *ct;
235 __be32 flow_label;
236 int bound_dev_if;
235 237
238 lock_sock(sk);
236 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 239 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
237 tuple.src.u.tcp.port = inet->inet_sport; 240 tuple.src.u.tcp.port = inet->inet_sport;
238 tuple.dst.u3.in6 = sk->sk_v6_daddr; 241 tuple.dst.u3.in6 = sk->sk_v6_daddr;
239 tuple.dst.u.tcp.port = inet->inet_dport; 242 tuple.dst.u.tcp.port = inet->inet_dport;
240 tuple.dst.protonum = sk->sk_protocol; 243 tuple.dst.protonum = sk->sk_protocol;
244 bound_dev_if = sk->sk_bound_dev_if;
245 flow_label = inet6->flow_label;
246 release_sock(sk);
241 247
242 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) 248 if (tuple.dst.protonum != IPPROTO_TCP &&
249 tuple.dst.protonum != IPPROTO_SCTP)
243 return -ENOPROTOOPT; 250 return -ENOPROTOOPT;
244 251
245 if (*len < 0 || (unsigned int) *len < sizeof(sin6)) 252 if (*len < 0 || (unsigned int) *len < sizeof(sin6))
@@ -257,14 +264,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
257 264
258 sin6.sin6_family = AF_INET6; 265 sin6.sin6_family = AF_INET6;
259 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 266 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
260 sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; 267 sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
261 memcpy(&sin6.sin6_addr, 268 memcpy(&sin6.sin6_addr,
262 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 269 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
263 sizeof(sin6.sin6_addr)); 270 sizeof(sin6.sin6_addr));
264 271
265 nf_ct_put(ct); 272 nf_ct_put(ct);
266 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, 273 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
267 sk->sk_bound_dev_if);
268 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 274 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
269} 275}
270 276