diff options
author | Eric Dumazet <edumazet@google.com> | 2015-12-03 00:53:57 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-12-03 11:32:06 -0500 |
commit | 6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 (patch) | |
tree | e903f55cdcc87ed834790e4f257d407f886a9d23 | |
parent | c836a8ba93869d6a0290a6ae0047fbef09066871 (diff) |
ipv6: kill sk_dst_lock
While testing the np->opt RCU conversion, I found that UDP/IPv6 was
using a mixture of xchg() and sk_dst_lock to protect concurrent changes
to sk->sk_dst_cache, leading to possible corruptions and crashes.
ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest
way to fix the mess is to remove sk_dst_lock completely, as we did for
IPv4.
__ip6_dst_store() and ip6_dst_store() share same implementation.
sk_setup_caps() being called with socket lock being held or not,
we have to use sk_dst_set() instead of __sk_dst_set()
Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);"
in ip6_dst_store() before the sk_setup_caps(sk, dst) call.
This is because ip6_dst_store() can be called from process context,
without any lock held.
As soon as the dst is installed in sk->sk_dst_cache, dst can be freed
from another cpu doing a concurrent ip6_dst_store()
Doing the dst dereference before doing the install is needed to make
sure no use after free would trigger.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip6_route.h | 17 | ||||
-rw-r--r-- | include/net/sock.h | 3 | ||||
-rw-r--r-- | net/core/sock.c | 4 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 4 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 2 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 14 | ||||
-rw-r--r-- | net/ipv6/inet6_connection_sock.c | 10 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 4 |
8 files changed, 12 insertions, 46 deletions
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2bfb2ad2fab1..877f682989b8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h | |||
@@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); | |||
133 | /* | 133 | /* |
134 | * Store a destination cache entry in a socket | 134 | * Store a destination cache entry in a socket |
135 | */ | 135 | */ |
136 | static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, | 136 | static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, |
137 | const struct in6_addr *daddr, | 137 | const struct in6_addr *daddr, |
138 | const struct in6_addr *saddr) | 138 | const struct in6_addr *saddr) |
139 | { | 139 | { |
140 | struct ipv6_pinfo *np = inet6_sk(sk); | 140 | struct ipv6_pinfo *np = inet6_sk(sk); |
141 | struct rt6_info *rt = (struct rt6_info *) dst; | ||
142 | 141 | ||
142 | np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); | ||
143 | sk_setup_caps(sk, dst); | 143 | sk_setup_caps(sk, dst); |
144 | np->daddr_cache = daddr; | 144 | np->daddr_cache = daddr; |
145 | #ifdef CONFIG_IPV6_SUBTREES | 145 | #ifdef CONFIG_IPV6_SUBTREES |
146 | np->saddr_cache = saddr; | 146 | np->saddr_cache = saddr; |
147 | #endif | 147 | #endif |
148 | np->dst_cookie = rt6_get_cookie(rt); | ||
149 | } | ||
150 | |||
151 | static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, | ||
152 | struct in6_addr *daddr, struct in6_addr *saddr) | ||
153 | { | ||
154 | spin_lock(&sk->sk_dst_lock); | ||
155 | __ip6_dst_store(sk, dst, daddr, saddr); | ||
156 | spin_unlock(&sk->sk_dst_lock); | ||
157 | } | 148 | } |
158 | 149 | ||
159 | static inline bool ipv6_unicast_destination(const struct sk_buff *skb) | 150 | static inline bool ipv6_unicast_destination(const struct sk_buff *skb) |
diff --git a/include/net/sock.h b/include/net/sock.h index 0434138c5f95..52d27ee924f4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -254,7 +254,6 @@ struct cg_proto; | |||
254 | * @sk_wq: sock wait queue and async head | 254 | * @sk_wq: sock wait queue and async head |
255 | * @sk_rx_dst: receive input route used by early demux | 255 | * @sk_rx_dst: receive input route used by early demux |
256 | * @sk_dst_cache: destination cache | 256 | * @sk_dst_cache: destination cache |
257 | * @sk_dst_lock: destination cache lock | ||
258 | * @sk_policy: flow policy | 257 | * @sk_policy: flow policy |
259 | * @sk_receive_queue: incoming packets | 258 | * @sk_receive_queue: incoming packets |
260 | * @sk_wmem_alloc: transmit queue bytes committed | 259 | * @sk_wmem_alloc: transmit queue bytes committed |
@@ -393,7 +392,7 @@ struct sock { | |||
393 | #endif | 392 | #endif |
394 | struct dst_entry *sk_rx_dst; | 393 | struct dst_entry *sk_rx_dst; |
395 | struct dst_entry __rcu *sk_dst_cache; | 394 | struct dst_entry __rcu *sk_dst_cache; |
396 | spinlock_t sk_dst_lock; | 395 | /* Note: 32bit hole on 64bit arches */ |
397 | atomic_t sk_wmem_alloc; | 396 | atomic_t sk_wmem_alloc; |
398 | atomic_t sk_omem_alloc; | 397 | atomic_t sk_omem_alloc; |
399 | int sk_sndbuf; | 398 | int sk_sndbuf; |
diff --git a/net/core/sock.c b/net/core/sock.c index 9d79569935a3..e31dfcee1729 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) | |||
1530 | skb_queue_head_init(&newsk->sk_receive_queue); | 1530 | skb_queue_head_init(&newsk->sk_receive_queue); |
1531 | skb_queue_head_init(&newsk->sk_write_queue); | 1531 | skb_queue_head_init(&newsk->sk_write_queue); |
1532 | 1532 | ||
1533 | spin_lock_init(&newsk->sk_dst_lock); | ||
1534 | rwlock_init(&newsk->sk_callback_lock); | 1533 | rwlock_init(&newsk->sk_callback_lock); |
1535 | lockdep_set_class_and_name(&newsk->sk_callback_lock, | 1534 | lockdep_set_class_and_name(&newsk->sk_callback_lock, |
1536 | af_callback_keys + newsk->sk_family, | 1535 | af_callback_keys + newsk->sk_family, |
@@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
1607 | { | 1606 | { |
1608 | u32 max_segs = 1; | 1607 | u32 max_segs = 1; |
1609 | 1608 | ||
1610 | __sk_dst_set(sk, dst); | 1609 | sk_dst_set(sk, dst); |
1611 | sk->sk_route_caps = dst->dev->features; | 1610 | sk->sk_route_caps = dst->dev->features; |
1612 | if (sk->sk_route_caps & NETIF_F_GSO) | 1611 | if (sk->sk_route_caps & NETIF_F_GSO) |
1613 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 1612 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
@@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
2388 | } else | 2387 | } else |
2389 | sk->sk_wq = NULL; | 2388 | sk->sk_wq = NULL; |
2390 | 2389 | ||
2391 | spin_lock_init(&sk->sk_dst_lock); | ||
2392 | rwlock_init(&sk->sk_callback_lock); | 2390 | rwlock_init(&sk->sk_callback_lock); |
2393 | lockdep_set_class_and_name(&sk->sk_callback_lock, | 2391 | lockdep_set_class_and_name(&sk->sk_callback_lock, |
2394 | af_callback_keys + sk->sk_family, | 2392 | af_callback_keys + sk->sk_family, |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e7e0b9bc2a43..9c6d0508e63a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -459,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, | |||
459 | * comment in that function for the gory details. -acme | 459 | * comment in that function for the gory details. -acme |
460 | */ | 460 | */ |
461 | 461 | ||
462 | __ip6_dst_store(newsk, dst, NULL, NULL); | 462 | ip6_dst_store(newsk, dst, NULL, NULL); |
463 | newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | | 463 | newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | |
464 | NETIF_F_TSO); | 464 | NETIF_F_TSO); |
465 | newdp6 = (struct dccp6_sock *)newsk; | 465 | newdp6 = (struct dccp6_sock *)newsk; |
@@ -883,7 +883,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
883 | np->saddr = *saddr; | 883 | np->saddr = *saddr; |
884 | inet->inet_rcv_saddr = LOOPBACK4_IPV6; | 884 | inet->inet_rcv_saddr = LOOPBACK4_IPV6; |
885 | 885 | ||
886 | __ip6_dst_store(sk, dst, NULL, NULL); | 886 | ip6_dst_store(sk, dst, NULL, NULL); |
887 | 887 | ||
888 | icsk->icsk_ext_hdr_len = 0; | 888 | icsk->icsk_ext_hdr_len = 0; |
889 | if (opt) | 889 | if (opt) |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 38d66ddfb937..8ec0df75f1c4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -673,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk) | |||
673 | return PTR_ERR(dst); | 673 | return PTR_ERR(dst); |
674 | } | 674 | } |
675 | 675 | ||
676 | __ip6_dst_store(sk, dst, NULL, NULL); | 676 | ip6_dst_store(sk, dst, NULL, NULL); |
677 | } | 677 | } |
678 | 678 | ||
679 | return 0; | 679 | return 0; |
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36c5a98b0472..0a37ddc7af51 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, | |||
834 | security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); | 834 | security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); |
835 | } | 835 | } |
836 | 836 | ||
837 | /* | ||
838 | * Special lock-class for __icmpv6_sk: | ||
839 | */ | ||
840 | static struct lock_class_key icmpv6_socket_sk_dst_lock_key; | ||
841 | |||
842 | static int __net_init icmpv6_sk_init(struct net *net) | 837 | static int __net_init icmpv6_sk_init(struct net *net) |
843 | { | 838 | { |
844 | struct sock *sk; | 839 | struct sock *sk; |
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net) | |||
860 | 855 | ||
861 | net->ipv6.icmp_sk[i] = sk; | 856 | net->ipv6.icmp_sk[i] = sk; |
862 | 857 | ||
863 | /* | ||
864 | * Split off their lock-class, because sk->sk_dst_lock | ||
865 | * gets used from softirqs, which is safe for | ||
866 | * __icmpv6_sk (because those never get directly used | ||
867 | * via userspace syscalls), but unsafe for normal sockets. | ||
868 | */ | ||
869 | lockdep_set_class(&sk->sk_dst_lock, | ||
870 | &icmpv6_socket_sk_dst_lock_key); | ||
871 | |||
872 | /* Enough space for 2 64K ICMP packets, including | 858 | /* Enough space for 2 64K ICMP packets, including |
873 | * sk_buff struct overhead. | 859 | * sk_buff struct overhead. |
874 | */ | 860 | */ |
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3ff5208772bb..a7ca2cde2ecb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c | |||
@@ -111,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) | |||
111 | EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); | 111 | EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); |
112 | 112 | ||
113 | static inline | 113 | static inline |
114 | void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, | ||
115 | const struct in6_addr *daddr, | ||
116 | const struct in6_addr *saddr) | ||
117 | { | ||
118 | __ip6_dst_store(sk, dst, daddr, saddr); | ||
119 | } | ||
120 | |||
121 | static inline | ||
122 | struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) | 114 | struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) |
123 | { | 115 | { |
124 | return __sk_dst_check(sk, cookie); | 116 | return __sk_dst_check(sk, cookie); |
@@ -153,7 +145,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, | |||
153 | dst = ip6_dst_lookup_flow(sk, fl6, final_p); | 145 | dst = ip6_dst_lookup_flow(sk, fl6, final_p); |
154 | 146 | ||
155 | if (!IS_ERR(dst)) | 147 | if (!IS_ERR(dst)) |
156 | __inet6_csk_dst_store(sk, dst, NULL, NULL); | 148 | ip6_dst_store(sk, dst, NULL, NULL); |
157 | } | 149 | } |
158 | return dst; | 150 | return dst; |
159 | } | 151 | } |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6a50bb4a0dae..e7aab561b7b4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -257,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, | |||
257 | inet->inet_rcv_saddr = LOOPBACK4_IPV6; | 257 | inet->inet_rcv_saddr = LOOPBACK4_IPV6; |
258 | 258 | ||
259 | sk->sk_gso_type = SKB_GSO_TCPV6; | 259 | sk->sk_gso_type = SKB_GSO_TCPV6; |
260 | __ip6_dst_store(sk, dst, NULL, NULL); | 260 | ip6_dst_store(sk, dst, NULL, NULL); |
261 | 261 | ||
262 | if (tcp_death_row.sysctl_tw_recycle && | 262 | if (tcp_death_row.sysctl_tw_recycle && |
263 | !tp->rx_opt.ts_recent_stamp && | 263 | !tp->rx_opt.ts_recent_stamp && |
@@ -1060,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * | |||
1060 | */ | 1060 | */ |
1061 | 1061 | ||
1062 | newsk->sk_gso_type = SKB_GSO_TCPV6; | 1062 | newsk->sk_gso_type = SKB_GSO_TCPV6; |
1063 | __ip6_dst_store(newsk, dst, NULL, NULL); | 1063 | ip6_dst_store(newsk, dst, NULL, NULL); |
1064 | inet6_sk_rx_dst_set(newsk, skb); | 1064 | inet6_sk_rx_dst_set(newsk, skb); |
1065 | 1065 | ||
1066 | newtcp6sk = (struct tcp6_sock *)newsk; | 1066 | newtcp6sk = (struct tcp6_sock *)newsk; |