aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-12-03 00:53:57 -0500
committerDavid S. Miller <davem@davemloft.net>2015-12-03 11:32:06 -0500
commit6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 (patch)
treee903f55cdcc87ed834790e4f257d407f886a9d23
parentc836a8ba93869d6a0290a6ae0047fbef09066871 (diff)
ipv6: kill sk_dst_lock
While testing the np->opt RCU conversion, I found that UDP/IPv6 was using a mixture of xchg() and sk_dst_lock to protect concurrent changes to sk->sk_dst_cache, leading to possible corruptions and crashes. ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest way to fix the mess is to remove sk_dst_lock completely, as we did for IPv4. __ip6_dst_store() and ip6_dst_store() share same implementation. sk_setup_caps() being called with socket lock being held or not, we have to use sk_dst_set() instead of __sk_dst_set() Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);" in ip6_dst_store() before the sk_setup_caps(sk, dst) call. This is because ip6_dst_store() can be called from process context, without any lock held. As soon as the dst is installed in sk->sk_dst_cache, dst can be freed from another cpu doing a concurrent ip6_dst_store() Doing the dst dereference before doing the install is needed to make sure no use after free would trigger. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip6_route.h17
-rw-r--r--include/net/sock.h3
-rw-r--r--net/core/sock.c4
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/icmp.c14
-rw-r--r--net/ipv6/inet6_connection_sock.c10
-rw-r--r--net/ipv6/tcp_ipv6.c4
8 files changed, 12 insertions, 46 deletions
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2bfb2ad2fab1..877f682989b8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
133/* 133/*
134 * Store a destination cache entry in a socket 134 * Store a destination cache entry in a socket
135 */ 135 */
136static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, 136static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
137 const struct in6_addr *daddr, 137 const struct in6_addr *daddr,
138 const struct in6_addr *saddr) 138 const struct in6_addr *saddr)
139{ 139{
140 struct ipv6_pinfo *np = inet6_sk(sk); 140 struct ipv6_pinfo *np = inet6_sk(sk);
141 struct rt6_info *rt = (struct rt6_info *) dst;
142 141
142 np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst);
143 sk_setup_caps(sk, dst); 143 sk_setup_caps(sk, dst);
144 np->daddr_cache = daddr; 144 np->daddr_cache = daddr;
145#ifdef CONFIG_IPV6_SUBTREES 145#ifdef CONFIG_IPV6_SUBTREES
146 np->saddr_cache = saddr; 146 np->saddr_cache = saddr;
147#endif 147#endif
148 np->dst_cookie = rt6_get_cookie(rt);
149}
150
151static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
152 struct in6_addr *daddr, struct in6_addr *saddr)
153{
154 spin_lock(&sk->sk_dst_lock);
155 __ip6_dst_store(sk, dst, daddr, saddr);
156 spin_unlock(&sk->sk_dst_lock);
157} 148}
158 149
159static inline bool ipv6_unicast_destination(const struct sk_buff *skb) 150static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
diff --git a/include/net/sock.h b/include/net/sock.h
index 0434138c5f95..52d27ee924f4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -254,7 +254,6 @@ struct cg_proto;
254 * @sk_wq: sock wait queue and async head 254 * @sk_wq: sock wait queue and async head
255 * @sk_rx_dst: receive input route used by early demux 255 * @sk_rx_dst: receive input route used by early demux
256 * @sk_dst_cache: destination cache 256 * @sk_dst_cache: destination cache
257 * @sk_dst_lock: destination cache lock
258 * @sk_policy: flow policy 257 * @sk_policy: flow policy
259 * @sk_receive_queue: incoming packets 258 * @sk_receive_queue: incoming packets
260 * @sk_wmem_alloc: transmit queue bytes committed 259 * @sk_wmem_alloc: transmit queue bytes committed
@@ -393,7 +392,7 @@ struct sock {
393#endif 392#endif
394 struct dst_entry *sk_rx_dst; 393 struct dst_entry *sk_rx_dst;
395 struct dst_entry __rcu *sk_dst_cache; 394 struct dst_entry __rcu *sk_dst_cache;
396 spinlock_t sk_dst_lock; 395 /* Note: 32bit hole on 64bit arches */
397 atomic_t sk_wmem_alloc; 396 atomic_t sk_wmem_alloc;
398 atomic_t sk_omem_alloc; 397 atomic_t sk_omem_alloc;
399 int sk_sndbuf; 398 int sk_sndbuf;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9d79569935a3..e31dfcee1729 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1530 skb_queue_head_init(&newsk->sk_receive_queue); 1530 skb_queue_head_init(&newsk->sk_receive_queue);
1531 skb_queue_head_init(&newsk->sk_write_queue); 1531 skb_queue_head_init(&newsk->sk_write_queue);
1532 1532
1533 spin_lock_init(&newsk->sk_dst_lock);
1534 rwlock_init(&newsk->sk_callback_lock); 1533 rwlock_init(&newsk->sk_callback_lock);
1535 lockdep_set_class_and_name(&newsk->sk_callback_lock, 1534 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1536 af_callback_keys + newsk->sk_family, 1535 af_callback_keys + newsk->sk_family,
@@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1607{ 1606{
1608 u32 max_segs = 1; 1607 u32 max_segs = 1;
1609 1608
1610 __sk_dst_set(sk, dst); 1609 sk_dst_set(sk, dst);
1611 sk->sk_route_caps = dst->dev->features; 1610 sk->sk_route_caps = dst->dev->features;
1612 if (sk->sk_route_caps & NETIF_F_GSO) 1611 if (sk->sk_route_caps & NETIF_F_GSO)
1613 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1612 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2388 } else 2387 } else
2389 sk->sk_wq = NULL; 2388 sk->sk_wq = NULL;
2390 2389
2391 spin_lock_init(&sk->sk_dst_lock);
2392 rwlock_init(&sk->sk_callback_lock); 2390 rwlock_init(&sk->sk_callback_lock);
2393 lockdep_set_class_and_name(&sk->sk_callback_lock, 2391 lockdep_set_class_and_name(&sk->sk_callback_lock,
2394 af_callback_keys + sk->sk_family, 2392 af_callback_keys + sk->sk_family,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index e7e0b9bc2a43..9c6d0508e63a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -459,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
459 * comment in that function for the gory details. -acme 459 * comment in that function for the gory details. -acme
460 */ 460 */
461 461
462 __ip6_dst_store(newsk, dst, NULL, NULL); 462 ip6_dst_store(newsk, dst, NULL, NULL);
463 newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | 463 newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
464 NETIF_F_TSO); 464 NETIF_F_TSO);
465 newdp6 = (struct dccp6_sock *)newsk; 465 newdp6 = (struct dccp6_sock *)newsk;
@@ -883,7 +883,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
883 np->saddr = *saddr; 883 np->saddr = *saddr;
884 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 884 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
885 885
886 __ip6_dst_store(sk, dst, NULL, NULL); 886 ip6_dst_store(sk, dst, NULL, NULL);
887 887
888 icsk->icsk_ext_hdr_len = 0; 888 icsk->icsk_ext_hdr_len = 0;
889 if (opt) 889 if (opt)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 38d66ddfb937..8ec0df75f1c4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -673,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
673 return PTR_ERR(dst); 673 return PTR_ERR(dst);
674 } 674 }
675 675
676 __ip6_dst_store(sk, dst, NULL, NULL); 676 ip6_dst_store(sk, dst, NULL, NULL);
677 } 677 }
678 678
679 return 0; 679 return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 36c5a98b0472..0a37ddc7af51 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
834 security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); 834 security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
835} 835}
836 836
837/*
838 * Special lock-class for __icmpv6_sk:
839 */
840static struct lock_class_key icmpv6_socket_sk_dst_lock_key;
841
842static int __net_init icmpv6_sk_init(struct net *net) 837static int __net_init icmpv6_sk_init(struct net *net)
843{ 838{
844 struct sock *sk; 839 struct sock *sk;
@@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net)
860 855
861 net->ipv6.icmp_sk[i] = sk; 856 net->ipv6.icmp_sk[i] = sk;
862 857
863 /*
864 * Split off their lock-class, because sk->sk_dst_lock
865 * gets used from softirqs, which is safe for
866 * __icmpv6_sk (because those never get directly used
867 * via userspace syscalls), but unsafe for normal sockets.
868 */
869 lockdep_set_class(&sk->sk_dst_lock,
870 &icmpv6_socket_sk_dst_lock_key);
871
872 /* Enough space for 2 64K ICMP packets, including 858 /* Enough space for 2 64K ICMP packets, including
873 * sk_buff struct overhead. 859 * sk_buff struct overhead.
874 */ 860 */
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 3ff5208772bb..a7ca2cde2ecb 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -111,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
111EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); 111EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
112 112
113static inline 113static inline
114void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
115 const struct in6_addr *daddr,
116 const struct in6_addr *saddr)
117{
118 __ip6_dst_store(sk, dst, daddr, saddr);
119}
120
121static inline
122struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) 114struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
123{ 115{
124 return __sk_dst_check(sk, cookie); 116 return __sk_dst_check(sk, cookie);
@@ -153,7 +145,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
153 dst = ip6_dst_lookup_flow(sk, fl6, final_p); 145 dst = ip6_dst_lookup_flow(sk, fl6, final_p);
154 146
155 if (!IS_ERR(dst)) 147 if (!IS_ERR(dst))
156 __inet6_csk_dst_store(sk, dst, NULL, NULL); 148 ip6_dst_store(sk, dst, NULL, NULL);
157 } 149 }
158 return dst; 150 return dst;
159} 151}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6a50bb4a0dae..e7aab561b7b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -257,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
257 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 257 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
258 258
259 sk->sk_gso_type = SKB_GSO_TCPV6; 259 sk->sk_gso_type = SKB_GSO_TCPV6;
260 __ip6_dst_store(sk, dst, NULL, NULL); 260 ip6_dst_store(sk, dst, NULL, NULL);
261 261
262 if (tcp_death_row.sysctl_tw_recycle && 262 if (tcp_death_row.sysctl_tw_recycle &&
263 !tp->rx_opt.ts_recent_stamp && 263 !tp->rx_opt.ts_recent_stamp &&
@@ -1060,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
1060 */ 1060 */
1061 1061
1062 newsk->sk_gso_type = SKB_GSO_TCPV6; 1062 newsk->sk_gso_type = SKB_GSO_TCPV6;
1063 __ip6_dst_store(newsk, dst, NULL, NULL); 1063 ip6_dst_store(newsk, dst, NULL, NULL);
1064 inet6_sk_rx_dst_set(newsk, skb); 1064 inet6_sk_rx_dst_set(newsk, skb);
1065 1065
1066 newtcp6sk = (struct tcp6_sock *)newsk; 1066 newtcp6sk = (struct tcp6_sock *)newsk;