diff options
author | Daniel Borkmann <dborkman@redhat.com> | 2013-11-21 10:50:58 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-11-21 13:09:43 -0500 |
commit | e40526cb20b5ee53419452e1f03d97092f144418 (patch) | |
tree | 6e78af9f31887cc85e3137ea11437bbde78515c4 /net | |
parent | db739ef37f07a1a12e388dbaec225d9d9d5d6ded (diff) |
packet: fix use after free race in send path when dev is released
Salam reported a use after free bug in PF_PACKET that occurs when
we're sending out frames on a socket bound device and suddenly the
net device is being unregistered. It appears that commit 827d9780
introduced a possible race condition between {t,}packet_snd() and
packet_notifier(). In the case of a bound socket, packet_notifier()
can drop the last reference to the net_device and {t,}packet_snd()
might end up suddenly sending a packet over a freed net_device.
To avoid reverting 827d9780 and thus introducing a performance
regression compared to the current state of things, we decided to
hold a cached RCU protected pointer to the net device and maintain
it on write side via bind spin_lock protected register_prot_hook()
and __unregister_prot_hook() calls.
In {t,}packet_snd() path, we access this pointer under rcu_read_lock
through packet_cached_dev_get() that holds reference to the device
to prevent it from being freed through packet_notifier() while
we're in send path. This is okay to do as dev_put()/dev_hold() are
per-cpu counters, so this should not be a performance issue. Also,
the code simplifies a bit as we don't need need_rls_dev anymore.
Fixes: 827d978037d7 ("af-packet: Use existing netdev reference for bound sockets.")
Reported-by: Salam Noureddine <noureddine@aristanetworks.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Salam Noureddine <noureddine@aristanetworks.com>
Cc: Ben Greear <greearb@candelatech.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/packet/af_packet.c | 59 | ||||
-rw-r--r-- | net/packet/internal.h | 1 |
2 files changed, 37 insertions, 23 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 61bd50adead1..ac27c86ef6d1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -244,11 +244,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); | |||
244 | static void register_prot_hook(struct sock *sk) | 244 | static void register_prot_hook(struct sock *sk) |
245 | { | 245 | { |
246 | struct packet_sock *po = pkt_sk(sk); | 246 | struct packet_sock *po = pkt_sk(sk); |
247 | |||
247 | if (!po->running) { | 248 | if (!po->running) { |
248 | if (po->fanout) | 249 | if (po->fanout) { |
249 | __fanout_link(sk, po); | 250 | __fanout_link(sk, po); |
250 | else | 251 | } else { |
251 | dev_add_pack(&po->prot_hook); | 252 | dev_add_pack(&po->prot_hook); |
253 | rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); | ||
254 | } | ||
255 | |||
252 | sock_hold(sk); | 256 | sock_hold(sk); |
253 | po->running = 1; | 257 | po->running = 1; |
254 | } | 258 | } |
@@ -266,10 +270,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) | |||
266 | struct packet_sock *po = pkt_sk(sk); | 270 | struct packet_sock *po = pkt_sk(sk); |
267 | 271 | ||
268 | po->running = 0; | 272 | po->running = 0; |
269 | if (po->fanout) | 273 | if (po->fanout) { |
270 | __fanout_unlink(sk, po); | 274 | __fanout_unlink(sk, po); |
271 | else | 275 | } else { |
272 | __dev_remove_pack(&po->prot_hook); | 276 | __dev_remove_pack(&po->prot_hook); |
277 | RCU_INIT_POINTER(po->cached_dev, NULL); | ||
278 | } | ||
279 | |||
273 | __sock_put(sk); | 280 | __sock_put(sk); |
274 | 281 | ||
275 | if (sync) { | 282 | if (sync) { |
@@ -2052,12 +2059,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, | |||
2052 | return tp_len; | 2059 | return tp_len; |
2053 | } | 2060 | } |
2054 | 2061 | ||
2062 | static struct net_device *packet_cached_dev_get(struct packet_sock *po) | ||
2063 | { | ||
2064 | struct net_device *dev; | ||
2065 | |||
2066 | rcu_read_lock(); | ||
2067 | dev = rcu_dereference(po->cached_dev); | ||
2068 | if (dev) | ||
2069 | dev_hold(dev); | ||
2070 | rcu_read_unlock(); | ||
2071 | |||
2072 | return dev; | ||
2073 | } | ||
2074 | |||
2055 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | 2075 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) |
2056 | { | 2076 | { |
2057 | struct sk_buff *skb; | 2077 | struct sk_buff *skb; |
2058 | struct net_device *dev; | 2078 | struct net_device *dev; |
2059 | __be16 proto; | 2079 | __be16 proto; |
2060 | bool need_rls_dev = false; | ||
2061 | int err, reserve = 0; | 2080 | int err, reserve = 0; |
2062 | void *ph; | 2081 | void *ph; |
2063 | struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; | 2082 | struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; |
@@ -2070,7 +2089,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
2070 | mutex_lock(&po->pg_vec_lock); | 2089 | mutex_lock(&po->pg_vec_lock); |
2071 | 2090 | ||
2072 | if (saddr == NULL) { | 2091 | if (saddr == NULL) { |
2073 | dev = po->prot_hook.dev; | 2092 | dev = packet_cached_dev_get(po); |
2074 | proto = po->num; | 2093 | proto = po->num; |
2075 | addr = NULL; | 2094 | addr = NULL; |
2076 | } else { | 2095 | } else { |
@@ -2084,19 +2103,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
2084 | proto = saddr->sll_protocol; | 2103 | proto = saddr->sll_protocol; |
2085 | addr = saddr->sll_addr; | 2104 | addr = saddr->sll_addr; |
2086 | dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); | 2105 | dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); |
2087 | need_rls_dev = true; | ||
2088 | } | 2106 | } |
2089 | 2107 | ||
2090 | err = -ENXIO; | 2108 | err = -ENXIO; |
2091 | if (unlikely(dev == NULL)) | 2109 | if (unlikely(dev == NULL)) |
2092 | goto out; | 2110 | goto out; |
2093 | |||
2094 | reserve = dev->hard_header_len; | ||
2095 | |||
2096 | err = -ENETDOWN; | 2111 | err = -ENETDOWN; |
2097 | if (unlikely(!(dev->flags & IFF_UP))) | 2112 | if (unlikely(!(dev->flags & IFF_UP))) |
2098 | goto out_put; | 2113 | goto out_put; |
2099 | 2114 | ||
2115 | reserve = dev->hard_header_len; | ||
2116 | |||
2100 | size_max = po->tx_ring.frame_size | 2117 | size_max = po->tx_ring.frame_size |
2101 | - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); | 2118 | - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); |
2102 | 2119 | ||
@@ -2173,8 +2190,7 @@ out_status: | |||
2173 | __packet_set_status(po, ph, status); | 2190 | __packet_set_status(po, ph, status); |
2174 | kfree_skb(skb); | 2191 | kfree_skb(skb); |
2175 | out_put: | 2192 | out_put: |
2176 | if (need_rls_dev) | 2193 | dev_put(dev); |
2177 | dev_put(dev); | ||
2178 | out: | 2194 | out: |
2179 | mutex_unlock(&po->pg_vec_lock); | 2195 | mutex_unlock(&po->pg_vec_lock); |
2180 | return err; | 2196 | return err; |
@@ -2212,7 +2228,6 @@ static int packet_snd(struct socket *sock, | |||
2212 | struct sk_buff *skb; | 2228 | struct sk_buff *skb; |
2213 | struct net_device *dev; | 2229 | struct net_device *dev; |
2214 | __be16 proto; | 2230 | __be16 proto; |
2215 | bool need_rls_dev = false; | ||
2216 | unsigned char *addr; | 2231 | unsigned char *addr; |
2217 | int err, reserve = 0; | 2232 | int err, reserve = 0; |
2218 | struct virtio_net_hdr vnet_hdr = { 0 }; | 2233 | struct virtio_net_hdr vnet_hdr = { 0 }; |
@@ -2228,7 +2243,7 @@ static int packet_snd(struct socket *sock, | |||
2228 | */ | 2243 | */ |
2229 | 2244 | ||
2230 | if (saddr == NULL) { | 2245 | if (saddr == NULL) { |
2231 | dev = po->prot_hook.dev; | 2246 | dev = packet_cached_dev_get(po); |
2232 | proto = po->num; | 2247 | proto = po->num; |
2233 | addr = NULL; | 2248 | addr = NULL; |
2234 | } else { | 2249 | } else { |
@@ -2240,19 +2255,17 @@ static int packet_snd(struct socket *sock, | |||
2240 | proto = saddr->sll_protocol; | 2255 | proto = saddr->sll_protocol; |
2241 | addr = saddr->sll_addr; | 2256 | addr = saddr->sll_addr; |
2242 | dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); | 2257 | dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); |
2243 | need_rls_dev = true; | ||
2244 | } | 2258 | } |
2245 | 2259 | ||
2246 | err = -ENXIO; | 2260 | err = -ENXIO; |
2247 | if (dev == NULL) | 2261 | if (unlikely(dev == NULL)) |
2248 | goto out_unlock; | 2262 | goto out_unlock; |
2249 | if (sock->type == SOCK_RAW) | ||
2250 | reserve = dev->hard_header_len; | ||
2251 | |||
2252 | err = -ENETDOWN; | 2263 | err = -ENETDOWN; |
2253 | if (!(dev->flags & IFF_UP)) | 2264 | if (unlikely(!(dev->flags & IFF_UP))) |
2254 | goto out_unlock; | 2265 | goto out_unlock; |
2255 | 2266 | ||
2267 | if (sock->type == SOCK_RAW) | ||
2268 | reserve = dev->hard_header_len; | ||
2256 | if (po->has_vnet_hdr) { | 2269 | if (po->has_vnet_hdr) { |
2257 | vnet_hdr_len = sizeof(vnet_hdr); | 2270 | vnet_hdr_len = sizeof(vnet_hdr); |
2258 | 2271 | ||
@@ -2386,15 +2399,14 @@ static int packet_snd(struct socket *sock, | |||
2386 | if (err > 0 && (err = net_xmit_errno(err)) != 0) | 2399 | if (err > 0 && (err = net_xmit_errno(err)) != 0) |
2387 | goto out_unlock; | 2400 | goto out_unlock; |
2388 | 2401 | ||
2389 | if (need_rls_dev) | 2402 | dev_put(dev); |
2390 | dev_put(dev); | ||
2391 | 2403 | ||
2392 | return len; | 2404 | return len; |
2393 | 2405 | ||
2394 | out_free: | 2406 | out_free: |
2395 | kfree_skb(skb); | 2407 | kfree_skb(skb); |
2396 | out_unlock: | 2408 | out_unlock: |
2397 | if (dev && need_rls_dev) | 2409 | if (dev) |
2398 | dev_put(dev); | 2410 | dev_put(dev); |
2399 | out: | 2411 | out: |
2400 | return err; | 2412 | return err; |
@@ -2614,6 +2626,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, | |||
2614 | po = pkt_sk(sk); | 2626 | po = pkt_sk(sk); |
2615 | sk->sk_family = PF_PACKET; | 2627 | sk->sk_family = PF_PACKET; |
2616 | po->num = proto; | 2628 | po->num = proto; |
2629 | RCU_INIT_POINTER(po->cached_dev, NULL); | ||
2617 | 2630 | ||
2618 | sk->sk_destruct = packet_sock_destruct; | 2631 | sk->sk_destruct = packet_sock_destruct; |
2619 | sk_refcnt_debug_inc(sk); | 2632 | sk_refcnt_debug_inc(sk); |
diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b4561207..1035fa2d909c 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h | |||
@@ -113,6 +113,7 @@ struct packet_sock { | |||
113 | unsigned int tp_loss:1; | 113 | unsigned int tp_loss:1; |
114 | unsigned int tp_tx_has_off:1; | 114 | unsigned int tp_tx_has_off:1; |
115 | unsigned int tp_tstamp; | 115 | unsigned int tp_tstamp; |
116 | struct net_device __rcu *cached_dev; | ||
116 | struct packet_type prot_hook ____cacheline_aligned_in_smp; | 117 | struct packet_type prot_hook ____cacheline_aligned_in_smp; |
117 | }; | 118 | }; |
118 | 119 | ||