diff options
author | Francesco Ruggeri <fruggeri@aristanetworks.com> | 2015-11-05 11:16:14 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-11-05 14:48:42 -0500 |
commit | 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 (patch) | |
tree | 47da4e1bb16c1115e634744ebd4b7a1659e47324 | |
parent | f668f5f7e0861087ef9d64d473a9c1399fc25471 (diff) |
packet: race condition in packet_bind
There is a race condition between packet_notifier and packet_bind{_spkt}.
It happens if packet_notifier(NETDEV_UNREGISTER) executes between the
time packet_bind{_spkt} takes a reference on the new netdevice and the
time packet_do_bind sets po->ifindex.
In this case the notification can be missed.
If this happens during a dev_change_net_namespace this can result in the
netdevice being moved to the new namespace while the packet_sock in the
old namespace still holds a reference on it. When the netdevice is later
deleted in the new namespace the deletion hangs since the packet_sock
is not found in the new namespace's &net->packet.sklist.
It can be reproduced with the script below.
This patch makes packet_do_bind check again for the presence of the
netdevice in the packet_sock's namespace after the synchronize_net
in unregister_prot_hook.
More generally, it also holds the rcu read lock for the duration of the
bind to stop dev_change_net_namespace/rollback_registered_many from
going past the synchronize_net following unlist_netdevice, so that
no NETDEV_UNREGISTER notifications can happen on the new netdevice
while the bind is executing. In order to do this some code from
packet_bind{_spkt} is consolidated into packet_do_bind.
# Reproducer for the packet_bind / packet_notifier race described above.
#
# The parent creates a VLAN device and an AF_PACKET socket, then forks.
# The child moves the VLAN device into another network namespace and
# deletes it there while the parent binds the socket to the device.
#
# Usage: <script> [vlanId]
# Needs privileges to create links and namespaces; relies on the external
# `ip` and `taskset` commands and on a parent interface named by realDev.
import socket, os, time, sys

proto = 7
realDev = 'em1'
vlanId = 400
if len(sys.argv) > 1:
    vlanId = int(sys.argv[1])
dev = 'vlan%d' % vlanId

# Set this process's CPU affinity mask (the child uses a different mask
# below, presumably so the two sides of the race run on different CPUs).
os.system('taskset -p 0x10 %d' % os.getpid())

s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto)
os.system('ip link add link %s name %s type vlan id %d' %
          (realDev, dev, vlanId))
os.system('ip netns add dummy')

pid = os.fork()

if pid == 0:
    # Child: dev should be moved while packet_do_bind is in synchronize_net
    os.system('taskset -p 0x20000 %d' % os.getpid())
    os.system('ip link set %s netns dummy' % dev)
    os.system('ip netns exec dummy ip link del %s' % dev)
    s.close()
    sys.exit(0)

# Parent: brief delay before binding; presumably tuned so that the bind
# overlaps with the child's namespace move -- adjust for the test machine.
time.sleep(.004)
try:
    # Bind with a protocol different from the one the socket was created
    # with (proto+1 vs proto).
    s.bind(('%s' % dev, proto+1))
except Exception:
    # The device may already be gone from this namespace; clean up and stop.
    print('Could not bind socket')
    s.close()
    os.system('ip netns del dummy')
    sys.exit(0)

# Wait for the child to finish moving and deleting the device, then clean up.
os.waitpid(pid, 0)
s.close()
os.system('ip netns del dummy')
sys.exit(0)
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/packet/af_packet.c | 80 |
1 files changed, 49 insertions, 31 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 691660b9b7ef..af399cac5205 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -2911,22 +2911,40 @@ static int packet_release(struct socket *sock) | |||
2911 | * Attach a packet hook. | 2911 | * Attach a packet hook. |
2912 | */ | 2912 | */ |
2913 | 2913 | ||
2914 | static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) | 2914 | static int packet_do_bind(struct sock *sk, const char *name, int ifindex, |
2915 | __be16 proto) | ||
2915 | { | 2916 | { |
2916 | struct packet_sock *po = pkt_sk(sk); | 2917 | struct packet_sock *po = pkt_sk(sk); |
2917 | struct net_device *dev_curr; | 2918 | struct net_device *dev_curr; |
2918 | __be16 proto_curr; | 2919 | __be16 proto_curr; |
2919 | bool need_rehook; | 2920 | bool need_rehook; |
2921 | struct net_device *dev = NULL; | ||
2922 | int ret = 0; | ||
2923 | bool unlisted = false; | ||
2920 | 2924 | ||
2921 | if (po->fanout) { | 2925 | if (po->fanout) |
2922 | if (dev) | ||
2923 | dev_put(dev); | ||
2924 | |||
2925 | return -EINVAL; | 2926 | return -EINVAL; |
2926 | } | ||
2927 | 2927 | ||
2928 | lock_sock(sk); | 2928 | lock_sock(sk); |
2929 | spin_lock(&po->bind_lock); | 2929 | spin_lock(&po->bind_lock); |
2930 | rcu_read_lock(); | ||
2931 | |||
2932 | if (name) { | ||
2933 | dev = dev_get_by_name_rcu(sock_net(sk), name); | ||
2934 | if (!dev) { | ||
2935 | ret = -ENODEV; | ||
2936 | goto out_unlock; | ||
2937 | } | ||
2938 | } else if (ifindex) { | ||
2939 | dev = dev_get_by_index_rcu(sock_net(sk), ifindex); | ||
2940 | if (!dev) { | ||
2941 | ret = -ENODEV; | ||
2942 | goto out_unlock; | ||
2943 | } | ||
2944 | } | ||
2945 | |||
2946 | if (dev) | ||
2947 | dev_hold(dev); | ||
2930 | 2948 | ||
2931 | proto_curr = po->prot_hook.type; | 2949 | proto_curr = po->prot_hook.type; |
2932 | dev_curr = po->prot_hook.dev; | 2950 | dev_curr = po->prot_hook.dev; |
@@ -2934,14 +2952,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) | |||
2934 | need_rehook = proto_curr != proto || dev_curr != dev; | 2952 | need_rehook = proto_curr != proto || dev_curr != dev; |
2935 | 2953 | ||
2936 | if (need_rehook) { | 2954 | if (need_rehook) { |
2937 | unregister_prot_hook(sk, true); | 2955 | if (po->running) { |
2956 | rcu_read_unlock(); | ||
2957 | __unregister_prot_hook(sk, true); | ||
2958 | rcu_read_lock(); | ||
2959 | dev_curr = po->prot_hook.dev; | ||
2960 | if (dev) | ||
2961 | unlisted = !dev_get_by_index_rcu(sock_net(sk), | ||
2962 | dev->ifindex); | ||
2963 | } | ||
2938 | 2964 | ||
2939 | po->num = proto; | 2965 | po->num = proto; |
2940 | po->prot_hook.type = proto; | 2966 | po->prot_hook.type = proto; |
2941 | po->prot_hook.dev = dev; | ||
2942 | 2967 | ||
2943 | po->ifindex = dev ? dev->ifindex : 0; | 2968 | if (unlikely(unlisted)) { |
2944 | packet_cached_dev_assign(po, dev); | 2969 | dev_put(dev); |
2970 | po->prot_hook.dev = NULL; | ||
2971 | po->ifindex = -1; | ||
2972 | packet_cached_dev_reset(po); | ||
2973 | } else { | ||
2974 | po->prot_hook.dev = dev; | ||
2975 | po->ifindex = dev ? dev->ifindex : 0; | ||
2976 | packet_cached_dev_assign(po, dev); | ||
2977 | } | ||
2945 | } | 2978 | } |
2946 | if (dev_curr) | 2979 | if (dev_curr) |
2947 | dev_put(dev_curr); | 2980 | dev_put(dev_curr); |
@@ -2949,7 +2982,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) | |||
2949 | if (proto == 0 || !need_rehook) | 2982 | if (proto == 0 || !need_rehook) |
2950 | goto out_unlock; | 2983 | goto out_unlock; |
2951 | 2984 | ||
2952 | if (!dev || (dev->flags & IFF_UP)) { | 2985 | if (!unlisted && (!dev || (dev->flags & IFF_UP))) { |
2953 | register_prot_hook(sk); | 2986 | register_prot_hook(sk); |
2954 | } else { | 2987 | } else { |
2955 | sk->sk_err = ENETDOWN; | 2988 | sk->sk_err = ENETDOWN; |
@@ -2958,9 +2991,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) | |||
2958 | } | 2991 | } |
2959 | 2992 | ||
2960 | out_unlock: | 2993 | out_unlock: |
2994 | rcu_read_unlock(); | ||
2961 | spin_unlock(&po->bind_lock); | 2995 | spin_unlock(&po->bind_lock); |
2962 | release_sock(sk); | 2996 | release_sock(sk); |
2963 | return 0; | 2997 | return ret; |
2964 | } | 2998 | } |
2965 | 2999 | ||
2966 | /* | 3000 | /* |
@@ -2972,8 +3006,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, | |||
2972 | { | 3006 | { |
2973 | struct sock *sk = sock->sk; | 3007 | struct sock *sk = sock->sk; |
2974 | char name[15]; | 3008 | char name[15]; |
2975 | struct net_device *dev; | ||
2976 | int err = -ENODEV; | ||
2977 | 3009 | ||
2978 | /* | 3010 | /* |
2979 | * Check legality | 3011 | * Check legality |
@@ -2983,19 +3015,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, | |||
2983 | return -EINVAL; | 3015 | return -EINVAL; |
2984 | strlcpy(name, uaddr->sa_data, sizeof(name)); | 3016 | strlcpy(name, uaddr->sa_data, sizeof(name)); |
2985 | 3017 | ||
2986 | dev = dev_get_by_name(sock_net(sk), name); | 3018 | return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); |
2987 | if (dev) | ||
2988 | err = packet_do_bind(sk, dev, pkt_sk(sk)->num); | ||
2989 | return err; | ||
2990 | } | 3019 | } |
2991 | 3020 | ||
2992 | static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | 3021 | static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
2993 | { | 3022 | { |
2994 | struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; | 3023 | struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; |
2995 | struct sock *sk = sock->sk; | 3024 | struct sock *sk = sock->sk; |
2996 | struct net_device *dev = NULL; | ||
2997 | int err; | ||
2998 | |||
2999 | 3025 | ||
3000 | /* | 3026 | /* |
3001 | * Check legality | 3027 | * Check legality |
@@ -3006,16 +3032,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len | |||
3006 | if (sll->sll_family != AF_PACKET) | 3032 | if (sll->sll_family != AF_PACKET) |
3007 | return -EINVAL; | 3033 | return -EINVAL; |
3008 | 3034 | ||
3009 | if (sll->sll_ifindex) { | 3035 | return packet_do_bind(sk, NULL, sll->sll_ifindex, |
3010 | err = -ENODEV; | 3036 | sll->sll_protocol ? : pkt_sk(sk)->num); |
3011 | dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); | ||
3012 | if (dev == NULL) | ||
3013 | goto out; | ||
3014 | } | ||
3015 | err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); | ||
3016 | |||
3017 | out: | ||
3018 | return err; | ||
3019 | } | 3037 | } |
3020 | 3038 | ||
3021 | static struct proto packet_proto = { | 3039 | static struct proto packet_proto = { |