diff options
author | Anoob Soman <anoob.soman@citrix.com> | 2017-02-15 15:25:39 -0500 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-02-26 05:10:50 -0500 |
commit | 9206efc8a1f74e558e3165218fab136b51be848a (patch) | |
tree | afbc3f530e144fd595f3d94c0bd7a06aae0c0140 /net | |
parent | 722737f27774b14be5a1d2d3b9281dcded7c48b2 (diff) |
packet: Do not call fanout_release from atomic contexts
[ Upstream commit 2bd624b4611ffee36422782d16e1c944d1351e98 ]
Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a
netdev"), unfortunately, introduced the following issues.
1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside
rcu_read-side critical section. rcu_read_lock disables preemption, most often,
which prohibits calling sleeping functions.
[ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section!
[ ]
[ ] rcu_scheduler_active = 1, debug_locks = 0
[ ] 4 locks held by ovs-vswitchd/1969:
[ ] #0: (cb_lock){++++++}, at: [<ffffffff8158a6c9>] genl_rcv+0x19/0x40
[ ] #1: (ovs_mutex){+.+.+.}, at: [<ffffffffa04878ca>] ovs_vport_cmd_del+0x4a/0x100 [openvswitch]
[ ] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff81564157>] rtnl_lock+0x17/0x20
[ ] #3: (rcu_read_lock){......}, at: [<ffffffff81614165>] packet_notifier+0x5/0x3f0
[ ]
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff810c9077>] lockdep_rcu_suspicious+0x107/0x110
[ ] [<ffffffff810a2da7>] ___might_sleep+0x57/0x210
[ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
[ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
[ ] [<ffffffff810de93f>] ? vprintk_default+0x1f/0x30
[ ] [<ffffffff81186e88>] ? printk+0x4d/0x4f
[ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock).
"sleeping function called from invalid context"
[ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
[ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd
[ ] INFO: lockdep is turned off.
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff810a2f52>] ___might_sleep+0x202/0x210
[ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
[ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
[ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
3. calling dev_remove_pack(&fanout->prot_hook), from inside
spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack()
-> synchronize_net(), which might sleep.
[ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002
[ ] INFO: lockdep is turned off.
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff81186274>] __schedule_bug+0x64/0x73
[ ] [<ffffffff8162b8cb>] __schedule+0x6b/0xd10
[ ] [<ffffffff8162c5db>] schedule+0x6b/0x80
[ ] [<ffffffff81630b1d>] schedule_timeout+0x38d/0x410
[ ] [<ffffffff810ea3fd>] synchronize_sched_expedited+0x53d/0x810
[ ] [<ffffffff810ea6de>] synchronize_rcu_expedited+0xe/0x10
[ ] [<ffffffff8154eab5>] synchronize_net+0x35/0x50
[ ] [<ffffffff8154eae3>] dev_remove_pack+0x13/0x20
[ ] [<ffffffff8161077e>] fanout_release+0xbe/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
4. fanout_release() races with calls from different CPU.
To fix the above problems, remove the call to fanout_release() under
rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and
netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order
to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to
__fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure
fanout->prot_hook is removed as well.
Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/packet/af_packet.c | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 87113e86b328..34de326b4f09 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) | |||
1497 | f->arr[f->num_members] = sk; | 1497 | f->arr[f->num_members] = sk; |
1498 | smp_wmb(); | 1498 | smp_wmb(); |
1499 | f->num_members++; | 1499 | f->num_members++; |
1500 | if (f->num_members == 1) | ||
1501 | dev_add_pack(&f->prot_hook); | ||
1500 | spin_unlock(&f->lock); | 1502 | spin_unlock(&f->lock); |
1501 | } | 1503 | } |
1502 | 1504 | ||
@@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) | |||
1513 | BUG_ON(i >= f->num_members); | 1515 | BUG_ON(i >= f->num_members); |
1514 | f->arr[i] = f->arr[f->num_members - 1]; | 1516 | f->arr[i] = f->arr[f->num_members - 1]; |
1515 | f->num_members--; | 1517 | f->num_members--; |
1518 | if (f->num_members == 0) | ||
1519 | __dev_remove_pack(&f->prot_hook); | ||
1516 | spin_unlock(&f->lock); | 1520 | spin_unlock(&f->lock); |
1517 | } | 1521 | } |
1518 | 1522 | ||
@@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
1693 | match->prot_hook.func = packet_rcv_fanout; | 1697 | match->prot_hook.func = packet_rcv_fanout; |
1694 | match->prot_hook.af_packet_priv = match; | 1698 | match->prot_hook.af_packet_priv = match; |
1695 | match->prot_hook.id_match = match_fanout_group; | 1699 | match->prot_hook.id_match = match_fanout_group; |
1696 | dev_add_pack(&match->prot_hook); | ||
1697 | list_add(&match->list, &fanout_list); | 1700 | list_add(&match->list, &fanout_list); |
1698 | } | 1701 | } |
1699 | err = -EINVAL; | 1702 | err = -EINVAL; |
@@ -1718,7 +1721,12 @@ out: | |||
1718 | return err; | 1721 | return err; |
1719 | } | 1722 | } |
1720 | 1723 | ||
1721 | static void fanout_release(struct sock *sk) | 1724 | /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes |
1725 | * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. | ||
1726 | * It is the responsibility of the caller to call fanout_release_data() and | ||
1727 | * free the returned packet_fanout (after synchronize_net()) | ||
1728 | */ | ||
1729 | static struct packet_fanout *fanout_release(struct sock *sk) | ||
1722 | { | 1730 | { |
1723 | struct packet_sock *po = pkt_sk(sk); | 1731 | struct packet_sock *po = pkt_sk(sk); |
1724 | struct packet_fanout *f; | 1732 | struct packet_fanout *f; |
@@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *sk) | |||
1728 | if (f) { | 1736 | if (f) { |
1729 | po->fanout = NULL; | 1737 | po->fanout = NULL; |
1730 | 1738 | ||
1731 | if (atomic_dec_and_test(&f->sk_ref)) { | 1739 | if (atomic_dec_and_test(&f->sk_ref)) |
1732 | list_del(&f->list); | 1740 | list_del(&f->list); |
1733 | dev_remove_pack(&f->prot_hook); | 1741 | else |
1734 | fanout_release_data(f); | 1742 | f = NULL; |
1735 | kfree(f); | ||
1736 | } | ||
1737 | 1743 | ||
1738 | if (po->rollover) | 1744 | if (po->rollover) |
1739 | kfree_rcu(po->rollover, rcu); | 1745 | kfree_rcu(po->rollover, rcu); |
1740 | } | 1746 | } |
1741 | mutex_unlock(&fanout_mutex); | 1747 | mutex_unlock(&fanout_mutex); |
1748 | |||
1749 | return f; | ||
1742 | } | 1750 | } |
1743 | 1751 | ||
1744 | static bool packet_extra_vlan_len_allowed(const struct net_device *dev, | 1752 | static bool packet_extra_vlan_len_allowed(const struct net_device *dev, |
@@ -2970,6 +2978,7 @@ static int packet_release(struct socket *sock) | |||
2970 | { | 2978 | { |
2971 | struct sock *sk = sock->sk; | 2979 | struct sock *sk = sock->sk; |
2972 | struct packet_sock *po; | 2980 | struct packet_sock *po; |
2981 | struct packet_fanout *f; | ||
2973 | struct net *net; | 2982 | struct net *net; |
2974 | union tpacket_req_u req_u; | 2983 | union tpacket_req_u req_u; |
2975 | 2984 | ||
@@ -3009,9 +3018,14 @@ static int packet_release(struct socket *sock) | |||
3009 | packet_set_ring(sk, &req_u, 1, 1); | 3018 | packet_set_ring(sk, &req_u, 1, 1); |
3010 | } | 3019 | } |
3011 | 3020 | ||
3012 | fanout_release(sk); | 3021 | f = fanout_release(sk); |
3013 | 3022 | ||
3014 | synchronize_net(); | 3023 | synchronize_net(); |
3024 | |||
3025 | if (f) { | ||
3026 | fanout_release_data(f); | ||
3027 | kfree(f); | ||
3028 | } | ||
3015 | /* | 3029 | /* |
3016 | * Now the socket is dead. No more input will appear. | 3030 | * Now the socket is dead. No more input will appear. |
3017 | */ | 3031 | */ |
@@ -3963,7 +3977,6 @@ static int packet_notifier(struct notifier_block *this, | |||
3963 | } | 3977 | } |
3964 | if (msg == NETDEV_UNREGISTER) { | 3978 | if (msg == NETDEV_UNREGISTER) { |
3965 | packet_cached_dev_reset(po); | 3979 | packet_cached_dev_reset(po); |
3966 | fanout_release(sk); | ||
3967 | po->ifindex = -1; | 3980 | po->ifindex = -1; |
3968 | if (po->prot_hook.dev) | 3981 | if (po->prot_hook.dev) |
3969 | dev_put(po->prot_hook.dev); | 3982 | dev_put(po->prot_hook.dev); |