diff options
| author | Anoob Soman <anoob.soman@citrix.com> | 2017-02-15 15:25:39 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2017-02-17 11:11:35 -0500 |
| commit | 2bd624b4611ffee36422782d16e1c944d1351e98 (patch) | |
| tree | ade2570717b0ec982a6a9f11ff84e8f2da63cf14 /net/packet | |
| parent | 4695daefba8df8a11fa0b0edd595eedae9ea59ae (diff) | |
packet: Do not call fanout_release from atomic contexts
Commit 6664498280cf ("packet: call fanout_release, while UNREGISTERING a
netdev"), unfortunately, introduced the following issues.
1. calling mutex_lock(&fanout_mutex) (fanout_release()) from inside
rcu_read-side critical section. rcu_read_lock disables preemption, most often,
which prohibits calling sleeping functions.
[ ] include/linux/rcupdate.h:560 Illegal context switch in RCU read-side critical section!
[ ]
[ ] rcu_scheduler_active = 1, debug_locks = 0
[ ] 4 locks held by ovs-vswitchd/1969:
[ ] #0: (cb_lock){++++++}, at: [<ffffffff8158a6c9>] genl_rcv+0x19/0x40
[ ] #1: (ovs_mutex){+.+.+.}, at: [<ffffffffa04878ca>] ovs_vport_cmd_del+0x4a/0x100 [openvswitch]
[ ] #2: (rtnl_mutex){+.+.+.}, at: [<ffffffff81564157>] rtnl_lock+0x17/0x20
[ ] #3: (rcu_read_lock){......}, at: [<ffffffff81614165>] packet_notifier+0x5/0x3f0
[ ]
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff810c9077>] lockdep_rcu_suspicious+0x107/0x110
[ ] [<ffffffff810a2da7>] ___might_sleep+0x57/0x210
[ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
[ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
[ ] [<ffffffff810de93f>] ? vprintk_default+0x1f/0x30
[ ] [<ffffffff81186e88>] ? printk+0x4d/0x4f
[ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
2. calling mutex_lock(&fanout_mutex) inside spin_lock(&po->bind_lock).
"sleeping function called from invalid context"
[ ] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
[ ] in_atomic(): 1, irqs_disabled(): 0, pid: 1969, name: ovs-vswitchd
[ ] INFO: lockdep is turned off.
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff810a2f52>] ___might_sleep+0x202/0x210
[ ] [<ffffffff810a2fd0>] __might_sleep+0x70/0x90
[ ] [<ffffffff8162e80c>] mutex_lock_nested+0x3c/0x3a0
[ ] [<ffffffff816106dd>] fanout_release+0x1d/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
3. calling dev_remove_pack(&fanout->prot_hook), from inside
spin_lock(&po->bind_lock) or rcu_read-side critical-section. dev_remove_pack()
-> synchronize_net(), which might sleep.
[ ] BUG: scheduling while atomic: ovs-vswitchd/1969/0x00000002
[ ] INFO: lockdep is turned off.
[ ] Call Trace:
[ ] [<ffffffff813770c1>] dump_stack+0x85/0xc4
[ ] [<ffffffff81186274>] __schedule_bug+0x64/0x73
[ ] [<ffffffff8162b8cb>] __schedule+0x6b/0xd10
[ ] [<ffffffff8162c5db>] schedule+0x6b/0x80
[ ] [<ffffffff81630b1d>] schedule_timeout+0x38d/0x410
[ ] [<ffffffff810ea3fd>] synchronize_sched_expedited+0x53d/0x810
[ ] [<ffffffff810ea6de>] synchronize_rcu_expedited+0xe/0x10
[ ] [<ffffffff8154eab5>] synchronize_net+0x35/0x50
[ ] [<ffffffff8154eae3>] dev_remove_pack+0x13/0x20
[ ] [<ffffffff8161077e>] fanout_release+0xbe/0xe0
[ ] [<ffffffff81614459>] packet_notifier+0x2f9/0x3f0
4. fanout_release() races with calls from different CPU.
To fix the above problems, remove the call to fanout_release() under
rcu_read_lock(). Instead, call __dev_remove_pack(&fanout->prot_hook) and
netdev_run_todo will be happy that &dev->ptype_specific list is empty. In order
to achieve this, I moved dev_{add,remove}_pack() out of fanout_{add,release} to
__fanout_{link,unlink}. So, call to {,__}unregister_prot_hook() will make sure
fanout->prot_hook is removed as well.
Fixes: 6664498280cf ("packet: call fanout_release, while UNREGISTERING a netdev")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Anoob Soman <anoob.soman@citrix.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet')
| -rw-r--r-- | net/packet/af_packet.c | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0f03f6a53b4d..70f5b6a4683c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
| @@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) | |||
| 1497 | f->arr[f->num_members] = sk; | 1497 | f->arr[f->num_members] = sk; |
| 1498 | smp_wmb(); | 1498 | smp_wmb(); |
| 1499 | f->num_members++; | 1499 | f->num_members++; |
| 1500 | if (f->num_members == 1) | ||
| 1501 | dev_add_pack(&f->prot_hook); | ||
| 1500 | spin_unlock(&f->lock); | 1502 | spin_unlock(&f->lock); |
| 1501 | } | 1503 | } |
| 1502 | 1504 | ||
| @@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) | |||
| 1513 | BUG_ON(i >= f->num_members); | 1515 | BUG_ON(i >= f->num_members); |
| 1514 | f->arr[i] = f->arr[f->num_members - 1]; | 1516 | f->arr[i] = f->arr[f->num_members - 1]; |
| 1515 | f->num_members--; | 1517 | f->num_members--; |
| 1518 | if (f->num_members == 0) | ||
| 1519 | __dev_remove_pack(&f->prot_hook); | ||
| 1516 | spin_unlock(&f->lock); | 1520 | spin_unlock(&f->lock); |
| 1517 | } | 1521 | } |
| 1518 | 1522 | ||
| @@ -1693,7 +1697,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
| 1693 | match->prot_hook.func = packet_rcv_fanout; | 1697 | match->prot_hook.func = packet_rcv_fanout; |
| 1694 | match->prot_hook.af_packet_priv = match; | 1698 | match->prot_hook.af_packet_priv = match; |
| 1695 | match->prot_hook.id_match = match_fanout_group; | 1699 | match->prot_hook.id_match = match_fanout_group; |
| 1696 | dev_add_pack(&match->prot_hook); | ||
| 1697 | list_add(&match->list, &fanout_list); | 1700 | list_add(&match->list, &fanout_list); |
| 1698 | } | 1701 | } |
| 1699 | err = -EINVAL; | 1702 | err = -EINVAL; |
| @@ -1718,7 +1721,12 @@ out: | |||
| 1718 | return err; | 1721 | return err; |
| 1719 | } | 1722 | } |
| 1720 | 1723 | ||
| 1721 | static void fanout_release(struct sock *sk) | 1724 | /* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes |
| 1725 | * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. | ||
| 1726 | * It is the responsibility of the caller to call fanout_release_data() and | ||
| 1727 | * free the returned packet_fanout (after synchronize_net()) | ||
| 1728 | */ | ||
| 1729 | static struct packet_fanout *fanout_release(struct sock *sk) | ||
| 1722 | { | 1730 | { |
| 1723 | struct packet_sock *po = pkt_sk(sk); | 1731 | struct packet_sock *po = pkt_sk(sk); |
| 1724 | struct packet_fanout *f; | 1732 | struct packet_fanout *f; |
| @@ -1728,17 +1736,17 @@ static void fanout_release(struct sock *sk) | |||
| 1728 | if (f) { | 1736 | if (f) { |
| 1729 | po->fanout = NULL; | 1737 | po->fanout = NULL; |
| 1730 | 1738 | ||
| 1731 | if (atomic_dec_and_test(&f->sk_ref)) { | 1739 | if (atomic_dec_and_test(&f->sk_ref)) |
| 1732 | list_del(&f->list); | 1740 | list_del(&f->list); |
| 1733 | dev_remove_pack(&f->prot_hook); | 1741 | else |
| 1734 | fanout_release_data(f); | 1742 | f = NULL; |
| 1735 | kfree(f); | ||
| 1736 | } | ||
| 1737 | 1743 | ||
| 1738 | if (po->rollover) | 1744 | if (po->rollover) |
| 1739 | kfree_rcu(po->rollover, rcu); | 1745 | kfree_rcu(po->rollover, rcu); |
| 1740 | } | 1746 | } |
| 1741 | mutex_unlock(&fanout_mutex); | 1747 | mutex_unlock(&fanout_mutex); |
| 1748 | |||
| 1749 | return f; | ||
| 1742 | } | 1750 | } |
| 1743 | 1751 | ||
| 1744 | static bool packet_extra_vlan_len_allowed(const struct net_device *dev, | 1752 | static bool packet_extra_vlan_len_allowed(const struct net_device *dev, |
| @@ -2912,6 +2920,7 @@ static int packet_release(struct socket *sock) | |||
| 2912 | { | 2920 | { |
| 2913 | struct sock *sk = sock->sk; | 2921 | struct sock *sk = sock->sk; |
| 2914 | struct packet_sock *po; | 2922 | struct packet_sock *po; |
| 2923 | struct packet_fanout *f; | ||
| 2915 | struct net *net; | 2924 | struct net *net; |
| 2916 | union tpacket_req_u req_u; | 2925 | union tpacket_req_u req_u; |
| 2917 | 2926 | ||
| @@ -2951,9 +2960,14 @@ static int packet_release(struct socket *sock) | |||
| 2951 | packet_set_ring(sk, &req_u, 1, 1); | 2960 | packet_set_ring(sk, &req_u, 1, 1); |
| 2952 | } | 2961 | } |
| 2953 | 2962 | ||
| 2954 | fanout_release(sk); | 2963 | f = fanout_release(sk); |
| 2955 | 2964 | ||
| 2956 | synchronize_net(); | 2965 | synchronize_net(); |
| 2966 | |||
| 2967 | if (f) { | ||
| 2968 | fanout_release_data(f); | ||
| 2969 | kfree(f); | ||
| 2970 | } | ||
| 2957 | /* | 2971 | /* |
| 2958 | * Now the socket is dead. No more input will appear. | 2972 | * Now the socket is dead. No more input will appear. |
| 2959 | */ | 2973 | */ |
| @@ -3905,7 +3919,6 @@ static int packet_notifier(struct notifier_block *this, | |||
| 3905 | } | 3919 | } |
| 3906 | if (msg == NETDEV_UNREGISTER) { | 3920 | if (msg == NETDEV_UNREGISTER) { |
| 3907 | packet_cached_dev_reset(po); | 3921 | packet_cached_dev_reset(po); |
| 3908 | fanout_release(sk); | ||
| 3909 | po->ifindex = -1; | 3922 | po->ifindex = -1; |
| 3910 | if (po->prot_hook.dev) | 3923 | if (po->prot_hook.dev) |
| 3911 | dev_put(po->prot_hook.dev); | 3924 | dev_put(po->prot_hook.dev); |
