author     Pavel Emelyanov <xemul@parallels.com>    2012-08-20 21:06:47 -0400
committer  David S. Miller <davem@davemloft.net>    2012-08-23 01:58:27 -0400
commit     0fa7fa98dbcc2789409ed24e885485e645803d7f (patch)
tree       30a9b851fdee3b01a64f5ee07940224d089d31de /net/packet
parent     b32607dd47d456b99f0a16f1c37bc8a0975ffb19 (diff)
packet: Protect packet sk list with mutex (v2)
Changes since v1:
* Fixed the inuse counter access spotted by Eric
In patch eea68e2f (packet: Report socket mclist info via diag module) I
introduced a "scheduling while atomic" problem in the packet diag module --
the socket list is traversed under rcu_read_lock(), while the sk mclist
access performed under it requires the rtnl lock (i.e. a mutex) to be taken.
[152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002
[152363.820573] 4 locks held by crtools/12517:
[152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e
[152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a
[152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab
[152363.820693] #3: (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af
A similar problem was then re-introduced by further packet diag patches (the
fanout mutex and the pgvec mutex for rings) :(
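To make the failure mode concrete, here is a minimal sketch of the broken
pattern (a hypothetical illustration shaped like the diag dump path, not the
actual code; broken_dump_sketch() does not exist in the tree): the whole walk
runs under rcu_read_lock(), i.e. in an atomic context, yet dumping a socket's
mclist needs the rtnl mutex, which may sleep.

/* Hypothetical illustration only -- sleeping under rcu_read_lock(). */
static void broken_dump_sketch(struct net *net)
{
        struct sock *sk;
        struct hlist_node *node;

        rcu_read_lock();                /* atomic context from here on */
        sk_for_each_rcu(sk, node, &net->packet.sklist) {
                rtnl_lock();            /* a mutex: may sleep, illegal under RCU */
                /* walk pkt_sk(sk)->mclist and fill the netlink reply */
                rtnl_unlock();
        }
        rcu_read_unlock();
}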
Apart from being terribly sorry for the above, I propose to change the packet
sk list protection from a spinlock to a mutex. This lock currently protects
two sets of modifications:
* sklist
* prot inuse counters
The sklist modifications can simply be re-protected with the mutex, since
they already occur in a sleeping context. The inuse counter modifications are
trickier -- __this_cpu_* operations are used inside, which requires the
caller to handle the execution context itself. Since the packet sockets'
counter is modified in only two places (packet_create and packet_release), we
only need to protect those sections from being preempted; disabling BHs is
not required in this case.
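To spell out why that is enough, here is a short sketch. The one-line body
shown for sock_prot_inuse_add() is an assumed simplification (exact field
names vary across kernel versions); the caller fragment mirrors what the
patch below does.

/* Assumed simplification: the counter update is a plain __this_cpu_add(),
 * which does not protect against preemption by itself, so the caller must
 * keep the task on one CPU while it runs.
 */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc)
{
        __this_cpu_add(net->core.inuse->val[prot->inuse_idx], inc);
}

/* Caller side, as in packet_create()/packet_release(): process context only,
 * never BH, so disabling preemption is sufficient and the BH-disabling side
 * of the old spin_lock_bh() is not needed.
 */
preempt_disable();
sock_prot_inuse_add(net, &packet_proto, 1);
preempt_enable();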
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet')
 net/packet/af_packet.c | 16 ++++++++++------
 net/packet/diag.c      |  6 +++---
 2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bfaa0a83f0eb..f220c5bdb71f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock)
         net = sock_net(sk);
         po = pkt_sk(sk);
 
-        spin_lock_bh(&net->packet.sklist_lock);
+        mutex_lock(&net->packet.sklist_lock);
         sk_del_node_init_rcu(sk);
+        mutex_unlock(&net->packet.sklist_lock);
+
+        preempt_disable();
         sock_prot_inuse_add(net, sk->sk_prot, -1);
-        spin_unlock_bh(&net->packet.sklist_lock);
+        preempt_enable();
 
         spin_lock(&po->bind_lock);
         unregister_prot_hook(sk, false);
@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
                 register_prot_hook(sk);
         }
 
-        spin_lock_bh(&net->packet.sklist_lock);
+        mutex_lock(&net->packet.sklist_lock);
         sk_add_node_rcu(sk, &net->packet.sklist);
+        mutex_unlock(&net->packet.sklist_lock);
+
+        preempt_disable();
         sock_prot_inuse_add(net, &packet_proto, 1);
-        spin_unlock_bh(&net->packet.sklist_lock);
+        preempt_enable();
 
         return 0;
 out:
@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
 
 static int __net_init packet_net_init(struct net *net)
 {
-        spin_lock_init(&net->packet.sklist_lock);
+        mutex_init(&net->packet.sklist_lock);
         INIT_HLIST_HEAD(&net->packet.sklist);
 
         if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
diff --git a/net/packet/diag.c b/net/packet/diag.c
index bc33fbe8a5ef..39bce0d50df9 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
         net = sock_net(skb->sk);
         req = nlmsg_data(cb->nlh);
 
-        rcu_read_lock();
-        sk_for_each_rcu(sk, node, &net->packet.sklist) {
+        mutex_lock(&net->packet.sklist_lock);
+        sk_for_each(sk, node, &net->packet.sklist) {
                 if (!net_eq(sock_net(sk), net))
                         continue;
                 if (num < s_num)
@@ -192,7 +192,7 @@ next:
                 num++;
         }
 done:
-        rcu_read_unlock();
+        mutex_unlock(&net->packet.sklist_lock);
         cb->args[0] = num;
 
         return skb->len;