aboutsummaryrefslogtreecommitdiffstats
path: root/net/packet
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2012-08-20 21:06:47 -0400
committerDavid S. Miller <davem@davemloft.net>2012-08-23 01:58:27 -0400
commit0fa7fa98dbcc2789409ed24e885485e645803d7f (patch)
tree30a9b851fdee3b01a64f5ee07940224d089d31de /net/packet
parentb32607dd47d456b99f0a16f1c37bc8a0975ffb19 (diff)
packet: Protect packet sk list with mutex (v2)
Change since v1: * Fixed inuse counters access spotted by Eric In patch eea68e2f (packet: Report socket mclist info via diag module) I've introduced a "scheduling in atomic" problem in packet diag module -- the socket list is traversed under rcu_read_lock() while performed under it sk mclist access requires rtnl lock (i.e. -- mutex) to be taken. [152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002 [152363.820573] 4 locks held by crtools/12517: [152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e [152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a [152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab [152363.820693] #3: (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af Similar thing was then re-introduced by further packet diag patches (fanout mutex and pgvec mutex for rings) :( Apart from being terribly sorry for the above, I propose to change the packet sk list protection from spinlock to mutex. This lock currently protects two modifications: * sklist * prot inuse counters The sklist modifications can be just reprotected with mutex since they already occur in a sleeping context. The inuse counters modifications are trickier -- the __this_cpu_-s are used inside, thus requiring the caller to handle the potential issues with contexts himself. Since packet sockets' counters are modified in two places only (packet_create and packet_release) we only need to protect the context from being preempted. BH disabling is not required in this case. Signed-off-by: Pavel Emelyanov <xemul@parallels.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet')
-rw-r--r--net/packet/af_packet.c16
-rw-r--r--net/packet/diag.c6
2 files changed, 14 insertions, 8 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bfaa0a83f0eb..f220c5bdb71f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock)
2317 net = sock_net(sk); 2317 net = sock_net(sk);
2318 po = pkt_sk(sk); 2318 po = pkt_sk(sk);
2319 2319
2320 spin_lock_bh(&net->packet.sklist_lock); 2320 mutex_lock(&net->packet.sklist_lock);
2321 sk_del_node_init_rcu(sk); 2321 sk_del_node_init_rcu(sk);
2322 mutex_unlock(&net->packet.sklist_lock);
2323
2324 preempt_disable();
2322 sock_prot_inuse_add(net, sk->sk_prot, -1); 2325 sock_prot_inuse_add(net, sk->sk_prot, -1);
2323 spin_unlock_bh(&net->packet.sklist_lock); 2326 preempt_enable();
2324 2327
2325 spin_lock(&po->bind_lock); 2328 spin_lock(&po->bind_lock);
2326 unregister_prot_hook(sk, false); 2329 unregister_prot_hook(sk, false);
@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
2519 register_prot_hook(sk); 2522 register_prot_hook(sk);
2520 } 2523 }
2521 2524
2522 spin_lock_bh(&net->packet.sklist_lock); 2525 mutex_lock(&net->packet.sklist_lock);
2523 sk_add_node_rcu(sk, &net->packet.sklist); 2526 sk_add_node_rcu(sk, &net->packet.sklist);
2527 mutex_unlock(&net->packet.sklist_lock);
2528
2529 preempt_disable();
2524 sock_prot_inuse_add(net, &packet_proto, 1); 2530 sock_prot_inuse_add(net, &packet_proto, 1);
2525 spin_unlock_bh(&net->packet.sklist_lock); 2531 preempt_enable();
2526 2532
2527 return 0; 2533 return 0;
2528out: 2534out:
@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
3775 3781
3776static int __net_init packet_net_init(struct net *net) 3782static int __net_init packet_net_init(struct net *net)
3777{ 3783{
3778 spin_lock_init(&net->packet.sklist_lock); 3784 mutex_init(&net->packet.sklist_lock);
3779 INIT_HLIST_HEAD(&net->packet.sklist); 3785 INIT_HLIST_HEAD(&net->packet.sklist);
3780 3786
3781 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) 3787 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
diff --git a/net/packet/diag.c b/net/packet/diag.c
index bc33fbe8a5ef..39bce0d50df9 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
177 net = sock_net(skb->sk); 177 net = sock_net(skb->sk);
178 req = nlmsg_data(cb->nlh); 178 req = nlmsg_data(cb->nlh);
179 179
180 rcu_read_lock(); 180 mutex_lock(&net->packet.sklist_lock);
181 sk_for_each_rcu(sk, node, &net->packet.sklist) { 181 sk_for_each(sk, node, &net->packet.sklist) {
182 if (!net_eq(sock_net(sk), net)) 182 if (!net_eq(sock_net(sk), net))
183 continue; 183 continue;
184 if (num < s_num) 184 if (num < s_num)
@@ -192,7 +192,7 @@ next:
192 num++; 192 num++;
193 } 193 }
194done: 194done:
195 rcu_read_unlock(); 195 mutex_unlock(&net->packet.sklist_lock);
196 cb->args[0] = num; 196 cb->args[0] = num;
197 197
198 return skb->len; 198 return skb->len;