aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@openvz.org>2008-01-30 22:31:06 -0500
committerDavid S. Miller <davem@davemloft.net>2008-01-31 22:27:22 -0500
commit23fe18669e7fdaf5b229747858d943a723124e2e (patch)
tree1610562f5a80cb8c97102acafa16443de0e659f7
parent533cb5b0a63f28ecab5503cfceb77e641fa7f7c4 (diff)
[NETNS]: Fix race between put_net() and netlink_kernel_create().
The comment about "race free view of the set of network namespaces" was a bit hasty. Look (there even can be only one CPU, as discovered by Alexey Dobriyan and Denis Lunev): put_net() if (atomic_dec_and_test(&net->refcnt)) /* true */ __put_net(net); queue_work(...); /* * note: the net now has refcnt 0, but still in * the global list of net namespaces */ == re-schedule == register_pernet_subsys(&some_ops); register_pernet_operations(&some_ops); (*some_ops)->init(net); /* * we call netlink_kernel_create() here * in some places */ netlink_kernel_create(); sk_alloc(); get_net(net); /* refcnt = 1 */ /* * now we drop the net refcount not to * block the net namespace exit in the * future (or this can be done on the * error path) */ put_net(sk->sk_net); if (atomic_dec_and_test(&...)) /* * true. BOOOM! The net is * scheduled for release twice */ When thinking on this problem, I decided, that getting and putting the net in init callback is wrong. If some init callback needs to have a refcount-less reference on the struct net, _it_ has to be careful himself, rather than relying on the infrastructure to handle this correctly. In case of netlink_kernel_create(), the problem is that the sk_alloc() gets the given namespace, but passing the info that we don't want to get it inside this call is too heavy. Instead, I propose to crate the socket inside an init_net namespace and then re-attach it to the desired one right after the socket is created. After doing this, we also have to be careful on error paths not to drop the reference on the namespace, we didn't get the one on. Signed-off-by: Pavel Emelyanov <xemul@openvz.org> Acked-by: Denis Lunev <den@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/netlink/af_netlink.c52
1 files changed, 33 insertions, 19 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6b178e1247b5..ff9fb6ba0c5c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1344,6 +1344,22 @@ static void netlink_data_ready(struct sock *sk, int len)
1344 * queueing. 1344 * queueing.
1345 */ 1345 */
1346 1346
1347static void __netlink_release(struct sock *sk)
1348{
1349 /*
1350 * Last sock_put should drop referrence to sk->sk_net. It has already
1351 * been dropped in netlink_kernel_create. Taking referrence to stopping
1352 * namespace is not an option.
1353 * Take referrence to a socket to remove it from netlink lookup table
1354 * _alive_ and after that destroy it in the context of init_net.
1355 */
1356
1357 sock_hold(sk);
1358 sock_release(sk->sk_socket);
1359 sk->sk_net = get_net(&init_net);
1360 sock_put(sk);
1361}
1362
1347struct sock * 1363struct sock *
1348netlink_kernel_create(struct net *net, int unit, unsigned int groups, 1364netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1349 void (*input)(struct sk_buff *skb), 1365 void (*input)(struct sk_buff *skb),
@@ -1362,8 +1378,18 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1362 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) 1378 if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1363 return NULL; 1379 return NULL;
1364 1380
1365 if (__netlink_create(net, sock, cb_mutex, unit) < 0) 1381 /*
1366 goto out_sock_release; 1382 * We have to just have a reference on the net from sk, but don't
1383 * get_net it. Besides, we cannot get and then put the net here.
1384 * So we create one inside init_net and the move it to net.
1385 */
1386
1387 if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1388 goto out_sock_release_nosk;
1389
1390 sk = sock->sk;
1391 put_net(sk->sk_net);
1392 sk->sk_net = net;
1367 1393
1368 if (groups < 32) 1394 if (groups < 32)
1369 groups = 32; 1395 groups = 32;
@@ -1372,7 +1398,6 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1372 if (!listeners) 1398 if (!listeners)
1373 goto out_sock_release; 1399 goto out_sock_release;
1374 1400
1375 sk = sock->sk;
1376 sk->sk_data_ready = netlink_data_ready; 1401 sk->sk_data_ready = netlink_data_ready;
1377 if (input) 1402 if (input)
1378 nlk_sk(sk)->netlink_rcv = input; 1403 nlk_sk(sk)->netlink_rcv = input;
@@ -1395,14 +1420,14 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1395 nl_table[unit].registered++; 1420 nl_table[unit].registered++;
1396 } 1421 }
1397 netlink_table_ungrab(); 1422 netlink_table_ungrab();
1398
1399 /* Do not hold an extra referrence to a namespace as this socket is
1400 * internal to a namespace and does not prevent it to stop. */
1401 put_net(net);
1402 return sk; 1423 return sk;
1403 1424
1404out_sock_release: 1425out_sock_release:
1405 kfree(listeners); 1426 kfree(listeners);
1427 __netlink_release(sk);
1428 return NULL;
1429
1430out_sock_release_nosk:
1406 sock_release(sock); 1431 sock_release(sock);
1407 return NULL; 1432 return NULL;
1408} 1433}
@@ -1415,18 +1440,7 @@ netlink_kernel_release(struct sock *sk)
1415 if (sk == NULL || sk->sk_socket == NULL) 1440 if (sk == NULL || sk->sk_socket == NULL)
1416 return; 1441 return;
1417 1442
1418 /* 1443 __netlink_release(sk);
1419 * Last sock_put should drop referrence to sk->sk_net. It has already
1420 * been dropped in netlink_kernel_create. Taking referrence to stopping
1421 * namespace is not an option.
1422 * Take referrence to a socket to remove it from netlink lookup table
1423 * _alive_ and after that destroy it in the context of init_net.
1424 */
1425 sock_hold(sk);
1426 sock_release(sk->sk_socket);
1427
1428 sk->sk_net = get_net(&init_net);
1429 sock_put(sk);
1430} 1444}
1431EXPORT_SYMBOL(netlink_kernel_release); 1445EXPORT_SYMBOL(netlink_kernel_release);
1432 1446