aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Denis V. Lunev <den@openvz.org> 2008-01-19 02:55:19 -0500
committer David S. Miller <davem@davemloft.net> 2008-01-28 18:08:08 -0500
commit 775516bfa2bd7993620c9039191a0c30b8d8a496 (patch)
tree 7385f35edda9f4124b802ccf337c55070d2e00bc
parent b7c6ba6eb1234e35a74fb8ba8123232a7b1ba9e4 (diff)
[NETNS]: Namespace stop vs 'ip r l' race.
During the network namespace stop process, kernel-side netlink sockets belonging to a namespace should be closed. They should not prevent the namespace from stopping, so they do not increment the namespace usage counter. However, this counter will be put during the last sock_put. Replacing the correct netns with init_net solves the problem only partially, because the socket being stopped remains, until it is properly stopped, a valid netlink kernel socket that can be looked up by user processes. This is not a problem as long as it resides in its initial namespace (no processes inside this net), but this is not true for init_net. So, hold a reference to the socket, remove it from the lookup tables, and only after that change the namespace and perform the last put. Signed-off-by: Denis V. Lunev <den@openvz.org> Tested-by: Alexey Dobriyan <adobriyan@openvz.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/core/rtnetlink.c15
-rw-r--r--net/ipv4/fib_frontend.c7
-rw-r--r--net/netlink/af_netlink.c15
3 files changed, 18 insertions, 19 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02cf848f71d2..ddbdde82a700 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1368,25 +1368,14 @@ static int rtnetlink_net_init(struct net *net)
1368 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); 1368 rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
1369 if (!sk) 1369 if (!sk)
1370 return -ENOMEM; 1370 return -ENOMEM;
1371
1372 /* Don't hold an extra reference on the namespace */
1373 put_net(sk->sk_net);
1374 net->rtnl = sk; 1371 net->rtnl = sk;
1375 return 0; 1372 return 0;
1376} 1373}
1377 1374
1378static void rtnetlink_net_exit(struct net *net) 1375static void rtnetlink_net_exit(struct net *net)
1379{ 1376{
1380 struct sock *sk = net->rtnl; 1377 netlink_kernel_release(net->rtnl);
1381 if (sk) { 1378 net->rtnl = NULL;
1382 /* At the last minute lie and say this is a socket for the
1383 * initial network namespace. So the socket will be safe to
1384 * free.
1385 */
1386 sk->sk_net = get_net(&init_net);
1387 netlink_kernel_release(net->rtnl);
1388 net->rtnl = NULL;
1389 }
1390} 1379}
1391 1380
1392static struct pernet_operations rtnetlink_net_ops = { 1381static struct pernet_operations rtnetlink_net_ops = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e787d2151152..62bd791c204e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net)
869 nl_fib_input, NULL, THIS_MODULE); 869 nl_fib_input, NULL, THIS_MODULE);
870 if (sk == NULL) 870 if (sk == NULL)
871 return -EAFNOSUPPORT; 871 return -EAFNOSUPPORT;
872 /* Don't hold an extra reference on the namespace */
873 put_net(sk->sk_net);
874 net->ipv4.fibnl = sk; 872 net->ipv4.fibnl = sk;
875 return 0; 873 return 0;
876} 874}
877 875
878static void nl_fib_lookup_exit(struct net *net) 876static void nl_fib_lookup_exit(struct net *net)
879{ 877{
880 /* At the last minute lie and say this is a socket for the
881 * initial network namespace. So the socket will be safe to free.
882 */
883 net->ipv4.fibnl->sk_net = get_net(&init_net);
884 netlink_kernel_release(net->ipv4.fibnl); 878 netlink_kernel_release(net->ipv4.fibnl);
879 net->ipv4.fibnl = NULL;
885} 880}
886 881
887static void fib_disable_ip(struct net_device *dev, int force) 882static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 626a58206298..6b178e1247b5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
1396 } 1396 }
1397 netlink_table_ungrab(); 1397 netlink_table_ungrab();
1398 1398
1399 /* Do not hold an extra referrence to a namespace as this socket is
1400 * internal to a namespace and does not prevent it to stop. */
1401 put_net(net);
1399 return sk; 1402 return sk;
1400 1403
1401out_sock_release: 1404out_sock_release:
@@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk)
1411{ 1414{
1412 if (sk == NULL || sk->sk_socket == NULL) 1415 if (sk == NULL || sk->sk_socket == NULL)
1413 return; 1416 return;
1417
1418 /*
1419 * Last sock_put should drop referrence to sk->sk_net. It has already
1420 * been dropped in netlink_kernel_create. Taking referrence to stopping
1421 * namespace is not an option.
1422 * Take referrence to a socket to remove it from netlink lookup table
1423 * _alive_ and after that destroy it in the context of init_net.
1424 */
1425 sock_hold(sk);
1414 sock_release(sk->sk_socket); 1426 sock_release(sk->sk_socket);
1427
1428 sk->sk_net = get_net(&init_net);
1429 sock_put(sk);
1415} 1430}
1416EXPORT_SYMBOL(netlink_kernel_release); 1431EXPORT_SYMBOL(netlink_kernel_release);
1417 1432