diff options
author | Denis V. Lunev <den@openvz.org> | 2008-01-19 02:55:19 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-01-28 18:08:08 -0500 |
commit | 775516bfa2bd7993620c9039191a0c30b8d8a496 (patch) | |
tree | 7385f35edda9f4124b802ccf337c55070d2e00bc | |
parent | b7c6ba6eb1234e35a74fb8ba8123232a7b1ba9e4 (diff) |
[NETNS]: Namespace stop vs 'ip r l' race.
During the network namespace stop process, kernel-side netlink sockets
belonging to the namespace should be closed. They should not prevent
the namespace from stopping, so they do not increment the namespace usage
counter. However, this counter will still be put during the last sock_put.
Replacing the correct netns with init_net solves the problem
only partially: until it is properly stopped, the socket being stopped is
still a valid netlink kernel socket and can be looked up by user processes.
This is not a problem while it resides in its initial namespace (no processes
inside this net), but this is not true for init_net.
So, hold a reference on the socket, remove it from the lookup tables, and
only after that change its namespace and perform the last put.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Tested-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/core/rtnetlink.c | 15 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 7 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 15 |
3 files changed, 18 insertions, 19 deletions
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02cf848f71d2..ddbdde82a700 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -1368,25 +1368,14 @@ static int rtnetlink_net_init(struct net *net) | |||
1368 | rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); | 1368 | rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); |
1369 | if (!sk) | 1369 | if (!sk) |
1370 | return -ENOMEM; | 1370 | return -ENOMEM; |
1371 | |||
1372 | /* Don't hold an extra reference on the namespace */ | ||
1373 | put_net(sk->sk_net); | ||
1374 | net->rtnl = sk; | 1371 | net->rtnl = sk; |
1375 | return 0; | 1372 | return 0; |
1376 | } | 1373 | } |
1377 | 1374 | ||
1378 | static void rtnetlink_net_exit(struct net *net) | 1375 | static void rtnetlink_net_exit(struct net *net) |
1379 | { | 1376 | { |
1380 | struct sock *sk = net->rtnl; | 1377 | netlink_kernel_release(net->rtnl); |
1381 | if (sk) { | 1378 | net->rtnl = NULL; |
1382 | /* At the last minute lie and say this is a socket for the | ||
1383 | * initial network namespace. So the socket will be safe to | ||
1384 | * free. | ||
1385 | */ | ||
1386 | sk->sk_net = get_net(&init_net); | ||
1387 | netlink_kernel_release(net->rtnl); | ||
1388 | net->rtnl = NULL; | ||
1389 | } | ||
1390 | } | 1379 | } |
1391 | 1380 | ||
1392 | static struct pernet_operations rtnetlink_net_ops = { | 1381 | static struct pernet_operations rtnetlink_net_ops = { |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e787d2151152..62bd791c204e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -869,19 +869,14 @@ static int nl_fib_lookup_init(struct net *net) | |||
869 | nl_fib_input, NULL, THIS_MODULE); | 869 | nl_fib_input, NULL, THIS_MODULE); |
870 | if (sk == NULL) | 870 | if (sk == NULL) |
871 | return -EAFNOSUPPORT; | 871 | return -EAFNOSUPPORT; |
872 | /* Don't hold an extra reference on the namespace */ | ||
873 | put_net(sk->sk_net); | ||
874 | net->ipv4.fibnl = sk; | 872 | net->ipv4.fibnl = sk; |
875 | return 0; | 873 | return 0; |
876 | } | 874 | } |
877 | 875 | ||
878 | static void nl_fib_lookup_exit(struct net *net) | 876 | static void nl_fib_lookup_exit(struct net *net) |
879 | { | 877 | { |
880 | /* At the last minute lie and say this is a socket for the | ||
881 | * initial network namespace. So the socket will be safe to free. | ||
882 | */ | ||
883 | net->ipv4.fibnl->sk_net = get_net(&init_net); | ||
884 | netlink_kernel_release(net->ipv4.fibnl); | 878 | netlink_kernel_release(net->ipv4.fibnl); |
879 | net->ipv4.fibnl = NULL; | ||
885 | } | 880 | } |
886 | 881 | ||
887 | static void fib_disable_ip(struct net_device *dev, int force) | 882 | static void fib_disable_ip(struct net_device *dev, int force) |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 626a58206298..6b178e1247b5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -1396,6 +1396,9 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, | |||
1396 | } | 1396 | } |
1397 | netlink_table_ungrab(); | 1397 | netlink_table_ungrab(); |
1398 | 1398 | ||
1399 | /* Do not hold an extra referrence to a namespace as this socket is | ||
1400 | * internal to a namespace and does not prevent it to stop. */ | ||
1401 | put_net(net); | ||
1399 | return sk; | 1402 | return sk; |
1400 | 1403 | ||
1401 | out_sock_release: | 1404 | out_sock_release: |
@@ -1411,7 +1414,19 @@ netlink_kernel_release(struct sock *sk) | |||
1411 | { | 1414 | { |
1412 | if (sk == NULL || sk->sk_socket == NULL) | 1415 | if (sk == NULL || sk->sk_socket == NULL) |
1413 | return; | 1416 | return; |
1417 | |||
1418 | /* | ||
1419 | * Last sock_put should drop referrence to sk->sk_net. It has already | ||
1420 | * been dropped in netlink_kernel_create. Taking referrence to stopping | ||
1421 | * namespace is not an option. | ||
1422 | * Take referrence to a socket to remove it from netlink lookup table | ||
1423 | * _alive_ and after that destroy it in the context of init_net. | ||
1424 | */ | ||
1425 | sock_hold(sk); | ||
1414 | sock_release(sk->sk_socket); | 1426 | sock_release(sk->sk_socket); |
1427 | |||
1428 | sk->sk_net = get_net(&init_net); | ||
1429 | sock_put(sk); | ||
1415 | } | 1430 | } |
1416 | EXPORT_SYMBOL(netlink_kernel_release); | 1431 | EXPORT_SYMBOL(netlink_kernel_release); |
1417 | 1432 | ||