diff options
author | Hans Schillstrom <hans@schillstrom.com> | 2011-05-03 16:09:30 -0400 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2011-05-10 03:52:33 -0400 |
commit | 1ae132b0347907ac95b8bc9dba37934f59d2a508 (patch) | |
tree | 12d525adcb59dc199c9dd40924499bab03621c59 /net | |
parent | 103a9778e07bcc0cd34b5c35a87281454eec719e (diff) |
IPVS: Change of socket usage to enable name space exit.
If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.
Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.
Thanks Eric W Biederman for your advices.
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
[horms@verge.net.au: minor edit to changelog]
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 58 |
2 files changed, 38 insertions, 22 deletions
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 07accf6b240..a0791dc05a2 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net) | |||
1896 | 1896 | ||
1897 | static void __net_exit __ip_vs_cleanup(struct net *net) | 1897 | static void __net_exit __ip_vs_cleanup(struct net *net) |
1898 | { | 1898 | { |
1899 | IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen); | 1899 | IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); |
1900 | } | 1900 | } |
1901 | 1901 | ||
1902 | static struct pernet_operations ipvs_core_ops = { | 1902 | static struct pernet_operations ipvs_core_ops = { |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3e7961e85e9..0cce9531082 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net) | |||
1303 | struct socket *sock; | 1303 | struct socket *sock; |
1304 | int result; | 1304 | int result; |
1305 | 1305 | ||
1306 | /* First create a socket */ | 1306 | /* First create a socket move it to right name space later */ |
1307 | result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); | 1307 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
1308 | if (result < 0) { | 1308 | if (result < 0) { |
1309 | pr_err("Error during creation of socket; terminating\n"); | 1309 | pr_err("Error during creation of socket; terminating\n"); |
1310 | return ERR_PTR(result); | 1310 | return ERR_PTR(result); |
1311 | } | 1311 | } |
1312 | 1312 | /* | |
1313 | * Kernel sockets that are a part of a namespace, should not | ||
1314 | * hold a reference to a namespace in order to allow to stop it. | ||
1315 | * After sk_change_net should be released using sk_release_kernel. | ||
1316 | */ | ||
1317 | sk_change_net(sock->sk, net); | ||
1313 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); | 1318 | result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); |
1314 | if (result < 0) { | 1319 | if (result < 0) { |
1315 | pr_err("Error setting outbound mcast interface\n"); | 1320 | pr_err("Error setting outbound mcast interface\n"); |
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net) | |||
1334 | 1339 | ||
1335 | return sock; | 1340 | return sock; |
1336 | 1341 | ||
1337 | error: | 1342 | error: |
1338 | sock_release(sock); | 1343 | sk_release_kernel(sock->sk); |
1339 | return ERR_PTR(result); | 1344 | return ERR_PTR(result); |
1340 | } | 1345 | } |
1341 | 1346 | ||
@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net) | |||
1350 | int result; | 1355 | int result; |
1351 | 1356 | ||
1352 | /* First create a socket */ | 1357 | /* First create a socket */ |
1353 | result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1); | 1358 | result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); |
1354 | if (result < 0) { | 1359 | if (result < 0) { |
1355 | pr_err("Error during creation of socket; terminating\n"); | 1360 | pr_err("Error during creation of socket; terminating\n"); |
1356 | return ERR_PTR(result); | 1361 | return ERR_PTR(result); |
1357 | } | 1362 | } |
1358 | 1363 | /* | |
1364 | * Kernel sockets that are a part of a namespace, should not | ||
1365 | * hold a reference to a namespace in order to allow to stop it. | ||
1366 | * After sk_change_net should be released using sk_release_kernel. | ||
1367 | */ | ||
1368 | sk_change_net(sock->sk, net); | ||
1359 | /* it is equivalent to the REUSEADDR option in user-space */ | 1369 | /* it is equivalent to the REUSEADDR option in user-space */ |
1360 | sock->sk->sk_reuse = 1; | 1370 | sock->sk->sk_reuse = 1; |
1361 | 1371 | ||
@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net) | |||
1377 | 1387 | ||
1378 | return sock; | 1388 | return sock; |
1379 | 1389 | ||
1380 | error: | 1390 | error: |
1381 | sock_release(sock); | 1391 | sk_release_kernel(sock->sk); |
1382 | return ERR_PTR(result); | 1392 | return ERR_PTR(result); |
1383 | } | 1393 | } |
1384 | 1394 | ||
@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data) | |||
1473 | ip_vs_sync_buff_release(sb); | 1483 | ip_vs_sync_buff_release(sb); |
1474 | 1484 | ||
1475 | /* release the sending multicast socket */ | 1485 | /* release the sending multicast socket */ |
1476 | sock_release(tinfo->sock); | 1486 | sk_release_kernel(tinfo->sock->sk); |
1477 | kfree(tinfo); | 1487 | kfree(tinfo); |
1478 | 1488 | ||
1479 | return 0; | 1489 | return 0; |
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data) | |||
1513 | } | 1523 | } |
1514 | 1524 | ||
1515 | /* release the sending multicast socket */ | 1525 | /* release the sending multicast socket */ |
1516 | sock_release(tinfo->sock); | 1526 | sk_release_kernel(tinfo->sock->sk); |
1517 | kfree(tinfo->buf); | 1527 | kfree(tinfo->buf); |
1518 | kfree(tinfo); | 1528 | kfree(tinfo); |
1519 | 1529 | ||
@@ -1601,7 +1611,7 @@ outtinfo: | |||
1601 | outbuf: | 1611 | outbuf: |
1602 | kfree(buf); | 1612 | kfree(buf); |
1603 | outsocket: | 1613 | outsocket: |
1604 | sock_release(sock); | 1614 | sk_release_kernel(sock->sk); |
1605 | out: | 1615 | out: |
1606 | return result; | 1616 | return result; |
1607 | } | 1617 | } |
@@ -1610,6 +1620,7 @@ out: | |||
1610 | int stop_sync_thread(struct net *net, int state) | 1620 | int stop_sync_thread(struct net *net, int state) |
1611 | { | 1621 | { |
1612 | struct netns_ipvs *ipvs = net_ipvs(net); | 1622 | struct netns_ipvs *ipvs = net_ipvs(net); |
1623 | int retc = -EINVAL; | ||
1613 | 1624 | ||
1614 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); | 1625 | IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); |
1615 | 1626 | ||
@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state) | |||
1629 | spin_lock_bh(&ipvs->sync_lock); | 1640 | spin_lock_bh(&ipvs->sync_lock); |
1630 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; | 1641 | ipvs->sync_state &= ~IP_VS_STATE_MASTER; |
1631 | spin_unlock_bh(&ipvs->sync_lock); | 1642 | spin_unlock_bh(&ipvs->sync_lock); |
1632 | kthread_stop(ipvs->master_thread); | 1643 | retc = kthread_stop(ipvs->master_thread); |
1633 | ipvs->master_thread = NULL; | 1644 | ipvs->master_thread = NULL; |
1634 | } else if (state == IP_VS_STATE_BACKUP) { | 1645 | } else if (state == IP_VS_STATE_BACKUP) { |
1635 | if (!ipvs->backup_thread) | 1646 | if (!ipvs->backup_thread) |
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state) | |||
1639 | task_pid_nr(ipvs->backup_thread)); | 1650 | task_pid_nr(ipvs->backup_thread)); |
1640 | 1651 | ||
1641 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; | 1652 | ipvs->sync_state &= ~IP_VS_STATE_BACKUP; |
1642 | kthread_stop(ipvs->backup_thread); | 1653 | retc = kthread_stop(ipvs->backup_thread); |
1643 | ipvs->backup_thread = NULL; | 1654 | ipvs->backup_thread = NULL; |
1644 | } else { | ||
1645 | return -EINVAL; | ||
1646 | } | 1655 | } |
1647 | 1656 | ||
1648 | /* decrease the module use count */ | 1657 | /* decrease the module use count */ |
1649 | ip_vs_use_count_dec(); | 1658 | ip_vs_use_count_dec(); |
1650 | 1659 | ||
1651 | return 0; | 1660 | return retc; |
1652 | } | 1661 | } |
1653 | 1662 | ||
1654 | /* | 1663 | /* |
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net) | |||
1670 | 1679 | ||
1671 | static void __ip_vs_sync_cleanup(struct net *net) | 1680 | static void __ip_vs_sync_cleanup(struct net *net) |
1672 | { | 1681 | { |
1673 | stop_sync_thread(net, IP_VS_STATE_MASTER); | 1682 | int retc; |
1674 | stop_sync_thread(net, IP_VS_STATE_BACKUP); | 1683 | |
1684 | retc = stop_sync_thread(net, IP_VS_STATE_MASTER); | ||
1685 | if (retc && retc != -ESRCH) | ||
1686 | pr_err("Failed to stop Master Daemon\n"); | ||
1687 | |||
1688 | retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); | ||
1689 | if (retc && retc != -ESRCH) | ||
1690 | pr_err("Failed to stop Backup Daemon\n"); | ||
1675 | } | 1691 | } |
1676 | 1692 | ||
1677 | static struct pernet_operations ipvs_sync_ops = { | 1693 | static struct pernet_operations ipvs_sync_ops = { |
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = { | |||
1682 | 1698 | ||
1683 | int __init ip_vs_sync_init(void) | 1699 | int __init ip_vs_sync_init(void) |
1684 | { | 1700 | { |
1685 | return register_pernet_subsys(&ipvs_sync_ops); | 1701 | return register_pernet_device(&ipvs_sync_ops); |
1686 | } | 1702 | } |
1687 | 1703 | ||
1688 | void ip_vs_sync_cleanup(void) | 1704 | void ip_vs_sync_cleanup(void) |
1689 | { | 1705 | { |
1690 | unregister_pernet_subsys(&ipvs_sync_ops); | 1706 | unregister_pernet_device(&ipvs_sync_ops); |
1691 | } | 1707 | } |