diff options
author | Hans Schillstrom <hans@schillstrom.com> | 2011-10-10 21:54:35 -0400 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2011-10-12 12:32:15 -0400 |
commit | ae1d48b23d5e79efbcf0cef4f0ebb9742361af59 (patch) | |
tree | ee357eda6a7a031be2756133a9f31be325081f21 /net | |
parent | 98d9ae841ad620045d653fb05764e4a899f42dbd (diff) |
IPVS netns shutdown/startup dead-lock
ip_vs_mutext is used by both netns shutdown code and startup
and both implicit uses sk_lock-AF_INET mutex.
cleanup CPU-1 startup CPU-2
ip_vs_dst_event() ip_vs_genl_set_cmd()
sk_lock-AF_INET __ip_vs_mutex
sk_lock-AF_INET
__ip_vs_mutex
* DEAD LOCK *
A new mutex placed in ip_vs netns struct called sync_mutex is added.
Comments from Julian and Simon added.
This patch has been running for more than 3 month now and it seems to work.
Ver. 3
IP_VS_SO_GET_DAEMON in do_ip_vs_get_ctl protected by sync_mutex
instead of __ip_vs_mutex as sugested by Julian.
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 131 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sync.c | 6 |
2 files changed, 86 insertions, 51 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2b771dc708a3..3e5e08b78bfc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -2283,6 +2283,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2283 | struct ip_vs_service *svc; | 2283 | struct ip_vs_service *svc; |
2284 | struct ip_vs_dest_user *udest_compat; | 2284 | struct ip_vs_dest_user *udest_compat; |
2285 | struct ip_vs_dest_user_kern udest; | 2285 | struct ip_vs_dest_user_kern udest; |
2286 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
2286 | 2287 | ||
2287 | if (!capable(CAP_NET_ADMIN)) | 2288 | if (!capable(CAP_NET_ADMIN)) |
2288 | return -EPERM; | 2289 | return -EPERM; |
@@ -2303,6 +2304,24 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2303 | /* increase the module use count */ | 2304 | /* increase the module use count */ |
2304 | ip_vs_use_count_inc(); | 2305 | ip_vs_use_count_inc(); |
2305 | 2306 | ||
2307 | /* Handle daemons since they have another lock */ | ||
2308 | if (cmd == IP_VS_SO_SET_STARTDAEMON || | ||
2309 | cmd == IP_VS_SO_SET_STOPDAEMON) { | ||
2310 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | ||
2311 | |||
2312 | if (mutex_lock_interruptible(&ipvs->sync_mutex)) { | ||
2313 | ret = -ERESTARTSYS; | ||
2314 | goto out_dec; | ||
2315 | } | ||
2316 | if (cmd == IP_VS_SO_SET_STARTDAEMON) | ||
2317 | ret = start_sync_thread(net, dm->state, dm->mcast_ifn, | ||
2318 | dm->syncid); | ||
2319 | else | ||
2320 | ret = stop_sync_thread(net, dm->state); | ||
2321 | mutex_unlock(&ipvs->sync_mutex); | ||
2322 | goto out_dec; | ||
2323 | } | ||
2324 | |||
2306 | if (mutex_lock_interruptible(&__ip_vs_mutex)) { | 2325 | if (mutex_lock_interruptible(&__ip_vs_mutex)) { |
2307 | ret = -ERESTARTSYS; | 2326 | ret = -ERESTARTSYS; |
2308 | goto out_dec; | 2327 | goto out_dec; |
@@ -2316,15 +2335,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) | |||
2316 | /* Set timeout values for (tcp tcpfin udp) */ | 2335 | /* Set timeout values for (tcp tcpfin udp) */ |
2317 | ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); | 2336 | ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); |
2318 | goto out_unlock; | 2337 | goto out_unlock; |
2319 | } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { | ||
2320 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | ||
2321 | ret = start_sync_thread(net, dm->state, dm->mcast_ifn, | ||
2322 | dm->syncid); | ||
2323 | goto out_unlock; | ||
2324 | } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { | ||
2325 | struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; | ||
2326 | ret = stop_sync_thread(net, dm->state); | ||
2327 | goto out_unlock; | ||
2328 | } | 2338 | } |
2329 | 2339 | ||
2330 | usvc_compat = (struct ip_vs_service_user *)arg; | 2340 | usvc_compat = (struct ip_vs_service_user *)arg; |
@@ -2584,6 +2594,33 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2584 | 2594 | ||
2585 | if (copy_from_user(arg, user, copylen) != 0) | 2595 | if (copy_from_user(arg, user, copylen) != 0) |
2586 | return -EFAULT; | 2596 | return -EFAULT; |
2597 | /* | ||
2598 | * Handle daemons first since it has its own locking | ||
2599 | */ | ||
2600 | if (cmd == IP_VS_SO_GET_DAEMON) { | ||
2601 | struct ip_vs_daemon_user d[2]; | ||
2602 | |||
2603 | memset(&d, 0, sizeof(d)); | ||
2604 | if (mutex_lock_interruptible(&ipvs->sync_mutex)) | ||
2605 | return -ERESTARTSYS; | ||
2606 | |||
2607 | if (ipvs->sync_state & IP_VS_STATE_MASTER) { | ||
2608 | d[0].state = IP_VS_STATE_MASTER; | ||
2609 | strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, | ||
2610 | sizeof(d[0].mcast_ifn)); | ||
2611 | d[0].syncid = ipvs->master_syncid; | ||
2612 | } | ||
2613 | if (ipvs->sync_state & IP_VS_STATE_BACKUP) { | ||
2614 | d[1].state = IP_VS_STATE_BACKUP; | ||
2615 | strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, | ||
2616 | sizeof(d[1].mcast_ifn)); | ||
2617 | d[1].syncid = ipvs->backup_syncid; | ||
2618 | } | ||
2619 | if (copy_to_user(user, &d, sizeof(d)) != 0) | ||
2620 | ret = -EFAULT; | ||
2621 | mutex_unlock(&ipvs->sync_mutex); | ||
2622 | return ret; | ||
2623 | } | ||
2587 | 2624 | ||
2588 | if (mutex_lock_interruptible(&__ip_vs_mutex)) | 2625 | if (mutex_lock_interruptible(&__ip_vs_mutex)) |
2589 | return -ERESTARTSYS; | 2626 | return -ERESTARTSYS; |
@@ -2681,28 +2718,6 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2681 | } | 2718 | } |
2682 | break; | 2719 | break; |
2683 | 2720 | ||
2684 | case IP_VS_SO_GET_DAEMON: | ||
2685 | { | ||
2686 | struct ip_vs_daemon_user d[2]; | ||
2687 | |||
2688 | memset(&d, 0, sizeof(d)); | ||
2689 | if (ipvs->sync_state & IP_VS_STATE_MASTER) { | ||
2690 | d[0].state = IP_VS_STATE_MASTER; | ||
2691 | strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, | ||
2692 | sizeof(d[0].mcast_ifn)); | ||
2693 | d[0].syncid = ipvs->master_syncid; | ||
2694 | } | ||
2695 | if (ipvs->sync_state & IP_VS_STATE_BACKUP) { | ||
2696 | d[1].state = IP_VS_STATE_BACKUP; | ||
2697 | strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, | ||
2698 | sizeof(d[1].mcast_ifn)); | ||
2699 | d[1].syncid = ipvs->backup_syncid; | ||
2700 | } | ||
2701 | if (copy_to_user(user, &d, sizeof(d)) != 0) | ||
2702 | ret = -EFAULT; | ||
2703 | } | ||
2704 | break; | ||
2705 | |||
2706 | default: | 2721 | default: |
2707 | ret = -EINVAL; | 2722 | ret = -EINVAL; |
2708 | } | 2723 | } |
@@ -3205,7 +3220,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, | |||
3205 | struct net *net = skb_sknet(skb); | 3220 | struct net *net = skb_sknet(skb); |
3206 | struct netns_ipvs *ipvs = net_ipvs(net); | 3221 | struct netns_ipvs *ipvs = net_ipvs(net); |
3207 | 3222 | ||
3208 | mutex_lock(&__ip_vs_mutex); | 3223 | mutex_lock(&ipvs->sync_mutex); |
3209 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { | 3224 | if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { |
3210 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, | 3225 | if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, |
3211 | ipvs->master_mcast_ifn, | 3226 | ipvs->master_mcast_ifn, |
@@ -3225,7 +3240,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, | |||
3225 | } | 3240 | } |
3226 | 3241 | ||
3227 | nla_put_failure: | 3242 | nla_put_failure: |
3228 | mutex_unlock(&__ip_vs_mutex); | 3243 | mutex_unlock(&ipvs->sync_mutex); |
3229 | 3244 | ||
3230 | return skb->len; | 3245 | return skb->len; |
3231 | } | 3246 | } |
@@ -3271,13 +3286,9 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) | |||
3271 | return ip_vs_set_timeout(net, &t); | 3286 | return ip_vs_set_timeout(net, &t); |
3272 | } | 3287 | } |
3273 | 3288 | ||
3274 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | 3289 | static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) |
3275 | { | 3290 | { |
3276 | struct ip_vs_service *svc = NULL; | ||
3277 | struct ip_vs_service_user_kern usvc; | ||
3278 | struct ip_vs_dest_user_kern udest; | ||
3279 | int ret = 0, cmd; | 3291 | int ret = 0, cmd; |
3280 | int need_full_svc = 0, need_full_dest = 0; | ||
3281 | struct net *net; | 3292 | struct net *net; |
3282 | struct netns_ipvs *ipvs; | 3293 | struct netns_ipvs *ipvs; |
3283 | 3294 | ||
@@ -3285,19 +3296,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3285 | ipvs = net_ipvs(net); | 3296 | ipvs = net_ipvs(net); |
3286 | cmd = info->genlhdr->cmd; | 3297 | cmd = info->genlhdr->cmd; |
3287 | 3298 | ||
3288 | mutex_lock(&__ip_vs_mutex); | 3299 | if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { |
3289 | |||
3290 | if (cmd == IPVS_CMD_FLUSH) { | ||
3291 | ret = ip_vs_flush(net); | ||
3292 | goto out; | ||
3293 | } else if (cmd == IPVS_CMD_SET_CONFIG) { | ||
3294 | ret = ip_vs_genl_set_config(net, info->attrs); | ||
3295 | goto out; | ||
3296 | } else if (cmd == IPVS_CMD_NEW_DAEMON || | ||
3297 | cmd == IPVS_CMD_DEL_DAEMON) { | ||
3298 | |||
3299 | struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; | 3300 | struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; |
3300 | 3301 | ||
3302 | mutex_lock(&ipvs->sync_mutex); | ||
3301 | if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || | 3303 | if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || |
3302 | nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, | 3304 | nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, |
3303 | info->attrs[IPVS_CMD_ATTR_DAEMON], | 3305 | info->attrs[IPVS_CMD_ATTR_DAEMON], |
@@ -3310,6 +3312,33 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | |||
3310 | ret = ip_vs_genl_new_daemon(net, daemon_attrs); | 3312 | ret = ip_vs_genl_new_daemon(net, daemon_attrs); |
3311 | else | 3313 | else |
3312 | ret = ip_vs_genl_del_daemon(net, daemon_attrs); | 3314 | ret = ip_vs_genl_del_daemon(net, daemon_attrs); |
3315 | out: | ||
3316 | mutex_unlock(&ipvs->sync_mutex); | ||
3317 | } | ||
3318 | return ret; | ||
3319 | } | ||
3320 | |||
3321 | static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) | ||
3322 | { | ||
3323 | struct ip_vs_service *svc = NULL; | ||
3324 | struct ip_vs_service_user_kern usvc; | ||
3325 | struct ip_vs_dest_user_kern udest; | ||
3326 | int ret = 0, cmd; | ||
3327 | int need_full_svc = 0, need_full_dest = 0; | ||
3328 | struct net *net; | ||
3329 | struct netns_ipvs *ipvs; | ||
3330 | |||
3331 | net = skb_sknet(skb); | ||
3332 | ipvs = net_ipvs(net); | ||
3333 | cmd = info->genlhdr->cmd; | ||
3334 | |||
3335 | mutex_lock(&__ip_vs_mutex); | ||
3336 | |||
3337 | if (cmd == IPVS_CMD_FLUSH) { | ||
3338 | ret = ip_vs_flush(net); | ||
3339 | goto out; | ||
3340 | } else if (cmd == IPVS_CMD_SET_CONFIG) { | ||
3341 | ret = ip_vs_genl_set_config(net, info->attrs); | ||
3313 | goto out; | 3342 | goto out; |
3314 | } else if (cmd == IPVS_CMD_ZERO && | 3343 | } else if (cmd == IPVS_CMD_ZERO && |
3315 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { | 3344 | !info->attrs[IPVS_CMD_ATTR_SERVICE]) { |
@@ -3536,13 +3565,13 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = { | |||
3536 | .cmd = IPVS_CMD_NEW_DAEMON, | 3565 | .cmd = IPVS_CMD_NEW_DAEMON, |
3537 | .flags = GENL_ADMIN_PERM, | 3566 | .flags = GENL_ADMIN_PERM, |
3538 | .policy = ip_vs_cmd_policy, | 3567 | .policy = ip_vs_cmd_policy, |
3539 | .doit = ip_vs_genl_set_cmd, | 3568 | .doit = ip_vs_genl_set_daemon, |
3540 | }, | 3569 | }, |
3541 | { | 3570 | { |
3542 | .cmd = IPVS_CMD_DEL_DAEMON, | 3571 | .cmd = IPVS_CMD_DEL_DAEMON, |
3543 | .flags = GENL_ADMIN_PERM, | 3572 | .flags = GENL_ADMIN_PERM, |
3544 | .policy = ip_vs_cmd_policy, | 3573 | .policy = ip_vs_cmd_policy, |
3545 | .doit = ip_vs_genl_set_cmd, | 3574 | .doit = ip_vs_genl_set_daemon, |
3546 | }, | 3575 | }, |
3547 | { | 3576 | { |
3548 | .cmd = IPVS_CMD_GET_DAEMON, | 3577 | .cmd = IPVS_CMD_GET_DAEMON, |
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7ee7215b8ba0..3cdd479f9b5d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c | |||
@@ -61,6 +61,7 @@ | |||
61 | 61 | ||
62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ | 62 | #define SYNC_PROTO_VER 1 /* Protocol version in header */ |
63 | 63 | ||
64 | static struct lock_class_key __ipvs_sync_key; | ||
64 | /* | 65 | /* |
65 | * IPVS sync connection entry | 66 | * IPVS sync connection entry |
66 | * Version 0, i.e. original version. | 67 | * Version 0, i.e. original version. |
@@ -1545,6 +1546,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) | |||
1545 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", | 1546 | IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", |
1546 | sizeof(struct ip_vs_sync_conn_v0)); | 1547 | sizeof(struct ip_vs_sync_conn_v0)); |
1547 | 1548 | ||
1549 | |||
1548 | if (state == IP_VS_STATE_MASTER) { | 1550 | if (state == IP_VS_STATE_MASTER) { |
1549 | if (ipvs->master_thread) | 1551 | if (ipvs->master_thread) |
1550 | return -EEXIST; | 1552 | return -EEXIST; |
@@ -1667,6 +1669,7 @@ int __net_init ip_vs_sync_net_init(struct net *net) | |||
1667 | { | 1669 | { |
1668 | struct netns_ipvs *ipvs = net_ipvs(net); | 1670 | struct netns_ipvs *ipvs = net_ipvs(net); |
1669 | 1671 | ||
1672 | __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); | ||
1670 | INIT_LIST_HEAD(&ipvs->sync_queue); | 1673 | INIT_LIST_HEAD(&ipvs->sync_queue); |
1671 | spin_lock_init(&ipvs->sync_lock); | 1674 | spin_lock_init(&ipvs->sync_lock); |
1672 | spin_lock_init(&ipvs->sync_buff_lock); | 1675 | spin_lock_init(&ipvs->sync_buff_lock); |
@@ -1680,7 +1683,9 @@ int __net_init ip_vs_sync_net_init(struct net *net) | |||
1680 | void ip_vs_sync_net_cleanup(struct net *net) | 1683 | void ip_vs_sync_net_cleanup(struct net *net) |
1681 | { | 1684 | { |
1682 | int retc; | 1685 | int retc; |
1686 | struct netns_ipvs *ipvs = net_ipvs(net); | ||
1683 | 1687 | ||
1688 | mutex_lock(&ipvs->sync_mutex); | ||
1684 | retc = stop_sync_thread(net, IP_VS_STATE_MASTER); | 1689 | retc = stop_sync_thread(net, IP_VS_STATE_MASTER); |
1685 | if (retc && retc != -ESRCH) | 1690 | if (retc && retc != -ESRCH) |
1686 | pr_err("Failed to stop Master Daemon\n"); | 1691 | pr_err("Failed to stop Master Daemon\n"); |
@@ -1688,4 +1693,5 @@ void ip_vs_sync_net_cleanup(struct net *net) | |||
1688 | retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); | 1693 | retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); |
1689 | if (retc && retc != -ESRCH) | 1694 | if (retc && retc != -ESRCH) |
1690 | pr_err("Failed to stop Backup Daemon\n"); | 1695 | pr_err("Failed to stop Backup Daemon\n"); |
1696 | mutex_unlock(&ipvs->sync_mutex); | ||
1691 | } | 1697 | } |