aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Julian Anastasov <ja@ssi.bg> 2013-10-09 02:24:27 -0400
committer Simon Horman <horms@verge.net.au> 2013-10-14 21:36:01 -0400
commit 9e4e948a3edafd2b7f4dc14c395e146ffd0d9611 (patch)
tree 2432d5bc712bc0ed0ab9f978807190d093bcd1d3
parent 120c9794a3ee2f9b1548a1b0b252652e3c134f59 (diff)
ipvs: avoid rcu_barrier during netns cleanup
Commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added rcu_barrier() on cleanup to wait for dest users and schedulers like LBLC and LBLCR to put their last dest reference. Using rcu_barrier() with many namespaces is problematic.

Trying to fix it by freeing the dest with kfree_rcu() is not a solution: RCU callbacks can run in parallel and their execution order is random.

Fix it by creating a new function, ip_vs_dest_put_and_free(), which is heavier than ip_vs_dest_put(). We will use it just for schedulers like LBLC and LBLCR that can delay their dest release.

By default, a dest's reference count is above 0 while it is present in a service, and it is 0 when the dest is deleted but still in the trash list. Change the dest trash code to use ip_vs_dest_put_and_free(), so that refcnt -1 can be used for freeing. As a result, such checks remain in the slow path and the rcu_barrier() from netns cleanup can be removed.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
-rw-r--r-- include/net/ip_vs.h 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 2
4 files changed, 9 insertions, 7 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 1c2e1b9f6b86..cd7275f9c463 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1442,6 +1442,12 @@ static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
1442 atomic_dec(&dest->refcnt); 1442 atomic_dec(&dest->refcnt);
1443} 1443}
1444 1444
1445static inline void ip_vs_dest_put_and_free(struct ip_vs_dest *dest)
1446{
1447 if (atomic_dec_return(&dest->refcnt) < 0)
1448 kfree(dest);
1449}
1450
1445/* 1451/*
1446 * IPVS sync daemon data and function prototypes 1452 * IPVS sync daemon data and function prototypes
1447 * (from ip_vs_sync.c) 1453 * (from ip_vs_sync.c)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a3df9bddc4f7..62786a495cea 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
704 __ip_vs_dst_cache_reset(dest); 704 __ip_vs_dst_cache_reset(dest);
705 __ip_vs_svc_put(svc, false); 705 __ip_vs_svc_put(svc, false);
706 free_percpu(dest->stats.cpustats); 706 free_percpu(dest->stats.cpustats);
707 kfree(dest); 707 ip_vs_dest_put_and_free(dest);
708} 708}
709 709
710/* 710/*
@@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
3820{ 3820{
3821 struct netns_ipvs *ipvs = net_ipvs(net); 3821 struct netns_ipvs *ipvs = net_ipvs(net);
3822 3822
3823 /* Some dest can be in grace period even before cleanup, we have to
3824 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
3825 */
3826 rcu_barrier();
3827 ip_vs_trash_cleanup(net); 3823 ip_vs_trash_cleanup(net);
3828 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3824 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3829 ip_vs_control_net_cleanup_sysctl(net); 3825 ip_vs_control_net_cleanup_sysctl(net);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index eff13c94498e..ca056a331e60 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head)
136 struct ip_vs_lblc_entry, 136 struct ip_vs_lblc_entry,
137 rcu_head); 137 rcu_head);
138 138
139 ip_vs_dest_put(en->dest); 139 ip_vs_dest_put_and_free(en->dest);
140 kfree(en); 140 kfree(en);
141} 141}
142 142
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 0b8550089a2e..3f21a2f47de1 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
130 struct ip_vs_dest_set_elem *e; 130 struct ip_vs_dest_set_elem *e;
131 131
132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); 132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
133 ip_vs_dest_put(e->dest); 133 ip_vs_dest_put_and_free(e->dest);
134 kfree(e); 134 kfree(e);
135} 135}
136 136