author    Sasha Levin <sasha.levin@oracle.com>      2016-01-18 19:23:51 -0500
committer Pablo Neira Ayuso <pablo@netfilter.org>   2016-01-20 08:15:31 -0500
commit    b16c29191dc89bd877af99a7b04ce4866728a3e0
tree      4f8e1b54e5f4ec83b20c5a01eaee489a21394c3d /net
parent    35b815392a6b6c268baf3b63d7f2ba350597024f
netfilter: nf_conntrack: use safer way to lock all buckets
When we need to lock all buckets in the connection hashtable we'd attempt
to lock 1024 spinlocks, which is way more preemption levels than supported
by the kernel. Furthermore, this behavior was hidden by checking if lockdep
is enabled, and if it was - use only 8 buckets(!).

Fix this by using a global lock and synchronize all buckets on it when we
need to lock them all. This is pretty heavyweight, but is only done when we
need to resize the hashtable, and that doesn't happen often enough (or at all).

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
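The pattern is easy to model outside the kernel. Below is a minimal userspace sketch of the same scheme, assuming POSIX threads and C11 atomics; every name in it (BUCKETS, bucket_locks, locks_all, bucket_lock(), lock_all() and so on) is invented for illustration and is not a symbol from this patch. The fast path takes only its own bucket lock and rechecks the flag; the slow path raises the flag under the global lock, then takes and drops each bucket lock once, after which no holder that missed the flag can remain. A small driver for this sketch appears after the nf_conntrack_core.c diff below.

/* Minimal userspace sketch of the lock-all scheme; illustrative only.
 * bucket_locks[] is initialised with pthread_mutex_init() before use
 * (see the usage sketch further down). */
#include <pthread.h>
#include <stdatomic.h>

#define BUCKETS 1024

static pthread_mutex_t bucket_locks[BUCKETS];
static pthread_mutex_t all_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool locks_all;	/* analogue of nf_conntrack_locks_all */

/* Fast path, analogous to nf_conntrack_lock(): take one bucket lock,
 * but back off and queue on the global lock while a lock-all runs. */
static void bucket_lock(unsigned int b)
{
	pthread_mutex_lock(&bucket_locks[b]);
	while (atomic_load(&locks_all)) {
		pthread_mutex_unlock(&bucket_locks[b]);
		pthread_mutex_lock(&all_lock);	/* wait for unlock_all() */
		pthread_mutex_unlock(&all_lock);
		pthread_mutex_lock(&bucket_locks[b]);
	}
}

static void bucket_unlock(unsigned int b)
{
	pthread_mutex_unlock(&bucket_locks[b]);
}

/* Slow path, analogous to nf_conntrack_all_lock(): raise the flag,
 * then take and drop each bucket lock once. Any thread that grabbed
 * its bucket before the flag was set must release it for the drain
 * to pass, and any thread that grabs one afterwards sees the flag
 * and backs off onto all_lock. */
static void lock_all(void)
{
	unsigned int i;

	pthread_mutex_lock(&all_lock);
	atomic_store(&locks_all, true);
	for (i = 0; i < BUCKETS; i++) {
		pthread_mutex_lock(&bucket_locks[i]);
		pthread_mutex_unlock(&bucket_locks[i]);
	}
}

static void unlock_all(void)
{
	atomic_store(&locks_all, false);
	pthread_mutex_unlock(&all_lock);
}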
Diffstat (limited to 'net')
-rw-r--r--   net/netfilter/nf_conntrack_core.c      38
-rw-r--r--   net/netfilter/nf_conntrack_helper.c     2
-rw-r--r--   net/netfilter/nf_conntrack_netlink.c    2
-rw-r--r--   net/netfilter/nfnetlink_cttimeout.c     4
4 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..58882de06bd7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+	spin_lock(lock);
+	while (unlikely(nf_conntrack_locks_all)) {
+		spin_unlock(lock);
+		spin_lock(&nf_conntrack_locks_all_lock);
+		spin_unlock(&nf_conntrack_locks_all_lock);
+		spin_lock(lock);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
 static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
 {
 	h1 %= CONNTRACK_LOCKS;
@@ -82,12 +97,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
 	h1 %= CONNTRACK_LOCKS;
 	h2 %= CONNTRACK_LOCKS;
 	if (h1 <= h2) {
-		spin_lock(&nf_conntrack_locks[h1]);
+		nf_conntrack_lock(&nf_conntrack_locks[h1]);
 		if (h1 != h2)
 			spin_lock_nested(&nf_conntrack_locks[h2],
 					 SINGLE_DEPTH_NESTING);
 	} else {
-		spin_lock(&nf_conntrack_locks[h2]);
+		nf_conntrack_lock(&nf_conntrack_locks[h2]);
 		spin_lock_nested(&nf_conntrack_locks[h1],
 				 SINGLE_DEPTH_NESTING);
 	}
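A note on the hunk above: the h1 <= h2 test is what keeps the double lock deadlock-free, since every caller acquires the lower-indexed lock first and an ABBA interleaving cannot form; spin_lock_nested(..., SINGLE_DEPTH_NESTING) merely tells lockdep that taking a second lock of the same class is intentional. Only the first acquisition needs the nf_conntrack_lock() flag check: once a caller holds one bucket lock, a concurrent lock-all simply waits for it in its drain loop. A hedged restatement of the same idea, with double_lock() invented for illustration:

/* Illustrative restatement, not the patch's code: taking the pair in
 * a fixed global order makes concurrent double-locks deadlock-free. */
static void double_lock(unsigned int h1, unsigned int h2)
{
	unsigned int lo = h1 <= h2 ? h1 : h2;
	unsigned int hi = h1 <= h2 ? h2 : h1;

	nf_conntrack_lock(&nf_conntrack_locks[lo]);	/* lower index first */
	if (lo != hi)
		spin_lock_nested(&nf_conntrack_locks[hi],
				 SINGLE_DEPTH_NESTING);
}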
@@ -102,16 +117,19 @@ static void nf_conntrack_all_lock(void)
 {
 	int i;
 
-	for (i = 0; i < CONNTRACK_LOCKS; i++)
-		spin_lock_nested(&nf_conntrack_locks[i], i);
+	spin_lock(&nf_conntrack_locks_all_lock);
+	nf_conntrack_locks_all = true;
+
+	for (i = 0; i < CONNTRACK_LOCKS; i++) {
+		spin_lock(&nf_conntrack_locks[i]);
+		spin_unlock(&nf_conntrack_locks[i]);
+	}
 }
 
 static void nf_conntrack_all_unlock(void)
 {
-	int i;
-
-	for (i = 0; i < CONNTRACK_LOCKS; i++)
-		spin_unlock(&nf_conntrack_locks[i]);
+	nf_conntrack_locks_all = false;
+	spin_unlock(&nf_conntrack_locks_all_lock);
 }
 
 unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +775,7 @@ restart:
 	hash = hash_bucket(_hash, net);
 	for (; i < net->ct.htable_size; i++) {
 		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (read_seqcount_retry(&net->ct.generation, sequence)) {
 			spin_unlock(lockp);
 			goto restart;
@@ -1382,7 +1400,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 		local_bh_disable();
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (*bucket < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
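To see the two paths interact, here is a small driver for the userspace sketch given after the commit message: worker threads take individual bucket locks while the main thread performs one lock-all, standing in for the resize case the commit message describes. worker() and the iteration counts are invented for illustration, and the definitions from the earlier sketch are assumed to be in the same file. Note that the kernel code relies on the ordering implied by the spinlock operations themselves rather than on C11 atomics; a later upstream patch strengthened nf_conntrack_lock() with explicit memory barriers.

/* Usage sketch for the pthread model above; illustrative only. */
#include <stdio.h>

#define NWORKERS 4

static void *worker(void *arg)
{
	unsigned long n;
	(void)arg;

	for (n = 0; n < 100000; n++) {
		unsigned int b = n % BUCKETS;

		bucket_lock(b);
		/* ... walk or modify the chains hanging off bucket b ... */
		bucket_unlock(b);
	}
	return NULL;
}

int main(void)
{
	pthread_t tids[NWORKERS];
	int i;

	for (i = 0; i < BUCKETS; i++)
		pthread_mutex_init(&bucket_locks[i], NULL);
	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tids[i], NULL, worker, NULL);

	lock_all();
	/* ... resize/rehash: no worker holds any bucket lock here ... */
	unlock_all();

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tids[i], NULL);
	puts("done");
	return 0;
}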
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d31537905..3b40ec575cd5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	}
 	local_bh_disable();
 	for (i = 0; i < net->ct.htable_size; i++) {
-		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 		if (i < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 				unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index dbb1bb3edb45..355e8552fd5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
 restart:
 		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
-		spin_lock(lockp);
+		nf_conntrack_lock(lockp);
 		if (cb->args[0] >= net->ct.htable_size) {
 			spin_unlock(lockp);
 			goto out;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 5d010f27ac01..94837d236ab0 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -307,12 +307,12 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 
 	local_bh_disable();
 	for (i = 0; i < net->ct.htable_size; i++) {
-		spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 		if (i < net->ct.htable_size) {
 			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 				untimeout(h, timeout);
 		}
-		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
 	}
 	local_bh_enable();
 }
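One caveat in this last hunk: the second conversion in ctnl_untimeout() turns a spin_unlock() into nf_conntrack_lock(), so each bucket lock is acquired twice per iteration and never released, which deadlocks the first time this path runs. The diffstat's four changed lines for this file confirm the double conversion is in the commit as merged, not a rendering error; a follow-up upstream fix restored the unlock. The loop was evidently meant to read:

	/* Corrected loop body, as presumably intended: only the lock
	 * side goes through the new flag-checking helper, the unlock
	 * stays a plain spin_unlock(). */
	for (i = 0; i < net->ct.htable_size; i++) {
		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
		if (i < net->ct.htable_size) {
			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
				untimeout(h, timeout);
		}
		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
	}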