author		Eric Dumazet <dada1@cosmosbay.com>	2009-03-25 16:05:46 -0400
committer	Patrick McHardy <kaber@trash.net>	2009-03-25 16:05:46 -0400
commit		ea781f197d6a835cbb93a0bf88ee1696296ed8aa (patch)
tree		820fe7df1199d8bb6c793e664e480ea56ecf612e
parent		1f9352ae2253a97b07b34dcf16ffa3b4ca12c558 (diff)
netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and get rid of call_rcu()
Use "hlist_nulls" infrastructure we added in 2.6.29 for RCUification of UDP & TCP. This permits an easy conversion from call_rcu() based hash lists to a SLAB_DESTROY_BY_RCU one. Avoiding call_rcu() delay at nf_conn freeing time has numerous gains. First, it doesnt fill RCU queues (up to 10000 elements per cpu). This reduces OOM possibility, if queued elements are not taken into account This reduces latency problems when RCU queue size hits hilimit and triggers emergency mode. - It allows fast reuse of just freed elements, permitting better use of CPU cache. - We delete rcu_head from "struct nf_conn", shrinking size of this structure by 8 or 16 bytes. This patch only takes care of "struct nf_conn". call_rcu() is still used for less critical conntrack parts, that may be converted later if necessary. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: Patrick McHardy <kaber@trash.net>
-rw-r--r--	include/net/netfilter/nf_conntrack.h			| 14
-rw-r--r--	include/net/netfilter/nf_conntrack_tuple.h		| 6
-rw-r--r--	include/net/netns/conntrack.h				| 5
-rw-r--r--	net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c	| 63
-rw-r--r--	net/ipv4/netfilter/nf_nat_core.c			| 2
-rw-r--r--	net/netfilter/nf_conntrack_core.c			| 123
-rw-r--r--	net/netfilter/nf_conntrack_expect.c			| 2
-rw-r--r--	net/netfilter/nf_conntrack_helper.c			| 7
-rw-r--r--	net/netfilter/nf_conntrack_netlink.c			| 20
-rw-r--r--	net/netfilter/nf_conntrack_standalone.c			| 57
-rw-r--r--	net/netfilter/xt_connlimit.c				| 6
11 files changed, 174 insertions(+), 131 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 4dfb793c3f15..6c3f964de9e1 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -91,8 +91,7 @@ struct nf_conn_help {
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
-struct nf_conn
-{
+struct nf_conn {
 	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
 	   plus 1 for any connection(s) we are `master' for */
 	struct nf_conntrack ct_general;
@@ -126,7 +125,6 @@ struct nf_conn
 #ifdef CONFIG_NET_NS
 	struct net *ct_net;
 #endif
-	struct rcu_head rcu;
 };
 
 static inline struct nf_conn *
@@ -190,9 +188,13 @@ static inline void nf_ct_put(struct nf_conn *ct)
 extern int nf_ct_l3proto_try_module_get(unsigned short l3proto);
 extern void nf_ct_l3proto_module_put(unsigned short l3proto);
 
-extern struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced);
-extern void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced,
-				 unsigned int size);
+/*
+ * Allocate a hashtable of hlist_head (if nulls == 0),
+ * or hlist_nulls_head (if nulls == 1)
+ */
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+
+extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
 
 extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
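Both call styles of the widened allocator appear in hunks later in this patch; shown together here for convenience (error handling elided):

    /* conntrack hash: chains terminate in a nulls marker, so nulls = 1 */
    net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
                                         &net->ct.hash_vmalloc, 1);

    /* expectation hash: still plain hlist_head chains, so nulls = 0 */
    net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                &net->ct.expect_vmalloc, 0);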
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index f2f6aa73dc10..2628c154d40e 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -12,6 +12,7 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <linux/list_nulls.h>
 
 /* A `tuple' is a structure containing the information to uniquely
    identify a connection.  ie. if two packets have the same tuple, they
@@ -146,9 +147,8 @@ static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t)
 	((enum ip_conntrack_dir)(h)->tuple.dst.dir)
 
 /* Connections have two entries in the hash table: one for each way */
-struct nf_conntrack_tuple_hash
-{
-	struct hlist_node hnode;
+struct nf_conntrack_tuple_hash {
+	struct hlist_nulls_node hnnode;
 	struct nf_conntrack_tuple tuple;
 };
 
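struct hlist_nulls_node has the same two-word layout as hlist_node; only the interpretation of the terminating pointer changes, which is why the tuple hash entry swaps types without growing. As a preview of __nf_conntrack_find() below, a reader traverses with the nulls-aware macro and then inspects where the walk ended (a condensed paraphrase, not a verbatim quote):

    struct nf_conntrack_tuple_hash *h;
    struct hlist_nulls_node *n;

    hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
        if (nf_ct_tuple_equal(tuple, &h->tuple))
            return h;       /* candidate; caller must still pin + recheck */
    }
    /* n is now a nulls marker; a foreign cookie means we drifted chains */
    if (get_nulls_value(n) != hash)
        goto begin;         /* restart the bucket walk */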
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index f4498a62881b..9dc58402bc09 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -2,6 +2,7 @@
 #define __NETNS_CONNTRACK_H
 
 #include <linux/list.h>
+#include <linux/list_nulls.h>
 #include <asm/atomic.h>
 
 struct ctl_table_header;
@@ -10,9 +11,9 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
-	struct hlist_head	*hash;
+	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
-	struct hlist_head	unconfirmed;
+	struct hlist_nulls_head	unconfirmed;
 	struct ip_conntrack_stat *stat;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct nf_conntrack_ecache *ecache;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 6ba5c557690c..8668a3defda6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -25,40 +25,42 @@ struct ct_iter_state {
 	unsigned int bucket;
 };
 
-static struct hlist_node *ct_get_first(struct seq_file *seq)
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 {
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
-		if (n)
+		if (!is_a_nulls(n))
 			return n;
 	}
 	return NULL;
 }
 
-static struct hlist_node *ct_get_next(struct seq_file *seq,
-				      struct hlist_node *head)
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_nulls_node *head)
 {
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
-	while (head == NULL) {
-		if (++st->bucket >= nf_conntrack_htable_size)
-			return NULL;
+	while (is_a_nulls(head)) {
+		if (likely(get_nulls_value(head) == st->bucket)) {
+			if (++st->bucket >= nf_conntrack_htable_size)
+				return NULL;
+		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
 	}
 	return head;
 }
 
-static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct hlist_node *head = ct_get_first(seq);
+	struct hlist_nulls_node *head = ct_get_first(seq);
 
 	if (head)
 		while (pos && (head = ct_get_next(seq, head)))
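An annotated restatement of the ct_get_next() loop above (our comments): the bucket counter only advances when the nulls cookie proves the walk really reached the end of the current bucket; any other cookie means the iterator was carried into a foreign chain by a recycled entry, so the same bucket is re-read from its head:

    while (is_a_nulls(head)) {
        if (get_nulls_value(head) == st->bucket) {
            /* genuine end of this chain: step to the next bucket */
            if (++st->bucket >= nf_conntrack_htable_size)
                return NULL;
        }
        /* wrong cookie: restart this bucket rather than skip entries */
        head = rcu_dereference(net->ct.hash[st->bucket].first);
    }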
@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v)
 
 static int ct_seq_show(struct seq_file *s, void *v)
 {
-	const struct nf_conntrack_tuple_hash *hash = v;
-	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+	struct nf_conntrack_tuple_hash *hash = v;
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
 	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
+	int ret = 0;
 
 	NF_CT_ASSERT(ct);
+	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+		return 0;
+
 
 	/* we only want to print DIR_ORIGINAL */
 	if (NF_CT_DIRECTION(hash))
-		return 0;
+		goto release;
 	if (nf_ct_l3num(ct) != AF_INET)
-		return 0;
+		goto release;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
 	NF_CT_ASSERT(l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	NF_CT_ASSERT(l4proto);
 
+	ret = -ENOSPC;
 	if (seq_printf(s, "%-8s %u %ld ",
 		       l4proto->name, nf_ct_protonum(ct),
 		       timer_pending(&ct->timeout)
 		       ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
-		return -ENOSPC;
+		goto release;
 
 	if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
-		return -ENOSPC;
+		goto release;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 			l3proto, l4proto))
-		return -ENOSPC;
+		goto release;
 
 	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
-		return -ENOSPC;
+		goto release;
 
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
 		if (seq_printf(s, "[UNREPLIED] "))
-			return -ENOSPC;
+			goto release;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 			l3proto, l4proto))
-		return -ENOSPC;
+		goto release;
 
 	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
-		return -ENOSPC;
+		goto release;
 
 	if (test_bit(IPS_ASSURED_BIT, &ct->status))
 		if (seq_printf(s, "[ASSURED] "))
-			return -ENOSPC;
+			goto release;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
 	if (seq_printf(s, "mark=%u ", ct->mark))
-		return -ENOSPC;
+		goto release;
 #endif
 
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
 	if (seq_printf(s, "secmark=%u ", ct->secmark))
-		return -ENOSPC;
+		goto release;
 #endif
 
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
-		return -ENOSPC;
-
-	return 0;
+		goto release;
+	ret = 0;
+release:
+	nf_ct_put(ct);
+	return ret;
 }
 
 static const struct seq_operations ct_seq_ops = {
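The shape of the ct_seq_show() change is worth calling out: with SLAB_DESTROY_BY_RCU, the entry handed over by the iterator can be freed and recycled at any moment, so the dumper pins it before touching any field and funnels every exit through one release point. The generic pattern (a sketch, not tied to this file):

    if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
        return 0;               /* refcount already hit zero: skip it */
    /* ... read and print ct fields; all error paths goto release ... */
    release:
    nf_ct_put(ct);              /* drop the pin; may free ct */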
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index a65cf692359f..fe65187810f0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 static int __net_init nf_nat_net_init(struct net *net)
 {
 	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
-						      &net->ipv4.nat_vmalloced);
+						      &net->ipv4.nat_vmalloced, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 54e983f13898..c55bbdc7d429 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -29,6 +29,7 @@
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <linux/mm.h>
+#include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l3proto.h>
@@ -163,8 +164,8 @@ static void
 clean_from_lists(struct nf_conn *ct)
 {
 	pr_debug("clean_from_lists(%p)\n", ct);
-	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
-	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);
+	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
 
 	/* Destroy all pending expectations */
 	nf_ct_remove_expectations(ct);
@@ -204,8 +205,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!nf_ct_is_confirmed(ct)) {
-		BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode));
-		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+		BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+		hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
 	}
 
 	NF_CT_STAT_INC(net, delete);
@@ -242,18 +243,26 @@ static void death_by_timeout(unsigned long ul_conntrack)
 	nf_ct_put(ct);
 }
 
+/*
+ * Warning :
+ * - Caller must take a reference on returned object
+ *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
+ * OR
+ * - Caller must lock nf_conntrack_lock before calling this function
+ */
 struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 	unsigned int hash = hash_conntrack(tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
-	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
+begin:
+	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
@@ -261,6 +270,13 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 		}
 		NF_CT_STAT_INC(net, searched);
 	}
+	/*
+	 * if the nulls value we got at the end of this lookup is
+	 * not the expected one, we must restart lookup.
+	 * We probably met an item that was moved to another chain.
+	 */
+	if (get_nulls_value(n) != hash)
+		goto begin;
 	local_bh_enable();
 
 	return NULL;
@@ -275,11 +291,18 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 	struct nf_conn *ct;
 
 	rcu_read_lock();
+begin:
 	h = __nf_conntrack_find(net, tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
+		else {
+			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
+				nf_ct_put(ct);
+				goto begin;
+			}
+		}
 	}
 	rcu_read_unlock();
 
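nf_conntrack_find_get() now embodies the full SLAB_DESTROY_BY_RCU lookup contract spelled out in the comment above __nf_conntrack_find(). Schematically (our summary):

    /* 1. hash walk under rcu_read_lock(); may visit recycled objects     */
    /* 2. atomic_inc_not_zero() pins the object, failing if it is dying   */
    /* 3. recheck the tuple: the slot may now hold a different connection */
    /* 4. on mismatch, nf_ct_put() the pin and restart from step 1        */

Steps 2 and 3 are both mandatory: the refcount alone proves the memory is a live nf_conn, not that it is still *this* connection.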
@@ -293,9 +316,9 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 {
 	struct net *net = nf_ct_net(ct);
 
-	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
-			   &net->ct.hash[hash]);
-	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
-			   &net->ct.hash[repl_hash]);
+	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+				 &net->ct.hash[hash]);
+	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
+				 &net->ct.hash[repl_hash]);
 }
 
@@ -318,7 +341,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
 
@@ -350,17 +373,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode)
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 				      &h->tuple))
 			goto out;
-	hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode)
+	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 				      &h->tuple))
 			goto out;
 
 	/* Remove from unconfirmed list */
-	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
+	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 	/* Timer relative to confirmation time, not original
@@ -399,14 +422,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 {
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 	unsigned int hash = hash_conntrack(tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
 	 */
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
+	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
 		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
 		    nf_ct_tuple_equal(tuple, &h->tuple)) {
 			NF_CT_STAT_INC(net, found);
@@ -430,14 +453,14 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct = NULL, *tmp;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 	unsigned int i, cnt = 0;
 	int dropped = 0;
 
 	rcu_read_lock();
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash],
-					 hnode) {
+		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
+					       hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
 				ct = tmp;
@@ -508,27 +531,19 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 #ifdef CONFIG_NET_NS
 	ct->ct_net = net;
 #endif
-	INIT_RCU_HEAD(&ct->rcu);
 
 	return ct;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
-static void nf_conntrack_free_rcu(struct rcu_head *head)
-{
-	struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
-
-	nf_ct_ext_free(ct);
-	kmem_cache_free(nf_conntrack_cachep, ct);
-}
-
 void nf_conntrack_free(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 
 	nf_ct_ext_destroy(ct);
 	atomic_dec(&net->ct.count);
-	call_rcu(&ct->rcu, nf_conntrack_free_rcu);
+	nf_ct_ext_free(ct);
+	kmem_cache_free(nf_conntrack_cachep, ct);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
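A gloss on the new free path (ours, not part of the patch): there is no per-object grace period any more, so after kmem_cache_free() this nf_conn may be handed out again by the very next allocation. Expressed as the comment one might keep next to this function:

    /*
     * SLAB_DESTROY_BY_RCU only keeps the slab *page* out of the page
     * allocator across RCU read-side sections; the object itself can be
     * reused immediately.  Lock-free readers survive this through
     * atomic_inc_not_zero() + tuple recheck, so nothing on this path may
     * assume the object stays intact until a grace period elapses.
     */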
@@ -594,7 +609,7 @@ init_conntrack(struct net *net,
 	}
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
-	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
-		       &net->ct.unconfirmed);
+	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+		       &net->ct.unconfirmed);
 
 	spin_unlock_bh(&nf_conntrack_lock);
@@ -934,17 +949,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 
 	spin_lock_bh(&nf_conntrack_lock);
 	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
-		hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) {
+		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
 				goto found;
 		}
 	}
-	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
+	hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (iter(ct, data))
 			set_bit(IPS_DYING_BIT, &ct->status);
@@ -992,7 +1007,7 @@ static int kill_all(struct nf_conn *i, void *data)
 	return 1;
 }
 
-void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
 {
 	if (vmalloced)
 		vfree(hash);
@@ -1060,26 +1075,28 @@ void nf_conntrack_cleanup(struct net *net)
 	}
 }
 
-struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
 {
-	struct hlist_head *hash;
-	unsigned int size, i;
+	struct hlist_nulls_head *hash;
+	unsigned int nr_slots, i;
+	size_t sz;
 
 	*vmalloced = 0;
 
-	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
-	hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
-				       get_order(sizeof(struct hlist_head)
-						 * size));
+	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
+	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+	sz = nr_slots * sizeof(struct hlist_nulls_head);
+	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
+					get_order(sz));
 	if (!hash) {
 		*vmalloced = 1;
 		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct hlist_head) * size);
+		hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
 	}
 
-	if (hash)
-		for (i = 0; i < size; i++)
-			INIT_HLIST_HEAD(&hash[i]);
+	if (hash && nulls)
+		for (i = 0; i < nr_slots; i++)
+			INIT_HLIST_NULLS_HEAD(&hash[i], i);
 
 	return hash;
 }
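Two details of the rewritten allocator: the BUILD_BUG_ON pins down that hlist_nulls_head and hlist_head have identical size, which is what lets one allocator serve both table flavours behind a void *; and each empty bucket is initialized to carry its own index as the nulls cookie. Per include/linux/list_nulls.h, the initializer expands to roughly:

    /* INIT_HLIST_NULLS_HEAD(&hash[i], i) is effectively:                */
    hash[i].first = (struct hlist_nulls_node *)NULLS_MARKER(i);
    /* i.e. ((unsigned long)i << 1) | 1 -- odd, hence never a pointer    */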
@@ -1090,7 +1107,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	int i, bucket, vmalloced, old_vmalloced;
 	unsigned int hashsize, old_size;
 	int rnd;
-	struct hlist_head *hash, *old_hash;
+	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 
 	/* On boot, we can set this without any fancy locking. */
@@ -1101,7 +1118,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hashsize)
 		return -EINVAL;
 
-	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
+	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
 	if (!hash)
 		return -ENOMEM;
 
@@ -1116,12 +1133,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		while (!hlist_empty(&init_net.ct.hash[i])) {
-			h = hlist_entry(init_net.ct.hash[i].first,
-					struct nf_conntrack_tuple_hash, hnode);
-			hlist_del_rcu(&h->hnode);
+		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
+			h = hlist_nulls_entry(init_net.ct.hash[i].first,
+					struct nf_conntrack_tuple_hash, hnnode);
+			hlist_nulls_del_rcu(&h->hnnode);
 			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
-			hlist_add_head_rcu(&h->hnode, &hash[bucket]);
+			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
 	old_size = nf_conntrack_htable_size;
@@ -1172,7 +1189,7 @@ static int nf_conntrack_init_init_net(void)
 
 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
 						sizeof(struct nf_conn),
-						0, 0, NULL);
+						0, SLAB_DESTROY_BY_RCU, NULL);
 	if (!nf_conntrack_cachep) {
 		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
 		ret = -ENOMEM;
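Creating the cache with SLAB_DESTROY_BY_RCU (renamed SLAB_TYPESAFE_BY_RCU in much later kernels) is what licenses the direct kmem_cache_free() earlier in this patch: the flag defers freeing of slab pages, not objects, across RCU readers. A minimal sketch of such a cache setup:

    struct kmem_cache *cachep;

    /* type-stable memory: objects recycle fast, pages respect RCU */
    cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn),
                               0, SLAB_DESTROY_BY_RCU, NULL);
    if (!cachep)
        return -ENOMEM;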
@@ -1202,7 +1219,7 @@ static int nf_conntrack_init_net(struct net *net)
 	int ret;
 
 	atomic_set(&net->ct.count, 0);
-	INIT_HLIST_HEAD(&net->ct.unconfirmed);
+	INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
 	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
 	if (!net->ct.stat) {
 		ret = -ENOMEM;
@@ -1212,7 +1229,7 @@ static int nf_conntrack_init_net(struct net *net)
 	if (ret < 0)
 		goto err_ecache;
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
-					     &net->ct.hash_vmalloc);
+					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
 		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 357ba39d4c8d..3940f996a2e4 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -604,7 +604,7 @@ int nf_conntrack_expect_init(struct net *net)
 
 	net->ct.expect_count = 0;
 	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-						    &net->ct.expect_vmalloc);
+						    &net->ct.expect_vmalloc, 0);
 	if (net->ct.expect_hash == NULL)
 		goto err1;
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 805cfdd42303..30b8e9009f99 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -159,6 +159,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	const struct hlist_node *n, *next;
+	const struct hlist_nulls_node *nn;
 	unsigned int i;
 
 	/* Get rid of expectations */
@@ -175,10 +176,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	}
 
 	/* Get rid of expecteds, set helpers to NULL. */
-	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode)
+	hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
 		unhelp(h, me);
 	for (i = 0; i < nf_conntrack_htable_size; i++) {
-		hlist_for_each_entry(h, n, &net->ct.hash[i], hnode)
+		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 			unhelp(h, me);
 	}
 }
@@ -218,7 +219,7 @@ int nf_conntrack_helper_init(void)
 
 	nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
 	nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
-						  &nf_ct_helper_vmalloc);
+						  &nf_ct_helper_vmalloc, 0);
 	if (!nf_ct_helper_hash)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1b75c9efb0eb..349bbefe5517 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/rculist.h>
+#include <linux/rculist_nulls.h>
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/skbuff.h>
@@ -536,7 +537,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 	struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
 
@@ -544,27 +545,27 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
-					 hnode) {
+		hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
+					 hnnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (!atomic_inc_not_zero(&ct->ct_general.use))
+				continue;
 			/* Dump entries of a given L3 protocol number.
 			 * If it is not specified, ie. l3proto == 0,
 			 * then dump everything. */
 			if (l3proto && nf_ct_l3num(ct) != l3proto)
-				continue;
+				goto releasect;
 			if (cb->args[1]) {
 				if (ct != last)
-					continue;
+					goto releasect;
 				cb->args[1] = 0;
 			}
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 						cb->nlh->nlmsg_seq,
 						IPCTNL_MSG_CT_NEW,
 						1, ct) < 0) {
-				if (!atomic_inc_not_zero(&ct->ct_general.use))
-					continue;
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
@@ -577,6 +578,8 @@ restart:
 			if (acct)
 				memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
 			}
+releasect:
+		nf_ct_put(ct);
 		}
 	if (cb->args[1]) {
 		cb->args[1] = 0;
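Note how the reference discipline in ctnetlink_dump_table() inverted: before this patch the dump took a reference only when it had to pause and remember ct in cb->args[1]; now every entry must be pinned before any of its fields are read. In outline (our summary):

    ct = nf_ct_tuplehash_to_ctrack(h);
    if (!atomic_inc_not_zero(&ct->ct_general.use))
        continue;               /* dying entry: nothing to dump */
    /* ... filter and fill; a full skb parks ct in cb->args[1] and
     * jumps out *keeping* its reference for the next round ... */
    releasect:
    nf_ct_put(ct);              /* all in-loop paths drop the pin */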
@@ -1242,13 +1245,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 	if (err < 0)
 		goto err2;
 
-	master_h = __nf_conntrack_find(&init_net, &master);
+	master_h = nf_conntrack_find_get(&init_net, &master);
 	if (master_h == NULL) {
 		err = -ENOENT;
 		goto err2;
 	}
 	master_ct = nf_ct_tuplehash_to_ctrack(master_h);
-	nf_conntrack_get(&master_ct->ct_general);
 	__set_bit(IPS_EXPECTED_BIT, &ct->status);
 	ct->master = master_ct;
 	}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4da54b0b9233..193515381970 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -44,40 +44,42 @@ struct ct_iter_state {
 	unsigned int bucket;
 };
 
-static struct hlist_node *ct_get_first(struct seq_file *seq)
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 {
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
-	struct hlist_node *n;
+	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
 	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
-		if (n)
+		if (!is_a_nulls(n))
 			return n;
 	}
 	return NULL;
 }
 
-static struct hlist_node *ct_get_next(struct seq_file *seq,
-				      struct hlist_node *head)
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+				      struct hlist_nulls_node *head)
 {
 	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(head->next);
-	while (head == NULL) {
-		if (++st->bucket >= nf_conntrack_htable_size)
-			return NULL;
+	while (is_a_nulls(head)) {
+		if (likely(get_nulls_value(head) == st->bucket)) {
+			if (++st->bucket >= nf_conntrack_htable_size)
+				return NULL;
+		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
 	}
 	return head;
 }
 
-static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 {
-	struct hlist_node *head = ct_get_first(seq);
+	struct hlist_nulls_node *head = ct_get_first(seq);
 
 	if (head)
 		while (pos && (head = ct_get_next(seq, head)))
@@ -107,67 +109,74 @@ static void ct_seq_stop(struct seq_file *s, void *v)
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
-	const struct nf_conntrack_tuple_hash *hash = v;
-	const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+	struct nf_conntrack_tuple_hash *hash = v;
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
 	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
+	int ret = 0;
 
 	NF_CT_ASSERT(ct);
+	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+		return 0;
 
 	/* we only want to print DIR_ORIGINAL */
 	if (NF_CT_DIRECTION(hash))
-		return 0;
+		goto release;
 
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
 	NF_CT_ASSERT(l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	NF_CT_ASSERT(l4proto);
 
+	ret = -ENOSPC;
 	if (seq_printf(s, "%-8s %u %-8s %u %ld ",
 		       l3proto->name, nf_ct_l3num(ct),
 		       l4proto->name, nf_ct_protonum(ct),
 		       timer_pending(&ct->timeout)
 		       ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
-		return -ENOSPC;
+		goto release;
 
 	if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
-		return -ENOSPC;
+		goto release;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
 			l3proto, l4proto))
-		return -ENOSPC;
+		goto release;
 
 	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
-		return -ENOSPC;
+		goto release;
 
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
 		if (seq_printf(s, "[UNREPLIED] "))
-			return -ENOSPC;
+			goto release;
 
 	if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
 			l3proto, l4proto))
-		return -ENOSPC;
+		goto release;
 
 	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
-		return -ENOSPC;
+		goto release;
 
 	if (test_bit(IPS_ASSURED_BIT, &ct->status))
 		if (seq_printf(s, "[ASSURED] "))
-			return -ENOSPC;
+			goto release;
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (seq_printf(s, "mark=%u ", ct->mark))
-		return -ENOSPC;
+		goto release;
 #endif
 
 #ifdef CONFIG_NF_CONNTRACK_SECMARK
 	if (seq_printf(s, "secmark=%u ", ct->secmark))
-		return -ENOSPC;
+		goto release;
 #endif
 
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
-		return -ENOSPC;
+		goto release;
 
+	ret = 0;
+release:
+	nf_ct_put(ct);
 	return 0;
 }
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 7f404cc64c83..680980954395 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -108,7 +108,7 @@ static int count_them(struct xt_connlimit_data *data,
 	const struct nf_conntrack_tuple_hash *found;
 	struct xt_connlimit_conn *conn;
 	struct xt_connlimit_conn *tmp;
-	const struct nf_conn *found_ct;
+	struct nf_conn *found_ct;
 	struct list_head *hash;
 	bool addit = true;
 	int matches = 0;
@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found = __nf_conntrack_find(&init_net, &conn->tuple);
+		found = nf_conntrack_find_get(&init_net, &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
@@ -151,6 +151,7 @@ static int count_them(struct xt_connlimit_data *data,
 			 * we do not care about connections which are
 			 * closed already -> ditch it
 			 */
+			nf_ct_put(found_ct);
 			list_del(&conn->list);
 			kfree(conn);
 			continue;
@@ -160,6 +161,7 @@ static int count_them(struct xt_connlimit_data *data,
 				  match->family))
 			/* same source network -> be counted! */
 			++matches;
+		nf_ct_put(found_ct);
 	}
 
 	rcu_read_unlock();