aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net>, 2018-12-29 17:25:52 -0500
committer: David S. Miller <davem@davemloft.net>, 2018-12-29 17:25:52 -0500
commit: f7d18ef6a95f399544d7b767291980cecddc32eb (patch)
tree: 099abe4cf86b7bc733323d7cbafe843696cdf795
parent: 0d9c9a238faf925823bde866182c663b6d734f2e (diff)
parent: a007232066f6839d6f256bab21e825d968f1a163 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net, specifically
fixes for the nf_conncount infrastructure which is causing troubles
since 5c789e131cbb9 ("netfilter: nf_conncount: Add list lock and gc
worker, and RCU for init tree search"). Patches aim to simplify this
infrastructure while fixing up the problems:

1) Use fixed size CONNCOUNT_SLOTS in nf_conncount, from Shawn Bohrer.

2) Incorrect signedness in age calculation from find_or_evict(),
   from Florian Westphal.

3) Proper locking for the garbage collector workqueue callback,
   first make a patch to count how many nodes can be collected
   without holding locks, then grab lock and release them. Also
   from Florian.

4) Restart node lookup from the insertion path, after releasing nodes
   via packet path garbage collection. Shawn Bohrer described a scenario
   that may result in inserting a connection in an already dead list
   node. Patch from Florian.

5) Merge lookup and add function to avoid a hold release and re-grab.
   From Florian.

6) Be safe and iterate over the node lists under the spinlock.

7) Speculative list nodes removal via garbage collection, check if
   list node got a connection while it was scheduled for deletion
   via gc.

8) Accidental argument swap in find_next_bit() that leads to more
   frequent scheduling of the workqueue. From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  include/net/netfilter/nf_conntrack_count.h   19
-rw-r--r--  net/netfilter/nf_conncount.c                290
-rw-r--r--  net/netfilter/nf_tables_api.c                 2
-rw-r--r--  net/netfilter/nft_connlimit.c                14
4 files changed, 136 insertions, 189 deletions
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 4b2b2baf8ab4..f32fc8289473 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,17 +5,10 @@
5 5
6struct nf_conncount_data; 6struct nf_conncount_data;
7 7
8enum nf_conncount_list_add {
9 NF_CONNCOUNT_ADDED, /* list add was ok */
10 NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */
11 NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */
12};
13
14struct nf_conncount_list { 8struct nf_conncount_list {
15 spinlock_t list_lock; 9 spinlock_t list_lock;
16 struct list_head head; /* connections with the same filtering key */ 10 struct list_head head; /* connections with the same filtering key */
17 unsigned int count; /* length of list */ 11 unsigned int count; /* length of list */
18 bool dead;
19}; 12};
20 13
21struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, 14struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
@@ -29,18 +22,12 @@ unsigned int nf_conncount_count(struct net *net,
29 const struct nf_conntrack_tuple *tuple, 22 const struct nf_conntrack_tuple *tuple,
30 const struct nf_conntrack_zone *zone); 23 const struct nf_conntrack_zone *zone);
31 24
32void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, 25int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
33 const struct nf_conntrack_tuple *tuple, 26 const struct nf_conntrack_tuple *tuple,
34 const struct nf_conntrack_zone *zone, 27 const struct nf_conntrack_zone *zone);
35 bool *addit);
36 28
37void nf_conncount_list_init(struct nf_conncount_list *list); 29void nf_conncount_list_init(struct nf_conncount_list *list);
38 30
39enum nf_conncount_list_add
40nf_conncount_add(struct nf_conncount_list *list,
41 const struct nf_conntrack_tuple *tuple,
42 const struct nf_conntrack_zone *zone);
43
44bool nf_conncount_gc_list(struct net *net, 31bool nf_conncount_gc_list(struct net *net,
45 struct nf_conncount_list *list); 32 struct nf_conncount_list *list);
46 33
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 9cd180bda092..7554c56b2e63 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -33,12 +33,6 @@
33 33
34#define CONNCOUNT_SLOTS 256U 34#define CONNCOUNT_SLOTS 256U
35 35
36#ifdef CONFIG_LOCKDEP
37#define CONNCOUNT_LOCK_SLOTS 8U
38#else
39#define CONNCOUNT_LOCK_SLOTS 256U
40#endif
41
42#define CONNCOUNT_GC_MAX_NODES 8 36#define CONNCOUNT_GC_MAX_NODES 8
43#define MAX_KEYLEN 5 37#define MAX_KEYLEN 5
44 38
@@ -49,8 +43,6 @@ struct nf_conncount_tuple {
49 struct nf_conntrack_zone zone; 43 struct nf_conntrack_zone zone;
50 int cpu; 44 int cpu;
51 u32 jiffies32; 45 u32 jiffies32;
52 bool dead;
53 struct rcu_head rcu_head;
54}; 46};
55 47
56struct nf_conncount_rb { 48struct nf_conncount_rb {
@@ -60,7 +52,7 @@ struct nf_conncount_rb {
60 struct rcu_head rcu_head; 52 struct rcu_head rcu_head;
61}; 53};
62 54
63static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; 55static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp;
64 56
65struct nf_conncount_data { 57struct nf_conncount_data {
66 unsigned int keylen; 58 unsigned int keylen;
@@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
89 return memcmp(a, b, klen * sizeof(u32)); 81 return memcmp(a, b, klen * sizeof(u32));
90} 82}
91 83
92enum nf_conncount_list_add 84static void conn_free(struct nf_conncount_list *list,
93nf_conncount_add(struct nf_conncount_list *list,
94 const struct nf_conntrack_tuple *tuple,
95 const struct nf_conntrack_zone *zone)
96{
97 struct nf_conncount_tuple *conn;
98
99 if (WARN_ON_ONCE(list->count > INT_MAX))
100 return NF_CONNCOUNT_ERR;
101
102 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
103 if (conn == NULL)
104 return NF_CONNCOUNT_ERR;
105
106 conn->tuple = *tuple;
107 conn->zone = *zone;
108 conn->cpu = raw_smp_processor_id();
109 conn->jiffies32 = (u32)jiffies;
110 conn->dead = false;
111 spin_lock_bh(&list->list_lock);
112 if (list->dead == true) {
113 kmem_cache_free(conncount_conn_cachep, conn);
114 spin_unlock_bh(&list->list_lock);
115 return NF_CONNCOUNT_SKIP;
116 }
117 list_add_tail(&conn->node, &list->head);
118 list->count++;
119 spin_unlock_bh(&list->list_lock);
120 return NF_CONNCOUNT_ADDED;
121}
122EXPORT_SYMBOL_GPL(nf_conncount_add);
123
124static void __conn_free(struct rcu_head *h)
125{
126 struct nf_conncount_tuple *conn;
127
128 conn = container_of(h, struct nf_conncount_tuple, rcu_head);
129 kmem_cache_free(conncount_conn_cachep, conn);
130}
131
132static bool conn_free(struct nf_conncount_list *list,
133 struct nf_conncount_tuple *conn) 85 struct nf_conncount_tuple *conn)
134{ 86{
135 bool free_entry = false; 87 lockdep_assert_held(&list->list_lock);
136
137 spin_lock_bh(&list->list_lock);
138
139 if (conn->dead) {
140 spin_unlock_bh(&list->list_lock);
141 return free_entry;
142 }
143 88
144 list->count--; 89 list->count--;
145 conn->dead = true; 90 list_del(&conn->node);
146 list_del_rcu(&conn->node);
147 if (list->count == 0) {
148 list->dead = true;
149 free_entry = true;
150 }
151 91
152 spin_unlock_bh(&list->list_lock); 92 kmem_cache_free(conncount_conn_cachep, conn);
153 call_rcu(&conn->rcu_head, __conn_free);
154 return free_entry;
155} 93}
156 94
157static const struct nf_conntrack_tuple_hash * 95static const struct nf_conntrack_tuple_hash *
158find_or_evict(struct net *net, struct nf_conncount_list *list, 96find_or_evict(struct net *net, struct nf_conncount_list *list,
159 struct nf_conncount_tuple *conn, bool *free_entry) 97 struct nf_conncount_tuple *conn)
160{ 98{
161 const struct nf_conntrack_tuple_hash *found; 99 const struct nf_conntrack_tuple_hash *found;
162 unsigned long a, b; 100 unsigned long a, b;
163 int cpu = raw_smp_processor_id(); 101 int cpu = raw_smp_processor_id();
164 __s32 age; 102 u32 age;
165 103
166 found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); 104 found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
167 if (found) 105 if (found)
@@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
176 */ 114 */
177 age = a - b; 115 age = a - b;
178 if (conn->cpu == cpu || age >= 2) { 116 if (conn->cpu == cpu || age >= 2) {
179 *free_entry = conn_free(list, conn); 117 conn_free(list, conn);
180 return ERR_PTR(-ENOENT); 118 return ERR_PTR(-ENOENT);
181 } 119 }
182 120
183 return ERR_PTR(-EAGAIN); 121 return ERR_PTR(-EAGAIN);
184} 122}
185 123
186void nf_conncount_lookup(struct net *net, 124static int __nf_conncount_add(struct net *net,
187 struct nf_conncount_list *list, 125 struct nf_conncount_list *list,
188 const struct nf_conntrack_tuple *tuple, 126 const struct nf_conntrack_tuple *tuple,
189 const struct nf_conntrack_zone *zone, 127 const struct nf_conntrack_zone *zone)
190 bool *addit)
191{ 128{
192 const struct nf_conntrack_tuple_hash *found; 129 const struct nf_conntrack_tuple_hash *found;
193 struct nf_conncount_tuple *conn, *conn_n; 130 struct nf_conncount_tuple *conn, *conn_n;
194 struct nf_conn *found_ct; 131 struct nf_conn *found_ct;
195 unsigned int collect = 0; 132 unsigned int collect = 0;
196 bool free_entry = false;
197
198 /* best effort only */
199 *addit = tuple ? true : false;
200 133
201 /* check the saved connections */ 134 /* check the saved connections */
202 list_for_each_entry_safe(conn, conn_n, &list->head, node) { 135 list_for_each_entry_safe(conn, conn_n, &list->head, node) {
203 if (collect > CONNCOUNT_GC_MAX_NODES) 136 if (collect > CONNCOUNT_GC_MAX_NODES)
204 break; 137 break;
205 138
206 found = find_or_evict(net, list, conn, &free_entry); 139 found = find_or_evict(net, list, conn);
207 if (IS_ERR(found)) { 140 if (IS_ERR(found)) {
208 /* Not found, but might be about to be confirmed */ 141 /* Not found, but might be about to be confirmed */
209 if (PTR_ERR(found) == -EAGAIN) { 142 if (PTR_ERR(found) == -EAGAIN) {
210 if (!tuple)
211 continue;
212
213 if (nf_ct_tuple_equal(&conn->tuple, tuple) && 143 if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
214 nf_ct_zone_id(&conn->zone, conn->zone.dir) == 144 nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
215 nf_ct_zone_id(zone, zone->dir)) 145 nf_ct_zone_id(zone, zone->dir))
216 *addit = false; 146 return 0; /* already exists */
217 } else if (PTR_ERR(found) == -ENOENT) 147 } else {
218 collect++; 148 collect++;
149 }
219 continue; 150 continue;
220 } 151 }
221 152
222 found_ct = nf_ct_tuplehash_to_ctrack(found); 153 found_ct = nf_ct_tuplehash_to_ctrack(found);
223 154
224 if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && 155 if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
225 nf_ct_zone_equal(found_ct, zone, zone->dir)) { 156 nf_ct_zone_equal(found_ct, zone, zone->dir)) {
226 /* 157 /*
227 * We should not see tuples twice unless someone hooks 158 * We should not see tuples twice unless someone hooks
@@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net,
229 * 160 *
230 * Attempt to avoid a re-add in this case. 161 * Attempt to avoid a re-add in this case.
231 */ 162 */
232 *addit = false; 163 nf_ct_put(found_ct);
164 return 0;
233 } else if (already_closed(found_ct)) { 165 } else if (already_closed(found_ct)) {
234 /* 166 /*
235 * we do not care about connections which are 167 * we do not care about connections which are
@@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net,
243 175
244 nf_ct_put(found_ct); 176 nf_ct_put(found_ct);
245 } 177 }
178
179 if (WARN_ON_ONCE(list->count > INT_MAX))
180 return -EOVERFLOW;
181
182 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
183 if (conn == NULL)
184 return -ENOMEM;
185
186 conn->tuple = *tuple;
187 conn->zone = *zone;
188 conn->cpu = raw_smp_processor_id();
189 conn->jiffies32 = (u32)jiffies;
190 list_add_tail(&conn->node, &list->head);
191 list->count++;
192 return 0;
246} 193}
247EXPORT_SYMBOL_GPL(nf_conncount_lookup); 194
195int nf_conncount_add(struct net *net,
196 struct nf_conncount_list *list,
197 const struct nf_conntrack_tuple *tuple,
198 const struct nf_conntrack_zone *zone)
199{
200 int ret;
201
202 /* check the saved connections */
203 spin_lock_bh(&list->list_lock);
204 ret = __nf_conncount_add(net, list, tuple, zone);
205 spin_unlock_bh(&list->list_lock);
206
207 return ret;
208}
209EXPORT_SYMBOL_GPL(nf_conncount_add);
248 210
249void nf_conncount_list_init(struct nf_conncount_list *list) 211void nf_conncount_list_init(struct nf_conncount_list *list)
250{ 212{
251 spin_lock_init(&list->list_lock); 213 spin_lock_init(&list->list_lock);
252 INIT_LIST_HEAD(&list->head); 214 INIT_LIST_HEAD(&list->head);
253 list->count = 0; 215 list->count = 0;
254 list->dead = false;
255} 216}
256EXPORT_SYMBOL_GPL(nf_conncount_list_init); 217EXPORT_SYMBOL_GPL(nf_conncount_list_init);
257 218
258/* Return true if the list is empty */ 219/* Return true if the list is empty. Must be called with BH disabled. */
259bool nf_conncount_gc_list(struct net *net, 220bool nf_conncount_gc_list(struct net *net,
260 struct nf_conncount_list *list) 221 struct nf_conncount_list *list)
261{ 222{
@@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net,
263 struct nf_conncount_tuple *conn, *conn_n; 224 struct nf_conncount_tuple *conn, *conn_n;
264 struct nf_conn *found_ct; 225 struct nf_conn *found_ct;
265 unsigned int collected = 0; 226 unsigned int collected = 0;
266 bool free_entry = false;
267 bool ret = false; 227 bool ret = false;
268 228
229 /* don't bother if other cpu is already doing GC */
230 if (!spin_trylock(&list->list_lock))
231 return false;
232
269 list_for_each_entry_safe(conn, conn_n, &list->head, node) { 233 list_for_each_entry_safe(conn, conn_n, &list->head, node) {
270 found = find_or_evict(net, list, conn, &free_entry); 234 found = find_or_evict(net, list, conn);
271 if (IS_ERR(found)) { 235 if (IS_ERR(found)) {
272 if (PTR_ERR(found) == -ENOENT) { 236 if (PTR_ERR(found) == -ENOENT)
273 if (free_entry)
274 return true;
275 collected++; 237 collected++;
276 }
277 continue; 238 continue;
278 } 239 }
279 240
@@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net,
284 * closed already -> ditch it 245 * closed already -> ditch it
285 */ 246 */
286 nf_ct_put(found_ct); 247 nf_ct_put(found_ct);
287 if (conn_free(list, conn)) 248 conn_free(list, conn);
288 return true;
289 collected++; 249 collected++;
290 continue; 250 continue;
291 } 251 }
292 252
293 nf_ct_put(found_ct); 253 nf_ct_put(found_ct);
294 if (collected > CONNCOUNT_GC_MAX_NODES) 254 if (collected > CONNCOUNT_GC_MAX_NODES)
295 return false; 255 break;
296 } 256 }
297 257
298 spin_lock_bh(&list->list_lock); 258 if (!list->count)
299 if (!list->count) {
300 list->dead = true;
301 ret = true; 259 ret = true;
302 } 260 spin_unlock(&list->list_lock);
303 spin_unlock_bh(&list->list_lock);
304 261
305 return ret; 262 return ret;
306} 263}
@@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
314 kmem_cache_free(conncount_rb_cachep, rbconn); 271 kmem_cache_free(conncount_rb_cachep, rbconn);
315} 272}
316 273
274/* caller must hold tree nf_conncount_locks[] lock */
317static void tree_nodes_free(struct rb_root *root, 275static void tree_nodes_free(struct rb_root *root,
318 struct nf_conncount_rb *gc_nodes[], 276 struct nf_conncount_rb *gc_nodes[],
319 unsigned int gc_count) 277 unsigned int gc_count)
@@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
323 while (gc_count) { 281 while (gc_count) {
324 rbconn = gc_nodes[--gc_count]; 282 rbconn = gc_nodes[--gc_count];
325 spin_lock(&rbconn->list.list_lock); 283 spin_lock(&rbconn->list.list_lock);
326 rb_erase(&rbconn->node, root); 284 if (!rbconn->list.count) {
327 call_rcu(&rbconn->rcu_head, __tree_nodes_free); 285 rb_erase(&rbconn->node, root);
286 call_rcu(&rbconn->rcu_head, __tree_nodes_free);
287 }
328 spin_unlock(&rbconn->list.list_lock); 288 spin_unlock(&rbconn->list.list_lock);
329 } 289 }
330} 290}
@@ -341,20 +301,19 @@ insert_tree(struct net *net,
341 struct rb_root *root, 301 struct rb_root *root,
342 unsigned int hash, 302 unsigned int hash,
343 const u32 *key, 303 const u32 *key,
344 u8 keylen,
345 const struct nf_conntrack_tuple *tuple, 304 const struct nf_conntrack_tuple *tuple,
346 const struct nf_conntrack_zone *zone) 305 const struct nf_conntrack_zone *zone)
347{ 306{
348 enum nf_conncount_list_add ret;
349 struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; 307 struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
350 struct rb_node **rbnode, *parent; 308 struct rb_node **rbnode, *parent;
351 struct nf_conncount_rb *rbconn; 309 struct nf_conncount_rb *rbconn;
352 struct nf_conncount_tuple *conn; 310 struct nf_conncount_tuple *conn;
353 unsigned int count = 0, gc_count = 0; 311 unsigned int count = 0, gc_count = 0;
354 bool node_found = false; 312 u8 keylen = data->keylen;
355 313 bool do_gc = true;
356 spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
357 314
315 spin_lock_bh(&nf_conncount_locks[hash]);
316restart:
358 parent = NULL; 317 parent = NULL;
359 rbnode = &(root->rb_node); 318 rbnode = &(root->rb_node);
360 while (*rbnode) { 319 while (*rbnode) {
@@ -368,45 +327,32 @@ insert_tree(struct net *net,
368 } else if (diff > 0) { 327 } else if (diff > 0) {
369 rbnode = &((*rbnode)->rb_right); 328 rbnode = &((*rbnode)->rb_right);
370 } else { 329 } else {
371 /* unlikely: other cpu added node already */ 330 int ret;
372 node_found = true; 331
373 ret = nf_conncount_add(&rbconn->list, tuple, zone); 332 ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
374 if (ret == NF_CONNCOUNT_ERR) { 333 if (ret)
375 count = 0; /* hotdrop */ 334 count = 0; /* hotdrop */
376 } else if (ret == NF_CONNCOUNT_ADDED) { 335 else
377 count = rbconn->list.count; 336 count = rbconn->list.count;
378 } else { 337 tree_nodes_free(root, gc_nodes, gc_count);
379 /* NF_CONNCOUNT_SKIP, rbconn is already 338 goto out_unlock;
380 * reclaimed by gc, insert a new tree node
381 */
382 node_found = false;
383 }
384 break;
385 } 339 }
386 340
387 if (gc_count >= ARRAY_SIZE(gc_nodes)) 341 if (gc_count >= ARRAY_SIZE(gc_nodes))
388 continue; 342 continue;
389 343
390 if (nf_conncount_gc_list(net, &rbconn->list)) 344 if (do_gc && nf_conncount_gc_list(net, &rbconn->list))
391 gc_nodes[gc_count++] = rbconn; 345 gc_nodes[gc_count++] = rbconn;
392 } 346 }
393 347
394 if (gc_count) { 348 if (gc_count) {
395 tree_nodes_free(root, gc_nodes, gc_count); 349 tree_nodes_free(root, gc_nodes, gc_count);
396 /* tree_node_free before new allocation permits 350 schedule_gc_worker(data, hash);
397 * allocator to re-use newly free'd object. 351 gc_count = 0;
398 * 352 do_gc = false;
399 * This is a rare event; in most cases we will find 353 goto restart;
400 * existing node to re-use. (or gc_count is 0).
401 */
402
403 if (gc_count >= ARRAY_SIZE(gc_nodes))
404 schedule_gc_worker(data, hash);
405 } 354 }
406 355
407 if (node_found)
408 goto out_unlock;
409
410 /* expected case: match, insert new node */ 356 /* expected case: match, insert new node */
411 rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); 357 rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
412 if (rbconn == NULL) 358 if (rbconn == NULL)
@@ -430,7 +376,7 @@ insert_tree(struct net *net,
430 rb_link_node_rcu(&rbconn->node, parent, rbnode); 376 rb_link_node_rcu(&rbconn->node, parent, rbnode);
431 rb_insert_color(&rbconn->node, root); 377 rb_insert_color(&rbconn->node, root);
432out_unlock: 378out_unlock:
433 spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 379 spin_unlock_bh(&nf_conncount_locks[hash]);
434 return count; 380 return count;
435} 381}
436 382
@@ -441,7 +387,6 @@ count_tree(struct net *net,
441 const struct nf_conntrack_tuple *tuple, 387 const struct nf_conntrack_tuple *tuple,
442 const struct nf_conntrack_zone *zone) 388 const struct nf_conntrack_zone *zone)
443{ 389{
444 enum nf_conncount_list_add ret;
445 struct rb_root *root; 390 struct rb_root *root;
446 struct rb_node *parent; 391 struct rb_node *parent;
447 struct nf_conncount_rb *rbconn; 392 struct nf_conncount_rb *rbconn;
@@ -454,7 +399,6 @@ count_tree(struct net *net,
454 parent = rcu_dereference_raw(root->rb_node); 399 parent = rcu_dereference_raw(root->rb_node);
455 while (parent) { 400 while (parent) {
456 int diff; 401 int diff;
457 bool addit;
458 402
459 rbconn = rb_entry(parent, struct nf_conncount_rb, node); 403 rbconn = rb_entry(parent, struct nf_conncount_rb, node);
460 404
@@ -464,31 +408,36 @@ count_tree(struct net *net,
464 } else if (diff > 0) { 408 } else if (diff > 0) {
465 parent = rcu_dereference_raw(parent->rb_right); 409 parent = rcu_dereference_raw(parent->rb_right);
466 } else { 410 } else {
467 /* same source network -> be counted! */ 411 int ret;
468 nf_conncount_lookup(net, &rbconn->list, tuple, zone,
469 &addit);
470 412
471 if (!addit) 413 if (!tuple) {
414 nf_conncount_gc_list(net, &rbconn->list);
472 return rbconn->list.count; 415 return rbconn->list.count;
416 }
473 417
474 ret = nf_conncount_add(&rbconn->list, tuple, zone); 418 spin_lock_bh(&rbconn->list.list_lock);
475 if (ret == NF_CONNCOUNT_ERR) { 419 /* Node might be about to be free'd.
476 return 0; /* hotdrop */ 420 * We need to defer to insert_tree() in this case.
477 } else if (ret == NF_CONNCOUNT_ADDED) { 421 */
478 return rbconn->list.count; 422 if (rbconn->list.count == 0) {
479 } else { 423 spin_unlock_bh(&rbconn->list.list_lock);
480 /* NF_CONNCOUNT_SKIP, rbconn is already
481 * reclaimed by gc, insert a new tree node
482 */
483 break; 424 break;
484 } 425 }
426
427 /* same source network -> be counted! */
428 ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
429 spin_unlock_bh(&rbconn->list.list_lock);
430 if (ret)
431 return 0; /* hotdrop */
432 else
433 return rbconn->list.count;
485 } 434 }
486 } 435 }
487 436
488 if (!tuple) 437 if (!tuple)
489 return 0; 438 return 0;
490 439
491 return insert_tree(net, data, root, hash, key, keylen, tuple, zone); 440 return insert_tree(net, data, root, hash, key, tuple, zone);
492} 441}
493 442
494static void tree_gc_worker(struct work_struct *work) 443static void tree_gc_worker(struct work_struct *work)
@@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work)
499 struct rb_node *node; 448 struct rb_node *node;
500 unsigned int tree, next_tree, gc_count = 0; 449 unsigned int tree, next_tree, gc_count = 0;
501 450
502 tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; 451 tree = data->gc_tree % CONNCOUNT_SLOTS;
503 root = &data->root[tree]; 452 root = &data->root[tree];
504 453
454 local_bh_disable();
505 rcu_read_lock(); 455 rcu_read_lock();
506 for (node = rb_first(root); node != NULL; node = rb_next(node)) { 456 for (node = rb_first(root); node != NULL; node = rb_next(node)) {
507 rbconn = rb_entry(node, struct nf_conncount_rb, node); 457 rbconn = rb_entry(node, struct nf_conncount_rb, node);
508 if (nf_conncount_gc_list(data->net, &rbconn->list)) 458 if (nf_conncount_gc_list(data->net, &rbconn->list))
509 gc_nodes[gc_count++] = rbconn; 459 gc_count++;
510 } 460 }
511 rcu_read_unlock(); 461 rcu_read_unlock();
462 local_bh_enable();
463
464 cond_resched();
512 465
513 spin_lock_bh(&nf_conncount_locks[tree]); 466 spin_lock_bh(&nf_conncount_locks[tree]);
467 if (gc_count < ARRAY_SIZE(gc_nodes))
468 goto next; /* do not bother */
514 469
515 if (gc_count) { 470 gc_count = 0;
516 tree_nodes_free(root, gc_nodes, gc_count); 471 node = rb_first(root);
472 while (node != NULL) {
473 rbconn = rb_entry(node, struct nf_conncount_rb, node);
474 node = rb_next(node);
475
476 if (rbconn->list.count > 0)
477 continue;
478
479 gc_nodes[gc_count++] = rbconn;
480 if (gc_count >= ARRAY_SIZE(gc_nodes)) {
481 tree_nodes_free(root, gc_nodes, gc_count);
482 gc_count = 0;
483 }
517 } 484 }
518 485
486 tree_nodes_free(root, gc_nodes, gc_count);
487next:
519 clear_bit(tree, data->pending_trees); 488 clear_bit(tree, data->pending_trees);
520 489
521 next_tree = (tree + 1) % CONNCOUNT_SLOTS; 490 next_tree = (tree + 1) % CONNCOUNT_SLOTS;
522 next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); 491 next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree);
523 492
524 if (next_tree < CONNCOUNT_SLOTS) { 493 if (next_tree < CONNCOUNT_SLOTS) {
525 data->gc_tree = next_tree; 494 data->gc_tree = next_tree;
@@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void)
621{ 590{
622 int i; 591 int i;
623 592
624 BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); 593 for (i = 0; i < CONNCOUNT_SLOTS; ++i)
625 BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0);
626
627 for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i)
628 spin_lock_init(&nf_conncount_locks[i]); 594 spin_lock_init(&nf_conncount_locks[i]);
629 595
630 conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", 596 conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple",
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index fec814dace5a..2b0a93300dd7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
5727 goto nla_put_failure; 5727 goto nla_put_failure;
5728 5728
5729 nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); 5729 nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
5730 if (!nest)
5731 goto nla_put_failure;
5730 if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || 5732 if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) ||
5731 nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) 5733 nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority)))
5732 goto nla_put_failure; 5734 goto nla_put_failure;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index b90d96ba4a12..af1497ab9464 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
30 enum ip_conntrack_info ctinfo; 30 enum ip_conntrack_info ctinfo;
31 const struct nf_conn *ct; 31 const struct nf_conn *ct;
32 unsigned int count; 32 unsigned int count;
33 bool addit;
34 33
35 tuple_ptr = &tuple; 34 tuple_ptr = &tuple;
36 35
@@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
44 return; 43 return;
45 } 44 }
46 45
47 nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, 46 if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
48 &addit);
49 count = priv->list.count;
50
51 if (!addit)
52 goto out;
53
54 if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
55 regs->verdict.code = NF_DROP; 47 regs->verdict.code = NF_DROP;
56 return; 48 return;
57 } 49 }
58 count++; 50
59out: 51 count = priv->list.count;
60 52
61 if ((count > priv->limit) ^ priv->invert) { 53 if ((count > priv->limit) ^ priv->invert) {
62 regs->verdict.code = NFT_BREAK; 54 regs->verdict.code = NFT_BREAK;