diff options
Diffstat (limited to 'net')
| -rw-r--r-- | net/netfilter/nf_conncount.c | 290 | ||||
| -rw-r--r-- | net/netfilter/nf_tables_api.c | 2 | ||||
| -rw-r--r-- | net/netfilter/nft_connlimit.c | 14 |
3 files changed, 133 insertions, 173 deletions
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 9cd180bda092..7554c56b2e63 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c | |||
| @@ -33,12 +33,6 @@ | |||
| 33 | 33 | ||
| 34 | #define CONNCOUNT_SLOTS 256U | 34 | #define CONNCOUNT_SLOTS 256U |
| 35 | 35 | ||
| 36 | #ifdef CONFIG_LOCKDEP | ||
| 37 | #define CONNCOUNT_LOCK_SLOTS 8U | ||
| 38 | #else | ||
| 39 | #define CONNCOUNT_LOCK_SLOTS 256U | ||
| 40 | #endif | ||
| 41 | |||
| 42 | #define CONNCOUNT_GC_MAX_NODES 8 | 36 | #define CONNCOUNT_GC_MAX_NODES 8 |
| 43 | #define MAX_KEYLEN 5 | 37 | #define MAX_KEYLEN 5 |
| 44 | 38 | ||
| @@ -49,8 +43,6 @@ struct nf_conncount_tuple { | |||
| 49 | struct nf_conntrack_zone zone; | 43 | struct nf_conntrack_zone zone; |
| 50 | int cpu; | 44 | int cpu; |
| 51 | u32 jiffies32; | 45 | u32 jiffies32; |
| 52 | bool dead; | ||
| 53 | struct rcu_head rcu_head; | ||
| 54 | }; | 46 | }; |
| 55 | 47 | ||
| 56 | struct nf_conncount_rb { | 48 | struct nf_conncount_rb { |
| @@ -60,7 +52,7 @@ struct nf_conncount_rb { | |||
| 60 | struct rcu_head rcu_head; | 52 | struct rcu_head rcu_head; |
| 61 | }; | 53 | }; |
| 62 | 54 | ||
| 63 | static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; | 55 | static spinlock_t nf_conncount_locks[CONNCOUNT_SLOTS] __cacheline_aligned_in_smp; |
| 64 | 56 | ||
| 65 | struct nf_conncount_data { | 57 | struct nf_conncount_data { |
| 66 | unsigned int keylen; | 58 | unsigned int keylen; |
| @@ -89,79 +81,25 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) | |||
| 89 | return memcmp(a, b, klen * sizeof(u32)); | 81 | return memcmp(a, b, klen * sizeof(u32)); |
| 90 | } | 82 | } |
| 91 | 83 | ||
| 92 | enum nf_conncount_list_add | 84 | static void conn_free(struct nf_conncount_list *list, |
| 93 | nf_conncount_add(struct nf_conncount_list *list, | ||
| 94 | const struct nf_conntrack_tuple *tuple, | ||
| 95 | const struct nf_conntrack_zone *zone) | ||
| 96 | { | ||
| 97 | struct nf_conncount_tuple *conn; | ||
| 98 | |||
| 99 | if (WARN_ON_ONCE(list->count > INT_MAX)) | ||
| 100 | return NF_CONNCOUNT_ERR; | ||
| 101 | |||
| 102 | conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); | ||
| 103 | if (conn == NULL) | ||
| 104 | return NF_CONNCOUNT_ERR; | ||
| 105 | |||
| 106 | conn->tuple = *tuple; | ||
| 107 | conn->zone = *zone; | ||
| 108 | conn->cpu = raw_smp_processor_id(); | ||
| 109 | conn->jiffies32 = (u32)jiffies; | ||
| 110 | conn->dead = false; | ||
| 111 | spin_lock_bh(&list->list_lock); | ||
| 112 | if (list->dead == true) { | ||
| 113 | kmem_cache_free(conncount_conn_cachep, conn); | ||
| 114 | spin_unlock_bh(&list->list_lock); | ||
| 115 | return NF_CONNCOUNT_SKIP; | ||
| 116 | } | ||
| 117 | list_add_tail(&conn->node, &list->head); | ||
| 118 | list->count++; | ||
| 119 | spin_unlock_bh(&list->list_lock); | ||
| 120 | return NF_CONNCOUNT_ADDED; | ||
| 121 | } | ||
| 122 | EXPORT_SYMBOL_GPL(nf_conncount_add); | ||
| 123 | |||
| 124 | static void __conn_free(struct rcu_head *h) | ||
| 125 | { | ||
| 126 | struct nf_conncount_tuple *conn; | ||
| 127 | |||
| 128 | conn = container_of(h, struct nf_conncount_tuple, rcu_head); | ||
| 129 | kmem_cache_free(conncount_conn_cachep, conn); | ||
| 130 | } | ||
| 131 | |||
| 132 | static bool conn_free(struct nf_conncount_list *list, | ||
| 133 | struct nf_conncount_tuple *conn) | 85 | struct nf_conncount_tuple *conn) |
| 134 | { | 86 | { |
| 135 | bool free_entry = false; | 87 | lockdep_assert_held(&list->list_lock); |
| 136 | |||
| 137 | spin_lock_bh(&list->list_lock); | ||
| 138 | |||
| 139 | if (conn->dead) { | ||
| 140 | spin_unlock_bh(&list->list_lock); | ||
| 141 | return free_entry; | ||
| 142 | } | ||
| 143 | 88 | ||
| 144 | list->count--; | 89 | list->count--; |
| 145 | conn->dead = true; | 90 | list_del(&conn->node); |
| 146 | list_del_rcu(&conn->node); | ||
| 147 | if (list->count == 0) { | ||
| 148 | list->dead = true; | ||
| 149 | free_entry = true; | ||
| 150 | } | ||
| 151 | 91 | ||
| 152 | spin_unlock_bh(&list->list_lock); | 92 | kmem_cache_free(conncount_conn_cachep, conn); |
| 153 | call_rcu(&conn->rcu_head, __conn_free); | ||
| 154 | return free_entry; | ||
| 155 | } | 93 | } |
| 156 | 94 | ||
| 157 | static const struct nf_conntrack_tuple_hash * | 95 | static const struct nf_conntrack_tuple_hash * |
| 158 | find_or_evict(struct net *net, struct nf_conncount_list *list, | 96 | find_or_evict(struct net *net, struct nf_conncount_list *list, |
| 159 | struct nf_conncount_tuple *conn, bool *free_entry) | 97 | struct nf_conncount_tuple *conn) |
| 160 | { | 98 | { |
| 161 | const struct nf_conntrack_tuple_hash *found; | 99 | const struct nf_conntrack_tuple_hash *found; |
| 162 | unsigned long a, b; | 100 | unsigned long a, b; |
| 163 | int cpu = raw_smp_processor_id(); | 101 | int cpu = raw_smp_processor_id(); |
| 164 | __s32 age; | 102 | u32 age; |
| 165 | 103 | ||
| 166 | found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); | 104 | found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); |
| 167 | if (found) | 105 | if (found) |
| @@ -176,52 +114,45 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, | |||
| 176 | */ | 114 | */ |
| 177 | age = a - b; | 115 | age = a - b; |
| 178 | if (conn->cpu == cpu || age >= 2) { | 116 | if (conn->cpu == cpu || age >= 2) { |
| 179 | *free_entry = conn_free(list, conn); | 117 | conn_free(list, conn); |
| 180 | return ERR_PTR(-ENOENT); | 118 | return ERR_PTR(-ENOENT); |
| 181 | } | 119 | } |
| 182 | 120 | ||
| 183 | return ERR_PTR(-EAGAIN); | 121 | return ERR_PTR(-EAGAIN); |
| 184 | } | 122 | } |
| 185 | 123 | ||
| 186 | void nf_conncount_lookup(struct net *net, | 124 | static int __nf_conncount_add(struct net *net, |
| 187 | struct nf_conncount_list *list, | 125 | struct nf_conncount_list *list, |
| 188 | const struct nf_conntrack_tuple *tuple, | 126 | const struct nf_conntrack_tuple *tuple, |
| 189 | const struct nf_conntrack_zone *zone, | 127 | const struct nf_conntrack_zone *zone) |
| 190 | bool *addit) | ||
| 191 | { | 128 | { |
| 192 | const struct nf_conntrack_tuple_hash *found; | 129 | const struct nf_conntrack_tuple_hash *found; |
| 193 | struct nf_conncount_tuple *conn, *conn_n; | 130 | struct nf_conncount_tuple *conn, *conn_n; |
| 194 | struct nf_conn *found_ct; | 131 | struct nf_conn *found_ct; |
| 195 | unsigned int collect = 0; | 132 | unsigned int collect = 0; |
| 196 | bool free_entry = false; | ||
| 197 | |||
| 198 | /* best effort only */ | ||
| 199 | *addit = tuple ? true : false; | ||
| 200 | 133 | ||
| 201 | /* check the saved connections */ | 134 | /* check the saved connections */ |
| 202 | list_for_each_entry_safe(conn, conn_n, &list->head, node) { | 135 | list_for_each_entry_safe(conn, conn_n, &list->head, node) { |
| 203 | if (collect > CONNCOUNT_GC_MAX_NODES) | 136 | if (collect > CONNCOUNT_GC_MAX_NODES) |
| 204 | break; | 137 | break; |
| 205 | 138 | ||
| 206 | found = find_or_evict(net, list, conn, &free_entry); | 139 | found = find_or_evict(net, list, conn); |
| 207 | if (IS_ERR(found)) { | 140 | if (IS_ERR(found)) { |
| 208 | /* Not found, but might be about to be confirmed */ | 141 | /* Not found, but might be about to be confirmed */ |
| 209 | if (PTR_ERR(found) == -EAGAIN) { | 142 | if (PTR_ERR(found) == -EAGAIN) { |
| 210 | if (!tuple) | ||
| 211 | continue; | ||
| 212 | |||
| 213 | if (nf_ct_tuple_equal(&conn->tuple, tuple) && | 143 | if (nf_ct_tuple_equal(&conn->tuple, tuple) && |
| 214 | nf_ct_zone_id(&conn->zone, conn->zone.dir) == | 144 | nf_ct_zone_id(&conn->zone, conn->zone.dir) == |
| 215 | nf_ct_zone_id(zone, zone->dir)) | 145 | nf_ct_zone_id(zone, zone->dir)) |
| 216 | *addit = false; | 146 | return 0; /* already exists */ |
| 217 | } else if (PTR_ERR(found) == -ENOENT) | 147 | } else { |
| 218 | collect++; | 148 | collect++; |
| 149 | } | ||
| 219 | continue; | 150 | continue; |
| 220 | } | 151 | } |
| 221 | 152 | ||
| 222 | found_ct = nf_ct_tuplehash_to_ctrack(found); | 153 | found_ct = nf_ct_tuplehash_to_ctrack(found); |
| 223 | 154 | ||
| 224 | if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && | 155 | if (nf_ct_tuple_equal(&conn->tuple, tuple) && |
| 225 | nf_ct_zone_equal(found_ct, zone, zone->dir)) { | 156 | nf_ct_zone_equal(found_ct, zone, zone->dir)) { |
| 226 | /* | 157 | /* |
| 227 | * We should not see tuples twice unless someone hooks | 158 | * We should not see tuples twice unless someone hooks |
| @@ -229,7 +160,8 @@ void nf_conncount_lookup(struct net *net, | |||
| 229 | * | 160 | * |
| 230 | * Attempt to avoid a re-add in this case. | 161 | * Attempt to avoid a re-add in this case. |
| 231 | */ | 162 | */ |
| 232 | *addit = false; | 163 | nf_ct_put(found_ct); |
| 164 | return 0; | ||
| 233 | } else if (already_closed(found_ct)) { | 165 | } else if (already_closed(found_ct)) { |
| 234 | /* | 166 | /* |
| 235 | * we do not care about connections which are | 167 | * we do not care about connections which are |
| @@ -243,19 +175,48 @@ void nf_conncount_lookup(struct net *net, | |||
| 243 | 175 | ||
| 244 | nf_ct_put(found_ct); | 176 | nf_ct_put(found_ct); |
| 245 | } | 177 | } |
| 178 | |||
| 179 | if (WARN_ON_ONCE(list->count > INT_MAX)) | ||
| 180 | return -EOVERFLOW; | ||
| 181 | |||
| 182 | conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); | ||
| 183 | if (conn == NULL) | ||
| 184 | return -ENOMEM; | ||
| 185 | |||
| 186 | conn->tuple = *tuple; | ||
| 187 | conn->zone = *zone; | ||
| 188 | conn->cpu = raw_smp_processor_id(); | ||
| 189 | conn->jiffies32 = (u32)jiffies; | ||
| 190 | list_add_tail(&conn->node, &list->head); | ||
| 191 | list->count++; | ||
| 192 | return 0; | ||
| 246 | } | 193 | } |
| 247 | EXPORT_SYMBOL_GPL(nf_conncount_lookup); | 194 | |
| 195 | int nf_conncount_add(struct net *net, | ||
| 196 | struct nf_conncount_list *list, | ||
| 197 | const struct nf_conntrack_tuple *tuple, | ||
| 198 | const struct nf_conntrack_zone *zone) | ||
| 199 | { | ||
| 200 | int ret; | ||
| 201 | |||
| 202 | /* check the saved connections */ | ||
| 203 | spin_lock_bh(&list->list_lock); | ||
| 204 | ret = __nf_conncount_add(net, list, tuple, zone); | ||
| 205 | spin_unlock_bh(&list->list_lock); | ||
| 206 | |||
| 207 | return ret; | ||
| 208 | } | ||
| 209 | EXPORT_SYMBOL_GPL(nf_conncount_add); | ||
| 248 | 210 | ||
| 249 | void nf_conncount_list_init(struct nf_conncount_list *list) | 211 | void nf_conncount_list_init(struct nf_conncount_list *list) |
| 250 | { | 212 | { |
| 251 | spin_lock_init(&list->list_lock); | 213 | spin_lock_init(&list->list_lock); |
| 252 | INIT_LIST_HEAD(&list->head); | 214 | INIT_LIST_HEAD(&list->head); |
| 253 | list->count = 0; | 215 | list->count = 0; |
| 254 | list->dead = false; | ||
| 255 | } | 216 | } |
| 256 | EXPORT_SYMBOL_GPL(nf_conncount_list_init); | 217 | EXPORT_SYMBOL_GPL(nf_conncount_list_init); |
| 257 | 218 | ||
| 258 | /* Return true if the list is empty */ | 219 | /* Return true if the list is empty. Must be called with BH disabled. */ |
| 259 | bool nf_conncount_gc_list(struct net *net, | 220 | bool nf_conncount_gc_list(struct net *net, |
| 260 | struct nf_conncount_list *list) | 221 | struct nf_conncount_list *list) |
| 261 | { | 222 | { |
| @@ -263,17 +224,17 @@ bool nf_conncount_gc_list(struct net *net, | |||
| 263 | struct nf_conncount_tuple *conn, *conn_n; | 224 | struct nf_conncount_tuple *conn, *conn_n; |
| 264 | struct nf_conn *found_ct; | 225 | struct nf_conn *found_ct; |
| 265 | unsigned int collected = 0; | 226 | unsigned int collected = 0; |
| 266 | bool free_entry = false; | ||
| 267 | bool ret = false; | 227 | bool ret = false; |
| 268 | 228 | ||
| 229 | /* don't bother if other cpu is already doing GC */ | ||
| 230 | if (!spin_trylock(&list->list_lock)) | ||
| 231 | return false; | ||
| 232 | |||
| 269 | list_for_each_entry_safe(conn, conn_n, &list->head, node) { | 233 | list_for_each_entry_safe(conn, conn_n, &list->head, node) { |
| 270 | found = find_or_evict(net, list, conn, &free_entry); | 234 | found = find_or_evict(net, list, conn); |
| 271 | if (IS_ERR(found)) { | 235 | if (IS_ERR(found)) { |
| 272 | if (PTR_ERR(found) == -ENOENT) { | 236 | if (PTR_ERR(found) == -ENOENT) |
| 273 | if (free_entry) | ||
| 274 | return true; | ||
| 275 | collected++; | 237 | collected++; |
| 276 | } | ||
| 277 | continue; | 238 | continue; |
| 278 | } | 239 | } |
| 279 | 240 | ||
| @@ -284,23 +245,19 @@ bool nf_conncount_gc_list(struct net *net, | |||
| 284 | * closed already -> ditch it | 245 | * closed already -> ditch it |
| 285 | */ | 246 | */ |
| 286 | nf_ct_put(found_ct); | 247 | nf_ct_put(found_ct); |
| 287 | if (conn_free(list, conn)) | 248 | conn_free(list, conn); |
| 288 | return true; | ||
| 289 | collected++; | 249 | collected++; |
| 290 | continue; | 250 | continue; |
| 291 | } | 251 | } |
| 292 | 252 | ||
| 293 | nf_ct_put(found_ct); | 253 | nf_ct_put(found_ct); |
| 294 | if (collected > CONNCOUNT_GC_MAX_NODES) | 254 | if (collected > CONNCOUNT_GC_MAX_NODES) |
| 295 | return false; | 255 | break; |
| 296 | } | 256 | } |
| 297 | 257 | ||
| 298 | spin_lock_bh(&list->list_lock); | 258 | if (!list->count) |
| 299 | if (!list->count) { | ||
| 300 | list->dead = true; | ||
| 301 | ret = true; | 259 | ret = true; |
| 302 | } | 260 | spin_unlock(&list->list_lock); |
| 303 | spin_unlock_bh(&list->list_lock); | ||
| 304 | 261 | ||
| 305 | return ret; | 262 | return ret; |
| 306 | } | 263 | } |
| @@ -314,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h) | |||
| 314 | kmem_cache_free(conncount_rb_cachep, rbconn); | 271 | kmem_cache_free(conncount_rb_cachep, rbconn); |
| 315 | } | 272 | } |
| 316 | 273 | ||
| 274 | /* caller must hold tree nf_conncount_locks[] lock */ | ||
| 317 | static void tree_nodes_free(struct rb_root *root, | 275 | static void tree_nodes_free(struct rb_root *root, |
| 318 | struct nf_conncount_rb *gc_nodes[], | 276 | struct nf_conncount_rb *gc_nodes[], |
| 319 | unsigned int gc_count) | 277 | unsigned int gc_count) |
| @@ -323,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root, | |||
| 323 | while (gc_count) { | 281 | while (gc_count) { |
| 324 | rbconn = gc_nodes[--gc_count]; | 282 | rbconn = gc_nodes[--gc_count]; |
| 325 | spin_lock(&rbconn->list.list_lock); | 283 | spin_lock(&rbconn->list.list_lock); |
| 326 | rb_erase(&rbconn->node, root); | 284 | if (!rbconn->list.count) { |
| 327 | call_rcu(&rbconn->rcu_head, __tree_nodes_free); | 285 | rb_erase(&rbconn->node, root); |
| 286 | call_rcu(&rbconn->rcu_head, __tree_nodes_free); | ||
| 287 | } | ||
| 328 | spin_unlock(&rbconn->list.list_lock); | 288 | spin_unlock(&rbconn->list.list_lock); |
| 329 | } | 289 | } |
| 330 | } | 290 | } |
| @@ -341,20 +301,19 @@ insert_tree(struct net *net, | |||
| 341 | struct rb_root *root, | 301 | struct rb_root *root, |
| 342 | unsigned int hash, | 302 | unsigned int hash, |
| 343 | const u32 *key, | 303 | const u32 *key, |
| 344 | u8 keylen, | ||
| 345 | const struct nf_conntrack_tuple *tuple, | 304 | const struct nf_conntrack_tuple *tuple, |
| 346 | const struct nf_conntrack_zone *zone) | 305 | const struct nf_conntrack_zone *zone) |
| 347 | { | 306 | { |
| 348 | enum nf_conncount_list_add ret; | ||
| 349 | struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; | 307 | struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; |
| 350 | struct rb_node **rbnode, *parent; | 308 | struct rb_node **rbnode, *parent; |
| 351 | struct nf_conncount_rb *rbconn; | 309 | struct nf_conncount_rb *rbconn; |
| 352 | struct nf_conncount_tuple *conn; | 310 | struct nf_conncount_tuple *conn; |
| 353 | unsigned int count = 0, gc_count = 0; | 311 | unsigned int count = 0, gc_count = 0; |
| 354 | bool node_found = false; | 312 | u8 keylen = data->keylen; |
| 355 | 313 | bool do_gc = true; | |
| 356 | spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); | ||
| 357 | 314 | ||
| 315 | spin_lock_bh(&nf_conncount_locks[hash]); | ||
| 316 | restart: | ||
| 358 | parent = NULL; | 317 | parent = NULL; |
| 359 | rbnode = &(root->rb_node); | 318 | rbnode = &(root->rb_node); |
| 360 | while (*rbnode) { | 319 | while (*rbnode) { |
| @@ -368,45 +327,32 @@ insert_tree(struct net *net, | |||
| 368 | } else if (diff > 0) { | 327 | } else if (diff > 0) { |
| 369 | rbnode = &((*rbnode)->rb_right); | 328 | rbnode = &((*rbnode)->rb_right); |
| 370 | } else { | 329 | } else { |
| 371 | /* unlikely: other cpu added node already */ | 330 | int ret; |
| 372 | node_found = true; | 331 | |
| 373 | ret = nf_conncount_add(&rbconn->list, tuple, zone); | 332 | ret = nf_conncount_add(net, &rbconn->list, tuple, zone); |
| 374 | if (ret == NF_CONNCOUNT_ERR) { | 333 | if (ret) |
| 375 | count = 0; /* hotdrop */ | 334 | count = 0; /* hotdrop */ |
| 376 | } else if (ret == NF_CONNCOUNT_ADDED) { | 335 | else |
| 377 | count = rbconn->list.count; | 336 | count = rbconn->list.count; |
| 378 | } else { | 337 | tree_nodes_free(root, gc_nodes, gc_count); |
| 379 | /* NF_CONNCOUNT_SKIP, rbconn is already | 338 | goto out_unlock; |
| 380 | * reclaimed by gc, insert a new tree node | ||
| 381 | */ | ||
| 382 | node_found = false; | ||
| 383 | } | ||
| 384 | break; | ||
| 385 | } | 339 | } |
| 386 | 340 | ||
| 387 | if (gc_count >= ARRAY_SIZE(gc_nodes)) | 341 | if (gc_count >= ARRAY_SIZE(gc_nodes)) |
| 388 | continue; | 342 | continue; |
| 389 | 343 | ||
| 390 | if (nf_conncount_gc_list(net, &rbconn->list)) | 344 | if (do_gc && nf_conncount_gc_list(net, &rbconn->list)) |
| 391 | gc_nodes[gc_count++] = rbconn; | 345 | gc_nodes[gc_count++] = rbconn; |
| 392 | } | 346 | } |
| 393 | 347 | ||
| 394 | if (gc_count) { | 348 | if (gc_count) { |
| 395 | tree_nodes_free(root, gc_nodes, gc_count); | 349 | tree_nodes_free(root, gc_nodes, gc_count); |
| 396 | /* tree_node_free before new allocation permits | 350 | schedule_gc_worker(data, hash); |
| 397 | * allocator to re-use newly free'd object. | 351 | gc_count = 0; |
| 398 | * | 352 | do_gc = false; |
| 399 | * This is a rare event; in most cases we will find | 353 | goto restart; |
| 400 | * existing node to re-use. (or gc_count is 0). | ||
| 401 | */ | ||
| 402 | |||
| 403 | if (gc_count >= ARRAY_SIZE(gc_nodes)) | ||
| 404 | schedule_gc_worker(data, hash); | ||
| 405 | } | 354 | } |
| 406 | 355 | ||
| 407 | if (node_found) | ||
| 408 | goto out_unlock; | ||
| 409 | |||
| 410 | /* expected case: match, insert new node */ | 356 | /* expected case: match, insert new node */ |
| 411 | rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); | 357 | rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); |
| 412 | if (rbconn == NULL) | 358 | if (rbconn == NULL) |
| @@ -430,7 +376,7 @@ insert_tree(struct net *net, | |||
| 430 | rb_link_node_rcu(&rbconn->node, parent, rbnode); | 376 | rb_link_node_rcu(&rbconn->node, parent, rbnode); |
| 431 | rb_insert_color(&rbconn->node, root); | 377 | rb_insert_color(&rbconn->node, root); |
| 432 | out_unlock: | 378 | out_unlock: |
| 433 | spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); | 379 | spin_unlock_bh(&nf_conncount_locks[hash]); |
| 434 | return count; | 380 | return count; |
| 435 | } | 381 | } |
| 436 | 382 | ||
| @@ -441,7 +387,6 @@ count_tree(struct net *net, | |||
| 441 | const struct nf_conntrack_tuple *tuple, | 387 | const struct nf_conntrack_tuple *tuple, |
| 442 | const struct nf_conntrack_zone *zone) | 388 | const struct nf_conntrack_zone *zone) |
| 443 | { | 389 | { |
| 444 | enum nf_conncount_list_add ret; | ||
| 445 | struct rb_root *root; | 390 | struct rb_root *root; |
| 446 | struct rb_node *parent; | 391 | struct rb_node *parent; |
| 447 | struct nf_conncount_rb *rbconn; | 392 | struct nf_conncount_rb *rbconn; |
| @@ -454,7 +399,6 @@ count_tree(struct net *net, | |||
| 454 | parent = rcu_dereference_raw(root->rb_node); | 399 | parent = rcu_dereference_raw(root->rb_node); |
| 455 | while (parent) { | 400 | while (parent) { |
| 456 | int diff; | 401 | int diff; |
| 457 | bool addit; | ||
| 458 | 402 | ||
| 459 | rbconn = rb_entry(parent, struct nf_conncount_rb, node); | 403 | rbconn = rb_entry(parent, struct nf_conncount_rb, node); |
| 460 | 404 | ||
| @@ -464,31 +408,36 @@ count_tree(struct net *net, | |||
| 464 | } else if (diff > 0) { | 408 | } else if (diff > 0) { |
| 465 | parent = rcu_dereference_raw(parent->rb_right); | 409 | parent = rcu_dereference_raw(parent->rb_right); |
| 466 | } else { | 410 | } else { |
| 467 | /* same source network -> be counted! */ | 411 | int ret; |
| 468 | nf_conncount_lookup(net, &rbconn->list, tuple, zone, | ||
| 469 | &addit); | ||
| 470 | 412 | ||
| 471 | if (!addit) | 413 | if (!tuple) { |
| 414 | nf_conncount_gc_list(net, &rbconn->list); | ||
| 472 | return rbconn->list.count; | 415 | return rbconn->list.count; |
| 416 | } | ||
| 473 | 417 | ||
| 474 | ret = nf_conncount_add(&rbconn->list, tuple, zone); | 418 | spin_lock_bh(&rbconn->list.list_lock); |
| 475 | if (ret == NF_CONNCOUNT_ERR) { | 419 | /* Node might be about to be free'd. |
| 476 | return 0; /* hotdrop */ | 420 | * We need to defer to insert_tree() in this case. |
| 477 | } else if (ret == NF_CONNCOUNT_ADDED) { | 421 | */ |
| 478 | return rbconn->list.count; | 422 | if (rbconn->list.count == 0) { |
| 479 | } else { | 423 | spin_unlock_bh(&rbconn->list.list_lock); |
| 480 | /* NF_CONNCOUNT_SKIP, rbconn is already | ||
| 481 | * reclaimed by gc, insert a new tree node | ||
| 482 | */ | ||
| 483 | break; | 424 | break; |
| 484 | } | 425 | } |
| 426 | |||
| 427 | /* same source network -> be counted! */ | ||
| 428 | ret = __nf_conncount_add(net, &rbconn->list, tuple, zone); | ||
| 429 | spin_unlock_bh(&rbconn->list.list_lock); | ||
| 430 | if (ret) | ||
| 431 | return 0; /* hotdrop */ | ||
| 432 | else | ||
| 433 | return rbconn->list.count; | ||
| 485 | } | 434 | } |
| 486 | } | 435 | } |
| 487 | 436 | ||
| 488 | if (!tuple) | 437 | if (!tuple) |
| 489 | return 0; | 438 | return 0; |
| 490 | 439 | ||
| 491 | return insert_tree(net, data, root, hash, key, keylen, tuple, zone); | 440 | return insert_tree(net, data, root, hash, key, tuple, zone); |
| 492 | } | 441 | } |
| 493 | 442 | ||
| 494 | static void tree_gc_worker(struct work_struct *work) | 443 | static void tree_gc_worker(struct work_struct *work) |
| @@ -499,27 +448,47 @@ static void tree_gc_worker(struct work_struct *work) | |||
| 499 | struct rb_node *node; | 448 | struct rb_node *node; |
| 500 | unsigned int tree, next_tree, gc_count = 0; | 449 | unsigned int tree, next_tree, gc_count = 0; |
| 501 | 450 | ||
| 502 | tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; | 451 | tree = data->gc_tree % CONNCOUNT_SLOTS; |
| 503 | root = &data->root[tree]; | 452 | root = &data->root[tree]; |
| 504 | 453 | ||
| 454 | local_bh_disable(); | ||
| 505 | rcu_read_lock(); | 455 | rcu_read_lock(); |
| 506 | for (node = rb_first(root); node != NULL; node = rb_next(node)) { | 456 | for (node = rb_first(root); node != NULL; node = rb_next(node)) { |
| 507 | rbconn = rb_entry(node, struct nf_conncount_rb, node); | 457 | rbconn = rb_entry(node, struct nf_conncount_rb, node); |
| 508 | if (nf_conncount_gc_list(data->net, &rbconn->list)) | 458 | if (nf_conncount_gc_list(data->net, &rbconn->list)) |
| 509 | gc_nodes[gc_count++] = rbconn; | 459 | gc_count++; |
| 510 | } | 460 | } |
| 511 | rcu_read_unlock(); | 461 | rcu_read_unlock(); |
| 462 | local_bh_enable(); | ||
| 463 | |||
| 464 | cond_resched(); | ||
| 512 | 465 | ||
| 513 | spin_lock_bh(&nf_conncount_locks[tree]); | 466 | spin_lock_bh(&nf_conncount_locks[tree]); |
| 467 | if (gc_count < ARRAY_SIZE(gc_nodes)) | ||
| 468 | goto next; /* do not bother */ | ||
| 514 | 469 | ||
| 515 | if (gc_count) { | 470 | gc_count = 0; |
| 516 | tree_nodes_free(root, gc_nodes, gc_count); | 471 | node = rb_first(root); |
| 472 | while (node != NULL) { | ||
| 473 | rbconn = rb_entry(node, struct nf_conncount_rb, node); | ||
| 474 | node = rb_next(node); | ||
| 475 | |||
| 476 | if (rbconn->list.count > 0) | ||
| 477 | continue; | ||
| 478 | |||
| 479 | gc_nodes[gc_count++] = rbconn; | ||
| 480 | if (gc_count >= ARRAY_SIZE(gc_nodes)) { | ||
| 481 | tree_nodes_free(root, gc_nodes, gc_count); | ||
| 482 | gc_count = 0; | ||
| 483 | } | ||
| 517 | } | 484 | } |
| 518 | 485 | ||
| 486 | tree_nodes_free(root, gc_nodes, gc_count); | ||
| 487 | next: | ||
| 519 | clear_bit(tree, data->pending_trees); | 488 | clear_bit(tree, data->pending_trees); |
| 520 | 489 | ||
| 521 | next_tree = (tree + 1) % CONNCOUNT_SLOTS; | 490 | next_tree = (tree + 1) % CONNCOUNT_SLOTS; |
| 522 | next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); | 491 | next_tree = find_next_bit(data->pending_trees, CONNCOUNT_SLOTS, next_tree); |
| 523 | 492 | ||
| 524 | if (next_tree < CONNCOUNT_SLOTS) { | 493 | if (next_tree < CONNCOUNT_SLOTS) { |
| 525 | data->gc_tree = next_tree; | 494 | data->gc_tree = next_tree; |
| @@ -621,10 +590,7 @@ static int __init nf_conncount_modinit(void) | |||
| 621 | { | 590 | { |
| 622 | int i; | 591 | int i; |
| 623 | 592 | ||
| 624 | BUILD_BUG_ON(CONNCOUNT_LOCK_SLOTS > CONNCOUNT_SLOTS); | 593 | for (i = 0; i < CONNCOUNT_SLOTS; ++i) |
| 625 | BUILD_BUG_ON((CONNCOUNT_SLOTS % CONNCOUNT_LOCK_SLOTS) != 0); | ||
| 626 | |||
| 627 | for (i = 0; i < CONNCOUNT_LOCK_SLOTS; ++i) | ||
| 628 | spin_lock_init(&nf_conncount_locks[i]); | 594 | spin_lock_init(&nf_conncount_locks[i]); |
| 629 | 595 | ||
| 630 | conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", | 596 | conncount_conn_cachep = kmem_cache_create("nf_conncount_tuple", |
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fec814dace5a..2b0a93300dd7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
| @@ -5727,6 +5727,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, | |||
| 5727 | goto nla_put_failure; | 5727 | goto nla_put_failure; |
| 5728 | 5728 | ||
| 5729 | nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); | 5729 | nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK); |
| 5730 | if (!nest) | ||
| 5731 | goto nla_put_failure; | ||
| 5730 | if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || | 5732 | if (nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_NUM, htonl(flowtable->hooknum)) || |
| 5731 | nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) | 5733 | nla_put_be32(skb, NFTA_FLOWTABLE_HOOK_PRIORITY, htonl(flowtable->priority))) |
| 5732 | goto nla_put_failure; | 5734 | goto nla_put_failure; |
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index b90d96ba4a12..af1497ab9464 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c | |||
| @@ -30,7 +30,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, | |||
| 30 | enum ip_conntrack_info ctinfo; | 30 | enum ip_conntrack_info ctinfo; |
| 31 | const struct nf_conn *ct; | 31 | const struct nf_conn *ct; |
| 32 | unsigned int count; | 32 | unsigned int count; |
| 33 | bool addit; | ||
| 34 | 33 | ||
| 35 | tuple_ptr = &tuple; | 34 | tuple_ptr = &tuple; |
| 36 | 35 | ||
| @@ -44,19 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, | |||
| 44 | return; | 43 | return; |
| 45 | } | 44 | } |
| 46 | 45 | ||
| 47 | nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, | 46 | if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) { |
| 48 | &addit); | ||
| 49 | count = priv->list.count; | ||
| 50 | |||
| 51 | if (!addit) | ||
| 52 | goto out; | ||
| 53 | |||
| 54 | if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { | ||
| 55 | regs->verdict.code = NF_DROP; | 47 | regs->verdict.code = NF_DROP; |
| 56 | return; | 48 | return; |
| 57 | } | 49 | } |
| 58 | count++; | 50 | |
| 59 | out: | 51 | count = priv->list.count; |
| 60 | 52 | ||
| 61 | if ((count > priv->limit) ^ priv->invert) { | 53 | if ((count > priv->limit) ^ priv->invert) { |
| 62 | regs->verdict.code = NFT_BREAK; | 54 | regs->verdict.code = NFT_BREAK; |
