aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2014-03-12 18:49:51 -0400
committerPablo Neira Ayuso <pablo@netfilter.org>2014-03-17 06:11:57 -0400
commit7d08487777c8b30dea34790734d708470faaf1e5 (patch)
tree143fc9e0ec00cab5fe98fe2dd89239e3c7386c03 /net/netfilter
parent50e0e9b12914dd82d1ece22d57bf8c146a1d1b52 (diff)
netfilter: connlimit: use rbtree for per-host conntrack obj storage
With current match design every invocation of the connlimit_match function means we have to perform (number_of_conntracks % 256) lookups in the conntrack table [ to perform GC/delete stale entries ]. This is also the reason why ____nf_conntrack_find() in perf top has > 20% cpu time per core. This patch changes the storage to rbtree which cuts down the number of ct objects that need testing. When looking up a new tuple, we only test the connections of the host objects we visit while searching for the wanted host/network (or the leaf we need to insert at). The slot count is reduced to 32. Increasing slot count doesn't speed up things much because of rbtree nature. before patch (50kpps rx, 10kpps tx): + 20.95% ksoftirqd/0 [nf_conntrack] [k] ____nf_conntrack_find + 20.50% ksoftirqd/1 [nf_conntrack] [k] ____nf_conntrack_find + 20.27% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 5.76% ksoftirqd/1 [nf_conntrack] [k] hash_conntrack_raw + 5.39% ksoftirqd/2 [nf_conntrack] [k] hash_conntrack_raw + 5.35% ksoftirqd/0 [nf_conntrack] [k] hash_conntrack_raw after (90kpps, 51kpps tx): + 17.24% swapper [nf_conntrack] [k] ____nf_conntrack_find + 6.60% ksoftirqd/2 [nf_conntrack] [k] ____nf_conntrack_find + 2.73% swapper [nf_conntrack] [k] hash_conntrack_raw + 2.36% swapper [xt_connlimit] [k] count_tree Obvious disadvantages to previous version are the increase in code complexity and the increased memory cost. Partially based on Eric Dumazets fq scheduler. Reviewed-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/xt_connlimit.c224
1 files changed, 177 insertions, 47 deletions
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index dc5207f7a7fa..458464e7bd7a 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -19,6 +19,7 @@
19#include <linux/jhash.h> 19#include <linux/jhash.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/rbtree.h>
22#include <linux/module.h> 23#include <linux/module.h>
23#include <linux/random.h> 24#include <linux/random.h>
24#include <linux/skbuff.h> 25#include <linux/skbuff.h>
@@ -31,8 +32,9 @@
31#include <net/netfilter/nf_conntrack_tuple.h> 32#include <net/netfilter/nf_conntrack_tuple.h>
32#include <net/netfilter/nf_conntrack_zones.h> 33#include <net/netfilter/nf_conntrack_zones.h>
33 34
34#define CONNLIMIT_SLOTS 256 35#define CONNLIMIT_SLOTS 32
35#define CONNLIMIT_LOCK_SLOTS 32 36#define CONNLIMIT_LOCK_SLOTS 32
37#define CONNLIMIT_GC_MAX_NODES 8
36 38
37/* we will save the tuples of all connections we care about */ 39/* we will save the tuples of all connections we care about */
38struct xt_connlimit_conn { 40struct xt_connlimit_conn {
@@ -41,12 +43,20 @@ struct xt_connlimit_conn {
41 union nf_inet_addr addr; 43 union nf_inet_addr addr;
42}; 44};
43 45
46struct xt_connlimit_rb {
47 struct rb_node node;
48 struct hlist_head hhead; /* connections/hosts in same subnet */
49 union nf_inet_addr addr; /* search key */
50};
51
44struct xt_connlimit_data { 52struct xt_connlimit_data {
45 struct hlist_head iphash[CONNLIMIT_SLOTS]; 53 struct rb_root climit_root4[CONNLIMIT_SLOTS];
54 struct rb_root climit_root6[CONNLIMIT_SLOTS];
46 spinlock_t locks[CONNLIMIT_LOCK_SLOTS]; 55 spinlock_t locks[CONNLIMIT_LOCK_SLOTS];
47}; 56};
48 57
49static u_int32_t connlimit_rnd __read_mostly; 58static u_int32_t connlimit_rnd __read_mostly;
59static struct kmem_cache *connlimit_rb_cachep __read_mostly;
50static struct kmem_cache *connlimit_conn_cachep __read_mostly; 60static struct kmem_cache *connlimit_conn_cachep __read_mostly;
51 61
52static inline unsigned int connlimit_iphash(__be32 addr) 62static inline unsigned int connlimit_iphash(__be32 addr)
@@ -99,19 +109,33 @@ same_source_net(const union nf_inet_addr *addr,
99 } 109 }
100} 110}
101 111
102static int count_hlist(struct net *net, 112static bool add_hlist(struct hlist_head *head,
103 struct hlist_head *head, 113 const struct nf_conntrack_tuple *tuple,
104 const struct nf_conntrack_tuple *tuple, 114 const union nf_inet_addr *addr)
105 const union nf_inet_addr *addr, 115{
106 const union nf_inet_addr *mask, 116 struct xt_connlimit_conn *conn;
107 u_int8_t family, bool *addit) 117
118 conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
119 if (conn == NULL)
120 return false;
121 conn->tuple = *tuple;
122 conn->addr = *addr;
123 hlist_add_head(&conn->node, head);
124 return true;
125}
126
127static unsigned int check_hlist(struct net *net,
128 struct hlist_head *head,
129 const struct nf_conntrack_tuple *tuple,
130 bool *addit)
108{ 131{
109 const struct nf_conntrack_tuple_hash *found; 132 const struct nf_conntrack_tuple_hash *found;
110 struct xt_connlimit_conn *conn; 133 struct xt_connlimit_conn *conn;
111 struct hlist_node *n; 134 struct hlist_node *n;
112 struct nf_conn *found_ct; 135 struct nf_conn *found_ct;
113 int matches = 0; 136 unsigned int length = 0;
114 137
138 *addit = true;
115 rcu_read_lock(); 139 rcu_read_lock();
116 140
117 /* check the saved connections */ 141 /* check the saved connections */
@@ -144,30 +168,114 @@ static int count_hlist(struct net *net,
144 continue; 168 continue;
145 } 169 }
146 170
147 if (same_source_net(addr, mask, &conn->addr, family) == 0)
148 /* same source network -> be counted! */
149 ++matches;
150 nf_ct_put(found_ct); 171 nf_ct_put(found_ct);
172 length++;
151 } 173 }
152 174
153 rcu_read_unlock(); 175 rcu_read_unlock();
154 176
155 return matches; 177 return length;
156} 178}
157 179
158static bool add_hlist(struct hlist_head *head, 180static void tree_nodes_free(struct rb_root *root,
159 const struct nf_conntrack_tuple *tuple, 181 struct xt_connlimit_rb *gc_nodes[],
160 const union nf_inet_addr *addr) 182 unsigned int gc_count)
183{
184 struct xt_connlimit_rb *rbconn;
185
186 while (gc_count) {
187 rbconn = gc_nodes[--gc_count];
188 rb_erase(&rbconn->node, root);
189 kmem_cache_free(connlimit_rb_cachep, rbconn);
190 }
191}
192
193static unsigned int
194count_tree(struct net *net, struct rb_root *root,
195 const struct nf_conntrack_tuple *tuple,
196 const union nf_inet_addr *addr, const union nf_inet_addr *mask,
197 u8 family)
161{ 198{
199 struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
200 struct rb_node **rbnode, *parent;
201 struct xt_connlimit_rb *rbconn;
162 struct xt_connlimit_conn *conn; 202 struct xt_connlimit_conn *conn;
203 unsigned int gc_count;
204 bool no_gc = false;
205
206 restart:
207 gc_count = 0;
208 parent = NULL;
209 rbnode = &(root->rb_node);
210 while (*rbnode) {
211 int diff;
212 bool addit;
213
214 rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);
215
216 parent = *rbnode;
217 diff = same_source_net(addr, mask, &rbconn->addr, family);
218 if (diff < 0) {
219 rbnode = &((*rbnode)->rb_left);
220 } else if (diff > 0) {
221 rbnode = &((*rbnode)->rb_right);
222 } else {
223 /* same source network -> be counted! */
224 unsigned int count;
225 count = check_hlist(net, &rbconn->hhead, tuple, &addit);
226
227 tree_nodes_free(root, gc_nodes, gc_count);
228 if (!addit)
229 return count;
230
231 if (!add_hlist(&rbconn->hhead, tuple, addr))
232 return 0; /* hotdrop */
233
234 return count + 1;
235 }
236
237 if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
238 continue;
239
240 /* only used for GC on hhead, retval and 'addit' ignored */
241 check_hlist(net, &rbconn->hhead, tuple, &addit);
242 if (hlist_empty(&rbconn->hhead))
243 gc_nodes[gc_count++] = rbconn;
244 }
245
246 if (gc_count) {
247 no_gc = true;
248 tree_nodes_free(root, gc_nodes, gc_count);
249 /* tree_node_free before new allocation permits
250 * allocator to re-use newly free'd object.
251 *
252 * This is a rare event; in most cases we will find
253 * existing node to re-use. (or gc_count is 0).
254 */
255 goto restart;
256 }
257
258 /* no match, need to insert new node */
259 rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
260 if (rbconn == NULL)
261 return 0;
163 262
164 conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC); 263 conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
165 if (conn == NULL) 264 if (conn == NULL) {
166 return false; 265 kmem_cache_free(connlimit_rb_cachep, rbconn);
266 return 0;
267 }
268
167 conn->tuple = *tuple; 269 conn->tuple = *tuple;
168 conn->addr = *addr; 270 conn->addr = *addr;
169 hlist_add_head(&conn->node, head); 271 rbconn->addr = *addr;
170 return true; 272
273 INIT_HLIST_HEAD(&rbconn->hhead);
274 hlist_add_head(&conn->node, &rbconn->hhead);
275
276 rb_link_node(&rbconn->node, parent, rbnode);
277 rb_insert_color(&rbconn->node, root);
278 return 1;
171} 279}
172 280
173static int count_them(struct net *net, 281static int count_them(struct net *net,
@@ -177,26 +285,22 @@ static int count_them(struct net *net,
177 const union nf_inet_addr *mask, 285 const union nf_inet_addr *mask,
178 u_int8_t family) 286 u_int8_t family)
179{ 287{
180 struct hlist_head *hhead; 288 struct rb_root *root;
181 int count; 289 int count;
182 u32 hash; 290 u32 hash;
183 bool addit = true;
184 291
185 if (family == NFPROTO_IPV6) 292 if (family == NFPROTO_IPV6) {
186 hash = connlimit_iphash6(addr, mask); 293 hash = connlimit_iphash6(addr, mask);
187 else 294 root = &data->climit_root6[hash];
295 } else {
188 hash = connlimit_iphash(addr->ip & mask->ip); 296 hash = connlimit_iphash(addr->ip & mask->ip);
189 297 root = &data->climit_root4[hash];
190 hhead = &data->iphash[hash]; 298 }
191 299
192 spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); 300 spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
193 count = count_hlist(net, hhead, tuple, addr, mask, family, &addit); 301
194 if (addit) { 302 count = count_tree(net, root, tuple, addr, mask, family);
195 if (add_hlist(hhead, tuple, addr)) 303
196 count++;
197 else
198 count = -ENOMEM;
199 }
200 spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); 304 spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);
201 305
202 return count; 306 return count;
@@ -212,7 +316,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
212 const struct nf_conntrack_tuple *tuple_ptr = &tuple; 316 const struct nf_conntrack_tuple *tuple_ptr = &tuple;
213 enum ip_conntrack_info ctinfo; 317 enum ip_conntrack_info ctinfo;
214 const struct nf_conn *ct; 318 const struct nf_conn *ct;
215 int connections; 319 unsigned int connections;
216 320
217 ct = nf_ct_get(skb, &ctinfo); 321 ct = nf_ct_get(skb, &ctinfo);
218 if (ct != NULL) 322 if (ct != NULL)
@@ -233,7 +337,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
233 337
234 connections = count_them(net, info->data, tuple_ptr, &addr, 338 connections = count_them(net, info->data, tuple_ptr, &addr,
235 &info->mask, par->family); 339 &info->mask, par->family);
236 if (connections < 0) 340 if (connections == 0)
237 /* kmalloc failed, drop it entirely */ 341 /* kmalloc failed, drop it entirely */
238 goto hotdrop; 342 goto hotdrop;
239 343
@@ -276,28 +380,44 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
276 for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) 380 for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i)
277 spin_lock_init(&info->data->locks[i]); 381 spin_lock_init(&info->data->locks[i]);
278 382
279 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) 383 for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
280 INIT_HLIST_HEAD(&info->data->iphash[i]); 384 info->data->climit_root4[i] = RB_ROOT;
385 for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
386 info->data->climit_root6[i] = RB_ROOT;
281 387
282 return 0; 388 return 0;
283} 389}
284 390
285static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) 391static void destroy_tree(struct rb_root *r)
286{ 392{
287 const struct xt_connlimit_info *info = par->matchinfo;
288 struct xt_connlimit_conn *conn; 393 struct xt_connlimit_conn *conn;
394 struct xt_connlimit_rb *rbconn;
289 struct hlist_node *n; 395 struct hlist_node *n;
290 struct hlist_head *hash = info->data->iphash; 396 struct rb_node *node;
291 unsigned int i;
292 397
293 nf_ct_l3proto_module_put(par->family); 398 while ((node = rb_first(r)) != NULL) {
399 rbconn = container_of(node, struct xt_connlimit_rb, node);
294 400
295 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { 401 rb_erase(node, r);
296 hlist_for_each_entry_safe(conn, n, &hash[i], node) { 402
297 hlist_del(&conn->node); 403 hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
298 kmem_cache_free(connlimit_conn_cachep, conn); 404 kmem_cache_free(connlimit_conn_cachep, conn);
299 } 405
406 kmem_cache_free(connlimit_rb_cachep, rbconn);
300 } 407 }
408}
409
410static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
411{
412 const struct xt_connlimit_info *info = par->matchinfo;
413 unsigned int i;
414
415 nf_ct_l3proto_module_put(par->family);
416
417 for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
418 destroy_tree(&info->data->climit_root4[i]);
419 for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
420 destroy_tree(&info->data->climit_root6[i]);
301 421
302 kfree(info->data); 422 kfree(info->data);
303} 423}
@@ -326,9 +446,18 @@ static int __init connlimit_mt_init(void)
326 if (!connlimit_conn_cachep) 446 if (!connlimit_conn_cachep)
327 return -ENOMEM; 447 return -ENOMEM;
328 448
449 connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
450 sizeof(struct xt_connlimit_rb),
451 0, 0, NULL);
452 if (!connlimit_rb_cachep) {
453 kmem_cache_destroy(connlimit_conn_cachep);
454 return -ENOMEM;
455 }
329 ret = xt_register_match(&connlimit_mt_reg); 456 ret = xt_register_match(&connlimit_mt_reg);
330 if (ret != 0) 457 if (ret != 0) {
331 kmem_cache_destroy(connlimit_conn_cachep); 458 kmem_cache_destroy(connlimit_conn_cachep);
459 kmem_cache_destroy(connlimit_rb_cachep);
460 }
332 return ret; 461 return ret;
333} 462}
334 463
@@ -336,6 +465,7 @@ static void __exit connlimit_mt_exit(void)
336{ 465{
337 xt_unregister_match(&connlimit_mt_reg); 466 xt_unregister_match(&connlimit_mt_reg);
338 kmem_cache_destroy(connlimit_conn_cachep); 467 kmem_cache_destroy(connlimit_conn_cachep);
468 kmem_cache_destroy(connlimit_rb_cachep);
339} 469}
340 470
341module_init(connlimit_mt_init); 471module_init(connlimit_mt_init);