author	Changli Gao <xiaosuo@gmail.com>	2010-09-21 11:49:20 -0400
committer	Patrick McHardy <kaber@trash.net>	2010-09-21 11:49:20 -0400
commit	99f07e91bef34db0fc8b1a224096e97f02dc0d56 (patch)
tree	869d47e97adb3c04b42f38a55836f2518f6221f9 /net/netfilter
parent	8a8030407f55a6aaedb51167c1a2383311fcd707 (diff)
netfilter: save the hash of the tuple in the original direction for later use
Since we don't change the tuple in the original direction, we can save it in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for later use by __nf_conntrack_confirm().

__hash_conntrack() is split into two steps: hash_conntrack_raw() computes the raw hash, and __hash_bucket() maps it to a bucket id.

In the SYN-flood case, early_drop() no longer needs to recompute the hash.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
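For illustration only (not part of the patch): a minimal, self-contained C sketch of the two ideas above, using toy types and a stand-in mixing function rather than the kernel's jhash2/tuple structures. It shows the multiply-shift bucket mapping used by __hash_bucket() and the pattern of computing the expensive hash once, caching the raw 32-bit value, and later only re-deriving the bucket from it.

/* Standalone sketch (toy types, not kernel code). The kernel stashes the raw
 * 32-bit hash in the otherwise-unused hnnode.pprev pointer of the
 * reply-direction tuplehash; a plain "saved_hash" field stands in for that
 * trick here. */
#include <stdint.h>
#include <stdio.h>

/* Multiply-shift bucket mapping, as in __hash_bucket(): maps a full 32-bit
 * hash onto [0, size) without a modulo, so the hash is computed once and can
 * be re-bucketed cheaply for any table size. */
static uint32_t bucket_of(uint32_t hash, unsigned int size)
{
	return ((uint64_t)hash * size) >> 32;
}

struct toy_conn {
	uint32_t key;		/* stands in for the conntrack tuple */
	uint32_t saved_hash;	/* raw hash cached at allocation time */
};

/* Toy stand-in for hash_conntrack_raw(): any decent 32-bit mixer will do. */
static uint32_t raw_hash(uint32_t key)
{
	key ^= key >> 16;
	key *= 0x7feb352dU;
	key ^= key >> 15;
	key *= 0x846ca68bU;
	key ^= key >> 16;
	return key;
}

int main(void)
{
	struct toy_conn ct = { .key = 0xc0a80001, .saved_hash = 0 };

	/* "lookup/alloc" path: hash once, remember the raw value */
	ct.saved_hash = raw_hash(ct.key);

	/* "confirm" path: no rehash, just re-derive the bucket */
	printf("bucket in 16384-slot table: %u\n",
	       (unsigned)bucket_of(ct.saved_hash, 16384));
	printf("bucket after resize to 65536 slots: %u\n",
	       (unsigned)bucket_of(ct.saved_hash, 65536));
	return 0;
}

Because the cached value is the raw hash rather than a bucket index, it stays usable even if the table size changes between allocation and confirmation; only the cheap multiply-shift step depends on the current size.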
Diffstat (limited to 'net/netfilter')
-rw-r--r--	net/netfilter/nf_conntrack_core.c	112
1 file changed, 78 insertions, 34 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4c0ad9b4dba0..1eacf8d9966a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,29 +67,40 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  u16 zone, unsigned int size, unsigned int rnd)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
 	unsigned int n;
-	u_int32_t h;
 
 	/* The direction must be ignored, so we hash everything up to the
 	 * destination ports (which is a multiple of 4) and treat the last
 	 * three bytes manually.
 	 */
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-	h = jhash2((u32 *)tuple, n,
-		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
-				 tuple->dst.protonum));
+	return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+		      (((__force __u16)tuple->dst.u.all << 16) |
+		       tuple->dst.protonum));
+}
+
+static u32 __hash_bucket(u32 hash, unsigned int size)
+{
+	return ((u64)hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 hash, const struct net *net)
+{
+	return __hash_bucket(hash, net->ct.htable_size);
+}
 
-	return ((u64)h * size) >> 32;
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+				  u16 zone, unsigned int size)
+{
+	return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
 }
 
 static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
 				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, zone, net->ct.htable_size,
-				nf_conntrack_hash_rnd);
+	return __hash_conntrack(tuple, zone, net->ct.htable_size);
 }
 
 bool
@@ -291,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
  * OR
  * - Caller must lock nf_conntrack_lock before calling this function
  */
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
-		    const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, zone, tuple);
+	unsigned int bucket = hash_bucket(hash, net);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
 begin:
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
 		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
 			NF_CT_STAT_INC(net, found);
@@ -318,7 +329,7 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash) {
+	if (get_nulls_value(n) != bucket) {
 		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
 	}
@@ -326,19 +337,27 @@ begin:
 
 	return NULL;
 }
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
+{
+	return ____nf_conntrack_find(net, zone, tuple,
+				     hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
-		      const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+			const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
 begin:
-	h = __nf_conntrack_find(net, zone, tuple);
+	h = ____nf_conntrack_find(net, zone, tuple, hash);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(nf_ct_is_dying(ct) ||
@@ -356,6 +375,14 @@ begin:
 
 	return h;
 }
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
+{
+	return __nf_conntrack_find_get(net, zone, tuple,
+				       hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -408,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 		return NF_ACCEPT;
 
 	zone = nf_ct_zone(ct);
-	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	/* reuse the hash saved before */
+	hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+	hash = hash_bucket(hash, net);
+	repl_hash = hash_conntrack(net, zone,
+				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns. But packet copies and
@@ -566,10 +596,11 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
-				   const struct nf_conntrack_tuple *orig,
-				   const struct nf_conntrack_tuple *repl,
-				   gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+		     const struct nf_conntrack_tuple *orig,
+		     const struct nf_conntrack_tuple *repl,
+		     gfp_t gfp, u32 hash)
 {
 	struct nf_conn *ct;
 
@@ -585,6 +616,9 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 			get_random_bytes(&rand, sizeof(rand));
 		} while (!rand);
 		cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
+		hash = hash_conntrack_raw(orig, zone);
 	}
 
 	/* We don't want any race condition at early drop stage */
@@ -592,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(net, zone, orig);
-		if (!early_drop(net, hash)) {
+		if (!early_drop(net, hash_bucket(hash, net))) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
@@ -623,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
-	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+	/* save hash for reusing when confirming */
+	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
 	write_pnet(&ct->ct_net, net);
@@ -650,6 +684,14 @@ out_free:
 	return ERR_PTR(-ENOMEM);
 #endif
 }
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+				   const struct nf_conntrack_tuple *orig,
+				   const struct nf_conntrack_tuple *repl,
+				   gfp_t gfp)
+{
+	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
 void nf_conntrack_free(struct nf_conn *ct)
@@ -671,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
-	       unsigned int dataoff)
+	       unsigned int dataoff, u32 hash)
 {
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
@@ -685,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+				  hash);
 	if (IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -762,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
@@ -771,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(net, zone, &tuple);
+	hash = hash_conntrack_raw(&tuple, zone);
+	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
-				   skb, dataoff);
+				   skb, dataoff, hash);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -1314,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
 			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
-						  hashsize,
-						  nf_conntrack_hash_rnd);
+						  hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}