author     Changli Gao <xiaosuo@gmail.com>             2010-09-21 11:49:20 -0400
committer  Patrick McHardy <kaber@trash.net>           2010-09-21 11:49:20 -0400
commit     99f07e91bef34db0fc8b1a224096e97f02dc0d56 (patch)
tree       869d47e97adb3c04b42f38a55836f2518f6221f9 /net/netfilter
parent     8a8030407f55a6aaedb51167c1a2383311fcd707 (diff)
netfilter: save the hash of the tuple in the original direction for later use
Since we don't change the tuple in the original direction, we can save its hash
in ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev for __nf_conntrack_confirm()
to reuse.
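To illustrate the pointer-reuse trick outside the patch itself, here is a minimal stand-alone user-space sketch (not the kernel code): the structs are simplified stand-ins for the conntrack types, and it assumes, as the kernel does, that unsigned long is the same width as a pointer. A list node that has not been linked yet has an unused pprev slot, so a 32-bit hash can be parked there and read back later.

/* Stand-alone sketch: park a 32-bit hash in the unused pprev pointer of a
 * not-yet-linked list node and read it back later.  Types are simplified
 * stand-ins, not the kernel's; assumes sizeof(unsigned long) == sizeof(void *).
 */
#include <stdint.h>
#include <stdio.h>

struct hlist_nulls_node {
	struct hlist_nulls_node *next, **pprev;
};

struct tuple_hash {
	struct hlist_nulls_node hnnode;
	/* the tuple itself is omitted in this sketch */
};

static void save_raw_hash(struct tuple_hash *th, uint32_t hash)
{
	/* the node is not in any list yet, so pprev is free for reuse */
	*(unsigned long *)&th->hnnode.pprev = hash;
}

static uint32_t saved_raw_hash(const struct tuple_hash *th)
{
	return (uint32_t)*(const unsigned long *)&th->hnnode.pprev;
}

int main(void)
{
	struct tuple_hash reply = { { NULL, NULL } };

	save_raw_hash(&reply, 0xdeadbeef);
	printf("recovered hash: 0x%x\n", saved_raw_hash(&reply));
	return 0;
}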
__hash_conntrack() is split into two steps: hash_conntrack_raw() computes the
raw hash, and __hash_bucket() maps it to a bucket id.
In the SYN-flood case, early_drop() no longer needs to recompute the hash.
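For reference, a stand-alone sketch of the two-step scheme follows. It is only an illustration under stated assumptions: jhash2() is replaced by a trivial placeholder mix, the "tuple" is just an array of made-up words, and the table size is arbitrary; the point is the split between the raw hash and the multiply-shift bucket mapping that __hash_bucket() uses.

/* Stand-alone sketch of the raw-hash / bucket split.  The real code hashes
 * the conntrack tuple with jhash2(); a placeholder mix stands in for it here.
 * The multiply-shift maps the full 32-bit range onto [0, size) without a
 * division, as __hash_bucket() does.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t hash_raw(const uint32_t *words, unsigned int n, uint32_t seed)
{
	uint32_t h = seed;	/* placeholder for jhash2() */

	while (n--)
		h = (h ^ *words++) * 0x9e3779b1u;
	return h;
}

static uint32_t hash_bucket(uint32_t hash, unsigned int size)
{
	return (uint32_t)(((uint64_t)hash * size) >> 32);
}

int main(void)
{
	/* made-up "tuple" words and table size, purely for illustration */
	uint32_t tuple[4] = { 0xc0a80001, 0xc0a80002, 0x0050d431, 0x06 };
	unsigned int htable_size = 16384;

	/* compute the raw hash once per packet ... */
	uint32_t raw = hash_raw(tuple, 4, 0x12345678);

	/* ... then derive bucket ids from it wherever needed (lookup,
	 * early_drop) without re-hashing the tuple.
	 */
	printf("raw=0x%08x bucket=%u\n", raw, hash_bucket(raw, htable_size));
	return 0;
}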
Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r--   net/netfilter/nf_conntrack_core.c   112
1 file changed, 78 insertions, 34 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4c0ad9b4dba0..1eacf8d9966a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,29 +67,40 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  u16 zone, unsigned int size, unsigned int rnd)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
 {
 	unsigned int n;
-	u_int32_t h;
 
 	/* The direction must be ignored, so we hash everything up to the
 	 * destination ports (which is a multiple of 4) and treat the last
 	 * three bytes manually.
 	 */
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-	h = jhash2((u32 *)tuple, n,
-		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
-				 tuple->dst.protonum));
+	return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+		      (((__force __u16)tuple->dst.u.all << 16) |
+		       tuple->dst.protonum));
+}
+
+static u32 __hash_bucket(u32 hash, unsigned int size)
+{
+	return ((u64)hash * size) >> 32;
+}
+
+static u32 hash_bucket(u32 hash, const struct net *net)
+{
+	return __hash_bucket(hash, net->ct.htable_size);
+}
 
-	return ((u64)h * size) >> 32;
+static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
+				  u16 zone, unsigned int size)
+{
+	return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
 }
 
 static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
 				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, zone, net->ct.htable_size,
-				nf_conntrack_hash_rnd);
+	return __hash_conntrack(tuple, zone, net->ct.htable_size);
 }
 
 bool
@@ -291,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
  * OR
  * - Caller must lock nf_conntrack_lock before calling this function
  */
-struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, u16 zone,
-		    const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+____nf_conntrack_find(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, zone, tuple);
+	unsigned int bucket = hash_bucket(hash, net);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
 	 */
 	local_bh_disable();
 begin:
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
 		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
 		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
 			NF_CT_STAT_INC(net, found);
@@ -318,7 +329,7 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash) {
+	if (get_nulls_value(n) != bucket) {
 		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
 	}
@@ -326,19 +337,27 @@ begin:
 
 	return NULL;
 }
+
+struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
+{
+	return ____nf_conntrack_find(net, zone, tuple,
+				     hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
-struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
-		      const struct nf_conntrack_tuple *tuple)
+static struct nf_conntrack_tuple_hash *
+__nf_conntrack_find_get(struct net *net, u16 zone,
+			const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
 begin:
-	h = __nf_conntrack_find(net, zone, tuple);
+	h = ____nf_conntrack_find(net, zone, tuple, hash);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(nf_ct_is_dying(ct) ||
@@ -356,6 +375,14 @@ begin:
 
 	return h;
 }
+
+struct nf_conntrack_tuple_hash *
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
+{
+	return __nf_conntrack_find_get(net, zone, tuple,
+				       hash_conntrack_raw(tuple, zone));
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
 static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -408,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 		return NF_ACCEPT;
 
 	zone = nf_ct_zone(ct);
-	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	/* reuse the hash saved before */
+	hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
+	hash = hash_bucket(hash, net);
+	repl_hash = hash_conntrack(net, zone,
+				   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns. But packet copies and
@@ -566,10 +596,11 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
-				   const struct nf_conntrack_tuple *orig,
-				   const struct nf_conntrack_tuple *repl,
-				   gfp_t gfp)
+static struct nf_conn *
+__nf_conntrack_alloc(struct net *net, u16 zone,
+		     const struct nf_conntrack_tuple *orig,
+		     const struct nf_conntrack_tuple *repl,
+		     gfp_t gfp, u32 hash)
 {
 	struct nf_conn *ct;
 
@@ -585,6 +616,9 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 			get_random_bytes(&rand, sizeof(rand));
 		} while (!rand);
 		cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+
+		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
+		hash = hash_conntrack_raw(orig, zone);
 	}
 
 	/* We don't want any race condition at early drop stage */
@@ -592,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(net, zone, orig);
-		if (!early_drop(net, hash)) {
+		if (!early_drop(net, hash_bucket(hash, net))) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
 				printk(KERN_WARNING
@@ -623,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
 	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
-	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
+	/* save hash for reusing when confirming */
+	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
 	write_pnet(&ct->ct_net, net);
@@ -650,6 +684,14 @@ out_free:
 	return ERR_PTR(-ENOMEM);
 #endif
 }
+
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+				   const struct nf_conntrack_tuple *orig,
+				   const struct nf_conntrack_tuple *repl,
+				   gfp_t gfp)
+{
+	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
+}
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
 void nf_conntrack_free(struct nf_conn *ct)
@@ -671,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
-	       unsigned int dataoff)
+	       unsigned int dataoff, u32 hash)
 {
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
@@ -685,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
+	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
+				  hash);
 	if (IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -762,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
@@ -771,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(net, zone, &tuple);
+	hash = hash_conntrack_raw(&tuple, zone);
+	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
-				   skb, dataoff);
+				   skb, dataoff, hash);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -1314,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
 			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
						  hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}