aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorFlorian Westphal <fw@strlen.de>2016-11-16 09:13:36 -0500
committerPablo Neira Ayuso <pablo@netfilter.org>2016-11-24 08:43:34 -0500
commit7223ecd4669921cb2a709193521967aaa2b06862 (patch)
tree58ead0631aa12ea4b0744094e061e5aaeabd201c /net
parent728e87b49605f7ee02c0415c8255d3d185a36154 (diff)
netfilter: nat: switch to new rhlist interface
I got an off-list bug report about failing connections and high CPU usage. This happens because we hit the 'elasticity' checks in rhashtable, which refuse bucket lists exceeding 16 entries. The nat bysrc hash unfortunately needs to insert distinct objects that share the same key and are identical (have the same source tuple); this cannot be avoided. Switch to the rhlist interface, which is designed for this. The nulls_base is removed here; I don't think it's needed: a (unlikely) false positive results in an unneeded port clash resolution, a false negative results in a packet drop during conntrack confirmation, when we try to insert the duplicate into the main conntrack hash table. Tested by adding multiple IP addresses to the host, then adding iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE ... and then creating multiple connections, from the same source port but different addresses: for i in $(seq 2000 2032);do nc -p 1234 192.168.7.1 $i > /dev/null & done (all of these then get hashed to the same bysource slot) Then, to test that nat conflict resolution is working: nc -s 10.0.0.1 -p 1234 192.168.7.1 2000 nc -s 10.0.0.2 -p 1234 192.168.7.1 2000 tcp .. src=10.0.0.1 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1024 [ASSURED] tcp .. src=10.0.0.2 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1025 [ASSURED] tcp .. src=192.168.7.10 dst=192.168.7.1 sport=1234 dport=2000 src=192.168.7.1 dst=192.168.7.10 sport=2000 dport=1234 [ASSURED] tcp .. src=192.168.7.10 dst=192.168.7.1 sport=1234 dport=2001 src=192.168.7.1 dst=192.168.7.10 sport=2001 dport=1234 [ASSURED] [..] -> nat altered source ports to 1024 and 1025, respectively.
This can also be confirmed on the destination host, which shows ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1024 ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1025 ESTAB 0 0 192.168.7.1:2000 192.168.7.10:1234 Cc: Herbert Xu <herbert@gondor.apana.org.au> Fixes: 870190a9ec907 ("netfilter: nat: convert nat bysrc hash to rhashtable") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/nf_nat_core.c40
1 files changed, 24 insertions, 16 deletions
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c632429706eb..5b9c884a452e 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -42,7 +42,7 @@ struct nf_nat_conn_key {
42 const struct nf_conntrack_zone *zone; 42 const struct nf_conntrack_zone *zone;
43}; 43};
44 44
45static struct rhashtable nf_nat_bysource_table; 45static struct rhltable nf_nat_bysource_table;
46 46
47inline const struct nf_nat_l3proto * 47inline const struct nf_nat_l3proto *
48__nf_nat_l3proto_find(u8 family) 48__nf_nat_l3proto_find(u8 family)
@@ -207,7 +207,6 @@ static struct rhashtable_params nf_nat_bysource_params = {
207 .obj_cmpfn = nf_nat_bysource_cmp, 207 .obj_cmpfn = nf_nat_bysource_cmp,
208 .nelem_hint = 256, 208 .nelem_hint = 256,
209 .min_size = 1024, 209 .min_size = 1024,
210 .nulls_base = (1U << RHT_BASE_SHIFT),
211}; 210};
212 211
213/* Only called for SRC manip */ 212/* Only called for SRC manip */
@@ -226,12 +225,15 @@ find_appropriate_src(struct net *net,
226 .tuple = tuple, 225 .tuple = tuple,
227 .zone = zone 226 .zone = zone
228 }; 227 };
228 struct rhlist_head *hl;
229 229
230 ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, 230 hl = rhltable_lookup(&nf_nat_bysource_table, &key,
231 nf_nat_bysource_params); 231 nf_nat_bysource_params);
232 if (!ct) 232 if (!hl)
233 return 0; 233 return 0;
234 234
235 ct = container_of(hl, typeof(*ct), nat_bysource);
236
235 nf_ct_invert_tuplepr(result, 237 nf_ct_invert_tuplepr(result,
236 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 238 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
237 result->dst = tuple->dst; 239 result->dst = tuple->dst;
@@ -449,11 +451,17 @@ nf_nat_setup_info(struct nf_conn *ct,
449 } 451 }
450 452
451 if (maniptype == NF_NAT_MANIP_SRC) { 453 if (maniptype == NF_NAT_MANIP_SRC) {
454 struct nf_nat_conn_key key = {
455 .net = nf_ct_net(ct),
456 .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
457 .zone = nf_ct_zone(ct),
458 };
452 int err; 459 int err;
453 460
454 err = rhashtable_insert_fast(&nf_nat_bysource_table, 461 err = rhltable_insert_key(&nf_nat_bysource_table,
455 &ct->nat_bysource, 462 &key,
456 nf_nat_bysource_params); 463 &ct->nat_bysource,
464 nf_nat_bysource_params);
457 if (err) 465 if (err)
458 return NF_DROP; 466 return NF_DROP;
459 } 467 }
@@ -570,8 +578,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
570 * will delete entry from already-freed table. 578 * will delete entry from already-freed table.
571 */ 579 */
572 ct->status &= ~IPS_NAT_DONE_MASK; 580 ct->status &= ~IPS_NAT_DONE_MASK;
573 rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, 581 rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
574 nf_nat_bysource_params); 582 nf_nat_bysource_params);
575 583
576 /* don't delete conntrack. Although that would make things a lot 584 /* don't delete conntrack. Although that would make things a lot
577 * simpler, we'd end up flushing all conntracks on nat rmmod. 585 * simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -701,8 +709,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
701 if (!nat) 709 if (!nat)
702 return; 710 return;
703 711
704 rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, 712 rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
705 nf_nat_bysource_params); 713 nf_nat_bysource_params);
706} 714}
707 715
708static struct nf_ct_ext_type nat_extend __read_mostly = { 716static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -837,13 +845,13 @@ static int __init nf_nat_init(void)
837{ 845{
838 int ret; 846 int ret;
839 847
840 ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); 848 ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
841 if (ret) 849 if (ret)
842 return ret; 850 return ret;
843 851
844 ret = nf_ct_extend_register(&nat_extend); 852 ret = nf_ct_extend_register(&nat_extend);
845 if (ret < 0) { 853 if (ret < 0) {
846 rhashtable_destroy(&nf_nat_bysource_table); 854 rhltable_destroy(&nf_nat_bysource_table);
847 printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); 855 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
848 return ret; 856 return ret;
849 } 857 }
@@ -867,7 +875,7 @@ static int __init nf_nat_init(void)
867 return 0; 875 return 0;
868 876
869 cleanup_extend: 877 cleanup_extend:
870 rhashtable_destroy(&nf_nat_bysource_table); 878 rhltable_destroy(&nf_nat_bysource_table);
871 nf_ct_extend_unregister(&nat_extend); 879 nf_ct_extend_unregister(&nat_extend);
872 return ret; 880 return ret;
873} 881}
@@ -886,7 +894,7 @@ static void __exit nf_nat_cleanup(void)
886 for (i = 0; i < NFPROTO_NUMPROTO; i++) 894 for (i = 0; i < NFPROTO_NUMPROTO; i++)
887 kfree(nf_nat_l4protos[i]); 895 kfree(nf_nat_l4protos[i]);
888 896
889 rhashtable_destroy(&nf_nat_bysource_table); 897 rhltable_destroy(&nf_nat_bysource_table);
890} 898}
891 899
892MODULE_LICENSE("GPL"); 900MODULE_LICENSE("GPL");