path: root/net
author		David S. Miller <davem@davemloft.net>	2016-05-09 15:02:58 -0400
committer	David S. Miller <davem@davemloft.net>	2016-05-09 15:02:58 -0400
commit		e8ed77dfa90dd79c5343415a4bbbfdab9787b35a (patch)
tree		04ce7f294e9a11c1addf1e19662f7c30d7da90bf /net
parent		e26522cd0b63fdbf3b4e9a39d73a985cc9b4fe27 (diff)
parent		0c5366b3a8c77fd6d67b763c5a76dfdc314e7726 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following large patchset contains Netfilter updates for your
net-next tree. My initial intention was to send you this in two goes,
but when I looked back twice I already had this burden on top of me.

Several updates for IPVS from Marco Angaroni:

1) Allow SIP connections originating from real-servers to be load
   balanced by the SIP persistence engine, as is already implemented
   in the other direction.

2) Release connections immediately for One-packet-scheduling (OPS)
   in IPVS, instead of doing it via timer and rcu callback.

3) Skip deleting conntracks for each one packet in OPS, and don't call
   nf_conntrack_alter_reply() since no reply is expected.

4) Enable drop on exhaustion for OPS + SIP persistence.

Miscellaneous conntrack updates from Florian Westphal, including a fix
for hash resize:

5) Move the conntrack generation counter out of the conntrack pernet
   structure, since this is only used by the init_ns to allow hash
   resizing.

6) Use get_random_once() from the packet path to collect the hash
   random seed instead of our compound.

7) Don't disable BH from ____nf_conntrack_find() for statistics;
   use NF_CT_STAT_INC_ATOMIC() instead.

8) Fix a lookup race during conntrack hash resizing.

9) Introduce clash resolution on conntrack insertion for
   connectionless protocols.

Then, Florian's netns rework to get rid of the per-netns conntrack
table, so we use one single table for them all. There was consensus on
this change during the NFWS 2015 and, on top of that, it has recently
been pointed to as a source of multiple problems from unprivileged
netns:

11) Use a single conntrack hashtable for all namespaces. Include netns
    in object comparisons and make it part of the hash calculation.
    Adapt early_drop() to consider netns.

12) Use a single expectation and NAT hashtable for all namespaces.

13) Use a single slab cache for all namespaces for conntrack objects.

14) Skip full table scanning from nf_ct_iterate_cleanup() if the pernet
    conntrack counter tells us the table is empty (i.e. equals zero).

Fixes for nf_tables interval set element handling, support to set
conntrack connlabels and allow set names up to 32 bytes:

15) Parse element flags from the element deletion path and pass them up
    to the backend set implementation.

16) Allow adjacent intervals in the rbtree set type, for dynamic
    interval updates.

17) Add support to set connlabel from nf_tables, from Florian Westphal.

18) Allow set names up to 32 bytes in nf_tables.

Several x_tables fixes and updates:

19) Fix incorrect use of IS_ERR_VALUE() in x_tables, original patch
    from Andrzej Hajda.

And finally, miscellaneous netfilter updates such as:

20) Disable automatic helper assignment by default. Note that this proc
    knob was introduced by a9006892643a ("netfilter: nf_ct_helper:
    allow to disable automatic helper assignment") 4 years ago to start
    moving towards explicit conntrack helper configuration via the
    iptables CT target.

21) Get rid of obsolete and inconsistent debugging instrumentation
    in x_tables.

22) Remove an unnecessary check for NULL after ip6_route_output().
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
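Two of the themes above recur throughout the diff below. Item 6 replaces conntrack's hand-rolled seed bootstrapping with get_random_once() in the packet path, and items 11-13 fold the owning namespace into hashing and comparisons so that one global table can serve every netns. The following is a minimal illustrative sketch of that combined pattern, assuming the real helpers get_random_once() (<linux/once.h>), net_hash_mix() (<net/netns/hash.h>) and jhash2() (<linux/jhash.h>); the function body is simplified, not the verbatim code from nf_conntrack_core.c:

#include <linux/jhash.h>
#include <linux/once.h>
#include <net/netns/hash.h>

static u32 nf_conntrack_hash_rnd;

/* Illustrative sketch: the seed is drawn lazily on first use (item 6),
 * and a per-namespace value is mixed into the hash (item 11) so that a
 * single shared table can safely hold entries from every netns.
 */
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
			      const struct net *net)
{
	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));

	return jhash2((const u32 *)tuple, sizeof(*tuple) / sizeof(u32),
		      nf_conntrack_hash_rnd ^ net_hash_mix(net));
}

With the namespace part of both the hash and the tuple comparison, lookups, early_drop() and the resize path can operate on one shared nf_conntrack_hash array instead of a per-netns net->ct.hash, which is what the nf_conntrack_l3proto_ipv4_compat.c hunks below switch to.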
Diffstat (limited to 'net')
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c                        | 223
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c                         | 250
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c         |   2
-rw-r--r-- | net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c  |  47
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c                        | 235
-rw-r--r-- | net/ipv6/netfilter/ip6t_SYNPROXY.c                     |   2
-rw-r--r-- | net/netfilter/ipvs/ip_vs_conn.c                        |  51
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c                        | 162
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c                         |  46
-rw-r--r-- | net/netfilter/ipvs/ip_vs_nfct.c                        |   4
-rw-r--r-- | net/netfilter/ipvs/ip_vs_pe_sip.c                      |  15
-rw-r--r-- | net/netfilter/nf_conntrack_core.c                      | 415
-rw-r--r-- | net/netfilter/nf_conntrack_expect.c                    |  83
-rw-r--r-- | net/netfilter/nf_conntrack_helper.c                    |  12
-rw-r--r-- | net/netfilter/nf_conntrack_netlink.c                   |  29
-rw-r--r-- | net/netfilter/nf_conntrack_proto_udp.c                 |   2
-rw-r--r-- | net/netfilter/nf_conntrack_proto_udplite.c             |   2
-rw-r--r-- | net/netfilter/nf_conntrack_standalone.c                |  13
-rw-r--r-- | net/netfilter/nf_nat_core.c                            |  39
-rw-r--r-- | net/netfilter/nf_tables_api.c                          |  78
-rw-r--r-- | net/netfilter/nfnetlink_cttimeout.c                    |   6
-rw-r--r-- | net/netfilter/nft_ct.c                                 |  30
-rw-r--r-- | net/netfilter/nft_rbtree.c                             |  49
-rw-r--r-- | net/openvswitch/conntrack.c                            |   8
24 files changed, 889 insertions, 914 deletions
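Before the diff itself, one recurring pattern deserves a note: the find_check_entry() hunks in arp_tables.c and ip_tables.c below carry the IS_ERR_VALUE() correction from item 19. The pcnt field of struct xt_counters is a u64, and IS_ERR_VALUE() is only well defined for unsigned long operands, so testing the field directly can misfire on 32-bit builds. The patches therefore keep the allocator's return value in an unsigned long, test that, and only then store it into the wider field. A reduced illustration of the idea, using the real xt_percpu_counter_alloc() helper but a hypothetical stand-in struct:

#include <linux/err.h>
#include <linux/netfilter/x_tables.h>

struct counters_example {		/* hypothetical stand-in for struct xt_counters */
	u64 pcnt;
};

static int counter_alloc_example(struct counters_example *c)
{
	unsigned long pcnt = xt_percpu_counter_alloc();

	if (IS_ERR_VALUE(pcnt))		/* operand is unsigned long: well defined */
		return -ENOMEM;

	c->pcnt = pcnt;			/* widen to u64 only after the check */
	return 0;
}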
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 60f5161abcb4..2033f929aa66 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -34,27 +34,6 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
 MODULE_DESCRIPTION("arptables core");
 
-/*#define DEBUG_ARP_TABLES*/
-/*#define DEBUG_ARP_TABLES_USER*/
-
-#ifdef DEBUG_ARP_TABLES
-#define dprintf(format, args...) pr_debug(format, ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_ARP_TABLES_USER
-#define duprintf(format, args...) pr_debug(format, ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-#ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x) WARN_ON(!(x))
-#else
-#define ARP_NF_ASSERT(x)
-#endif
-
 void *arpt_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(arpt, ARPT);
@@ -113,36 +92,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
 
 	if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
-		  ARPT_INV_ARPOP)) {
-		dprintf("ARP operation field mismatch.\n");
-		dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
-			arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+		  ARPT_INV_ARPOP))
 		return 0;
-	}
 
 	if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
-		  ARPT_INV_ARPHRD)) {
-		dprintf("ARP hardware address format mismatch.\n");
-		dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
-			arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+		  ARPT_INV_ARPHRD))
 		return 0;
-	}
 
 	if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
-		  ARPT_INV_ARPPRO)) {
-		dprintf("ARP protocol address format mismatch.\n");
-		dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
-			arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+		  ARPT_INV_ARPPRO))
 		return 0;
-	}
 
 	if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
-		  ARPT_INV_ARPHLN)) {
-		dprintf("ARP hardware address length mismatch.\n");
-		dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
-			arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+		  ARPT_INV_ARPHLN))
 		return 0;
-	}
 
 	src_devaddr = arpptr;
 	arpptr += dev->addr_len;
@@ -155,49 +118,25 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
 		  ARPT_INV_SRCDEVADDR) ||
 	    FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
-		  ARPT_INV_TGTDEVADDR)) {
-		dprintf("Source or target device address mismatch.\n");
-
+		  ARPT_INV_TGTDEVADDR))
 		return 0;
-	}
 
 	if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
 		  ARPT_INV_SRCIP) ||
 	    FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
-		  ARPT_INV_TGTIP)) {
-		dprintf("Source or target IP address mismatch.\n");
-
-		dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
-			&src_ipaddr,
-			&arpinfo->smsk.s_addr,
-			&arpinfo->src.s_addr,
-			arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
-		dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
-			&tgt_ipaddr,
-			&arpinfo->tmsk.s_addr,
-			&arpinfo->tgt.s_addr,
-			arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+		  ARPT_INV_TGTIP))
 		return 0;
-	}
 
 	/* Look for ifname matches. */
 	ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
 
-	if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
-		dprintf("VIA in mismatch (%s vs %s).%s\n",
-			indev, arpinfo->iniface,
-			arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
+	if (FWINV(ret != 0, ARPT_INV_VIA_IN))
 		return 0;
-	}
 
 	ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
 
-	if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
-		dprintf("VIA out mismatch (%s vs %s).%s\n",
-			outdev, arpinfo->outiface,
-			arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
+	if (FWINV(ret != 0, ARPT_INV_VIA_OUT))
 		return 0;
-	}
 
 	return 1;
 #undef FWINV
@@ -205,16 +144,10 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 
 static inline int arp_checkentry(const struct arpt_arp *arp)
 {
-	if (arp->flags & ~ARPT_F_MASK) {
-		duprintf("Unknown flag bits set: %08X\n",
-			 arp->flags & ~ARPT_F_MASK);
+	if (arp->flags & ~ARPT_F_MASK)
 		return 0;
-	}
-	if (arp->invflags & ~ARPT_INV_MASK) {
-		duprintf("Unknown invflag bits set: %08X\n",
-			 arp->invflags & ~ARPT_INV_MASK);
+	if (arp->invflags & ~ARPT_INV_MASK)
 		return 0;
-	}
 
 	return 1;
 }
@@ -406,11 +339,9 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 			= (void *)arpt_get_target_c(e);
 		int visited = e->comefrom & (1 << hook);
 
-		if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
-			pr_notice("arptables: loop hook %u pos %u %08X.\n",
-				  hook, pos, e->comefrom);
+		if (e->comefrom & (1 << NF_ARP_NUMHOOKS))
 			return 0;
-		}
+
 		e->comefrom
 			|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
 
@@ -423,12 +354,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 
 			if ((strcmp(t->target.u.user.name,
 				    XT_STANDARD_TARGET) == 0) &&
-			    t->verdict < -NF_MAX_VERDICT - 1) {
-				duprintf("mark_source_chains: bad "
-					"negative verdict (%i)\n",
-					t->verdict);
+			    t->verdict < -NF_MAX_VERDICT - 1)
 				return 0;
-			}
 
 			/* Return: backtrack through the last
 			 * big jump.
@@ -462,8 +389,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				    XT_STANDARD_TARGET) == 0 &&
 				    newpos >= 0) {
 					/* This a jump; chase it. */
-					duprintf("Jump rule %u -> %u\n",
-						 pos, newpos);
 					e = (struct arpt_entry *)
 						(entry0 + newpos);
 					if (!find_jump_target(newinfo, e))
@@ -480,8 +405,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 				pos = newpos;
 			}
 		}
-next:
-		duprintf("Finished chain %u\n", hook);
+next:		;
 	}
 	return 1;
 }
@@ -489,7 +413,6 @@ next:
 static inline int check_target(struct arpt_entry *e, const char *name)
 {
 	struct xt_entry_target *t = arpt_get_target(e);
-	int ret;
 	struct xt_tgchk_param par = {
 		.table = name,
 		.entryinfo = e,
@@ -499,13 +422,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
 		.family = NFPROTO_ARP,
 	};
 
-	ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
-	if (ret < 0) {
-		duprintf("arp_tables: check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		return ret;
-	}
-	return 0;
+	return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
 }
 
 static inline int
@@ -513,17 +430,18 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
 {
 	struct xt_entry_target *t;
 	struct xt_target *target;
+	unsigned long pcnt;
 	int ret;
 
-	e->counters.pcnt = xt_percpu_counter_alloc();
-	if (IS_ERR_VALUE(e->counters.pcnt))
+	pcnt = xt_percpu_counter_alloc();
+	if (IS_ERR_VALUE(pcnt))
 		return -ENOMEM;
+	e->counters.pcnt = pcnt;
 
 	t = arpt_get_target(e);
 	target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
 					t->u.user.revision);
 	if (IS_ERR(target)) {
-		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		ret = PTR_ERR(target);
 		goto out;
 	}
@@ -569,17 +487,12 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
 
 	if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
 	    (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
-	    (unsigned char *)e + e->next_offset > limit) {
-		duprintf("Bad offset %p\n", e);
+	    (unsigned char *)e + e->next_offset > limit)
 		return -EINVAL;
-	}
 
 	if (e->next_offset
-	    < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
-		duprintf("checking: element %p size %u\n",
-			 e, e->next_offset);
+	    < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target))
 		return -EINVAL;
-	}
 
 	if (!arp_checkentry(&e->arp))
 		return -EINVAL;
@@ -596,12 +509,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h]) {
-			if (!check_underflow(e)) {
-				pr_debug("Underflows must be unconditional and "
-					 "use the STANDARD target with "
-					 "ACCEPT/DROP\n");
+			if (!check_underflow(e))
 				return -EINVAL;
-			}
+
 			newinfo->underflow[h] = underflows[h];
 		}
 	}
@@ -646,7 +556,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
 
-	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 
 	/* Walk through entries, checking offsets. */
@@ -663,31 +572,21 @@
 				   XT_ERROR_TARGET) == 0)
 			++newinfo->stacksize;
 	}
-	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
 		return ret;
 
-	if (i != repl->num_entries) {
-		duprintf("translate_table: %u not %u entries\n",
-			 i, repl->num_entries);
+	if (i != repl->num_entries)
 		return -EINVAL;
-	}
 
 	/* Check hooks all assigned */
 	for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(repl->valid_hooks & (1 << i)))
 			continue;
-		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
-			duprintf("Invalid hook entry %u %u\n",
-				 i, repl->hook_entry[i]);
+		if (newinfo->hook_entry[i] == 0xFFFFFFFF)
 			return -EINVAL;
-		}
-		if (newinfo->underflow[i] == 0xFFFFFFFF) {
-			duprintf("Invalid underflow %u %u\n",
-				 i, repl->underflow[i]);
+		if (newinfo->underflow[i] == 0xFFFFFFFF)
 			return -EINVAL;
-		}
 	}
 
 	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -895,11 +794,8 @@ static int get_info(struct net *net, void __user *user,
 	struct xt_table *t;
 	int ret;
 
-	if (*len != sizeof(struct arpt_getinfo)) {
-		duprintf("length %u != %Zu\n", *len,
-			 sizeof(struct arpt_getinfo));
+	if (*len != sizeof(struct arpt_getinfo))
 		return -EINVAL;
-	}
 
 	if (copy_from_user(name, user, sizeof(name)) != 0)
 		return -EFAULT;
@@ -955,33 +851,25 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
 	struct arpt_get_entries get;
 	struct xt_table *t;
 
-	if (*len < sizeof(get)) {
-		duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+	if (*len < sizeof(get))
 		return -EINVAL;
-	}
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
-	if (*len != sizeof(struct arpt_get_entries) + get.size) {
-		duprintf("get_entries: %u != %Zu\n", *len,
-			 sizeof(struct arpt_get_entries) + get.size);
+	if (*len != sizeof(struct arpt_get_entries) + get.size)
 		return -EINVAL;
-	}
+
 	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
 	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 
-		duprintf("t->private->number = %u\n",
-			 private->number);
 		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
-		else {
-			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size, get.size);
+		else
 			ret = -EAGAIN;
-		}
+
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
@@ -1019,8 +907,6 @@ static int __do_replace(struct net *net, const char *name,
 
 	/* You lied! */
 	if (valid_hooks != t->valid_hooks) {
-		duprintf("Valid hook crap: %08X vs %08X\n",
-			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
@@ -1030,8 +916,6 @@
 		goto put_module;
 
 	/* Update module usage count based on number of rules */
-	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
-		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
 	if ((oldinfo->number > oldinfo->initial_entries) ||
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
@@ -1101,8 +985,6 @@ static int do_replace(struct net *net, const void __user *user,
 	if (ret != 0)
 		goto free_newinfo;
 
-	duprintf("arp_tables: Translated table\n");
-
 	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
 			   tmp.num_counters, tmp.counters);
 	if (ret)
@@ -1200,20 +1082,14 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
 	unsigned int entry_offset;
 	int ret, off;
 
-	duprintf("check_compat_entry_size_and_hooks %p\n", e);
 	if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
 	    (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
-	    (unsigned char *)e + e->next_offset > limit) {
-		duprintf("Bad offset %p, limit = %p\n", e, limit);
+	    (unsigned char *)e + e->next_offset > limit)
 		return -EINVAL;
-	}
 
 	if (e->next_offset < sizeof(struct compat_arpt_entry) +
-	    sizeof(struct compat_xt_entry_target)) {
-		duprintf("checking: element %p size %u\n",
-			 e, e->next_offset);
+	    sizeof(struct compat_xt_entry_target))
 		return -EINVAL;
-	}
 
 	if (!arp_checkentry(&e->arp))
 		return -EINVAL;
@@ -1230,8 +1106,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
 	target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
 					t->u.user.revision);
 	if (IS_ERR(target)) {
-		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
-			 t->u.user.name);
 		ret = PTR_ERR(target);
 		goto out;
 	}
@@ -1301,7 +1175,6 @@ static int translate_compat_table(struct xt_table_info **pinfo,
 	size = compatr->size;
 	info->number = compatr->num_entries;
 
-	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(NFPROTO_ARP);
 	xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
@@ -1316,11 +1189,8 @@ static int translate_compat_table(struct xt_table_info **pinfo,
 	}
 
 	ret = -EINVAL;
-	if (j != compatr->num_entries) {
-		duprintf("translate_compat_table: %u not %u entries\n",
-			 j, compatr->num_entries);
+	if (j != compatr->num_entries)
 		goto out_unlock;
-	}
 
 	ret = -ENOMEM;
 	newinfo = xt_alloc_table_info(size);
@@ -1411,8 +1281,6 @@ static int compat_do_replace(struct net *net, void __user *user,
 	if (ret != 0)
 		goto free_newinfo;
 
-	duprintf("compat_do_replace: Translated table\n");
-
 	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
 			   tmp.num_counters, compat_ptr(tmp.counters));
 	if (ret)
@@ -1445,7 +1313,6 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
 		break;
 
 	default:
-		duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -1528,17 +1395,13 @@ static int compat_get_entries(struct net *net,
 	struct compat_arpt_get_entries get;
 	struct xt_table *t;
 
-	if (*len < sizeof(get)) {
-		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+	if (*len < sizeof(get))
 		return -EINVAL;
-	}
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
-	if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
-		duprintf("compat_get_entries: %u != %zu\n",
-			 *len, sizeof(get) + get.size);
+	if (*len != sizeof(struct compat_arpt_get_entries) + get.size)
 		return -EINVAL;
-	}
+
 	get.name[sizeof(get.name) - 1] = '\0';
 
 	xt_compat_lock(NFPROTO_ARP);
@@ -1547,16 +1410,13 @@
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
 
-		duprintf("t->private->number = %u\n", private->number);
 		ret = compat_table_info(private, &info);
 		if (!ret && get.size == info.size) {
 			ret = compat_copy_entries_to_user(private->size,
 							  t, uptr->entrytable);
-		} else if (!ret) {
-			duprintf("compat_get_entries: I've got %u not %u!\n",
-				 private->size, get.size);
+		} else if (!ret)
 			ret = -EAGAIN;
-		}
+
 		xt_compat_flush_offsets(NFPROTO_ARP);
 		module_put(t->me);
 		xt_table_unlock(t);
@@ -1608,7 +1468,6 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
 		break;
 
 	default:
-		duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -1651,7 +1510,6 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
 	}
 
 	default:
-		duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -1696,7 +1554,6 @@ int arpt_register_table(struct net *net,
 	memcpy(loc_cpu_entry, repl->entries, repl->size);
 
 	ret = translate_table(newinfo, loc_cpu_entry, repl);
-	duprintf("arpt_register_table: translate table gives %d\n", ret);
 	if (ret != 0)
 		goto out_free;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 735d1ee8c1ab..54906e0e8e0c 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -35,34 +35,12 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("IPv4 packet filter");
 
-/*#define DEBUG_IP_FIREWALL*/
-/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
-/*#define DEBUG_IP_FIREWALL_USER*/
-
-#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) pr_info(format , ## args)
-#else
-#define dprintf(format, args...)
-#endif
-
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) pr_info(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 #ifdef CONFIG_NETFILTER_DEBUG
 #define IP_NF_ASSERT(x) WARN_ON(!(x))
 #else
 #define IP_NF_ASSERT(x)
 #endif
 
-#if 0
-/* All the better to debug you with... */
-#define static
-#define inline
-#endif
-
 void *ipt_alloc_initial_table(const struct xt_table *info)
 {
 	return xt_alloc_initial_table(ipt, IPT);
@@ -85,52 +63,28 @@ ip_packet_match(const struct iphdr *ip,
 	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
 		  IPT_INV_SRCIP) ||
 	    FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
-		  IPT_INV_DSTIP)) {
-		dprintf("Source or dest mismatch.\n");
-
-		dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
-			&ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
-			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
-		dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
-			&ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
-			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+		  IPT_INV_DSTIP))
 		return false;
-	}
 
 	ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
 
-	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
-		dprintf("VIA in mismatch (%s vs %s).%s\n",
-			indev, ipinfo->iniface,
-			ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
+	if (FWINV(ret != 0, IPT_INV_VIA_IN))
 		return false;
-	}
 
 	ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
 
-	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
-		dprintf("VIA out mismatch (%s vs %s).%s\n",
-			outdev, ipinfo->outiface,
-			ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
+	if (FWINV(ret != 0, IPT_INV_VIA_OUT))
 		return false;
-	}
 
 	/* Check specific protocol */
 	if (ipinfo->proto &&
-	    FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
-		dprintf("Packet protocol %hi does not match %hi.%s\n",
-			ip->protocol, ipinfo->proto,
-			ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
+	    FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO))
 		return false;
-	}
 
 	/* If we have a fragment rule but the packet is not a fragment
 	 * then we return zero */
-	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
-		dprintf("Fragment rule but not fragment.%s\n",
-			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG))
 		return false;
-	}
 
 	return true;
 }
@@ -138,16 +92,10 @@ ip_packet_match(const struct iphdr *ip,
 static bool
 ip_checkentry(const struct ipt_ip *ip)
 {
-	if (ip->flags & ~IPT_F_MASK) {
-		duprintf("Unknown flag bits set: %08X\n",
-			 ip->flags & ~IPT_F_MASK);
+	if (ip->flags & ~IPT_F_MASK)
 		return false;
-	}
-	if (ip->invflags & ~IPT_INV_MASK) {
-		duprintf("Unknown invflag bits set: %08X\n",
-			 ip->invflags & ~IPT_INV_MASK);
+	if (ip->invflags & ~IPT_INV_MASK)
 		return false;
-	}
 	return true;
 }
 
@@ -346,10 +294,6 @@ ipt_do_table(struct sk_buff *skb,
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	pr_debug("Entering %s(hook %u), UF %p\n",
-		 table->name, hook,
-		 get_entry(table_base, private->underflow[hook]));
-
 	do {
 		const struct xt_entry_target *t;
 		const struct xt_entry_match *ematch;
@@ -396,22 +340,15 @@ ipt_do_table(struct sk_buff *skb,
 			if (stackidx == 0) {
 				e = get_entry(table_base,
 					      private->underflow[hook]);
-				pr_debug("Underflow (this is normal) "
-					 "to %p\n", e);
 			} else {
 				e = jumpstack[--stackidx];
-				pr_debug("Pulled %p out from pos %u\n",
-					 e, stackidx);
 				e = ipt_next_entry(e);
 			}
 			continue;
 		}
 		if (table_base + v != ipt_next_entry(e) &&
-		    !(e->ip.flags & IPT_F_GOTO)) {
+		    !(e->ip.flags & IPT_F_GOTO))
 			jumpstack[stackidx++] = e;
-			pr_debug("Pushed %p into pos %u\n",
-				 e, stackidx - 1);
-		}
 
 		e = get_entry(table_base, v);
 		continue;
@@ -429,18 +366,13 @@ ipt_do_table(struct sk_buff *skb,
 		/* Verdict */
 		break;
 	} while (!acpar.hotdrop);
-	pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
 
 	xt_write_recseq_end(addend);
 	local_bh_enable();
 
-#ifdef DEBUG_ALLOW_ALL
-	return NF_ACCEPT;
-#else
 	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
-#endif
 }
 
 static bool find_jump_target(const struct xt_table_info *t,
@@ -480,11 +412,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			= (void *)ipt_get_target_c(e);
 		int visited = e->comefrom & (1 << hook);
 
-		if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
-			pr_err("iptables: loop hook %u pos %u %08X.\n",
-			       hook, pos, e->comefrom);
+		if (e->comefrom & (1 << NF_INET_NUMHOOKS))
 			return 0;
-		}
+
 		e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
 
 		/* Unconditional return/END. */
@@ -496,26 +426,13 @@
 
 			if ((strcmp(t->target.u.user.name,
 				    XT_STANDARD_TARGET) == 0) &&
-			    t->verdict < -NF_MAX_VERDICT - 1) {
-				duprintf("mark_source_chains: bad "
-					"negative verdict (%i)\n",
-					t->verdict);
+			    t->verdict < -NF_MAX_VERDICT - 1)
 				return 0;
-			}
 
 			/* Return: backtrack through the last
 			   big jump. */
 			do {
 				e->comefrom ^= (1<<NF_INET_NUMHOOKS);
-#ifdef DEBUG_IP_FIREWALL_USER
-				if (e->comefrom
-				    & (1 << NF_INET_NUMHOOKS)) {
-					duprintf("Back unset "
-						 "on hook %u "
-						 "rule %u\n",
-						 hook, pos);
-				}
-#endif
 				oldpos = pos;
 				pos = e->counters.pcnt;
 				e->counters.pcnt = 0;
@@ -543,8 +460,6 @@
 				    XT_STANDARD_TARGET) == 0 &&
 				    newpos >= 0) {
 					/* This a jump; chase it. */
-					duprintf("Jump rule %u -> %u\n",
-						 pos, newpos);
 					e = (struct ipt_entry *)
 						(entry0 + newpos);
 					if (!find_jump_target(newinfo, e))
@@ -561,8 +476,7 @@
 				pos = newpos;
 			}
 		}
-next:
-		duprintf("Finished chain %u\n", hook);
+next:		;
 	}
 	return 1;
 }
@@ -584,18 +498,12 @@ static int
 check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
-	int ret;
 
 	par->match     = m->u.kernel.match;
 	par->matchinfo = m->data;
 
-	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
-			     ip->proto, ip->invflags & IPT_INV_PROTO);
-	if (ret < 0) {
-		duprintf("check failed for `%s'.\n", par->match->name);
-		return ret;
-	}
-	return 0;
+	return xt_check_match(par, m->u.match_size - sizeof(*m),
+			      ip->proto, ip->invflags & IPT_INV_PROTO);
}
 
 static int
@@ -606,10 +514,8 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
 
 	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
 				      m->u.user.revision);
-	if (IS_ERR(match)) {
-		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+	if (IS_ERR(match))
 		return PTR_ERR(match);
-	}
 	m->u.kernel.match = match;
 
 	ret = check_match(m, par);
@@ -634,16 +540,9 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 		.hook_mask = e->comefrom,
 		.family    = NFPROTO_IPV4,
 	};
-	int ret;
 
-	ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
-			      e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
-	if (ret < 0) {
-		duprintf("check failed for `%s'.\n",
-			 t->u.kernel.target->name);
-		return ret;
-	}
-	return 0;
+	return xt_check_target(&par, t->u.target_size - sizeof(*t),
+			       e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
 }
 
 static int
@@ -656,10 +555,12 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	unsigned int j;
 	struct xt_mtchk_param mtpar;
 	struct xt_entry_match *ematch;
+	unsigned long pcnt;
 
-	e->counters.pcnt = xt_percpu_counter_alloc();
-	if (IS_ERR_VALUE(e->counters.pcnt))
+	pcnt = xt_percpu_counter_alloc();
+	if (IS_ERR_VALUE(pcnt))
 		return -ENOMEM;
+	e->counters.pcnt = pcnt;
 
 	j = 0;
 	mtpar.net	= net;
@@ -678,7 +579,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
 					t->u.user.revision);
 	if (IS_ERR(target)) {
-		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
 		ret = PTR_ERR(target);
 		goto cleanup_matches;
 	}
@@ -732,17 +632,12 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 
 	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
 	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
-	    (unsigned char *)e + e->next_offset > limit) {
-		duprintf("Bad offset %p\n", e);
+	    (unsigned char *)e + e->next_offset > limit)
 		return -EINVAL;
-	}
 
 	if (e->next_offset
-	    < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
-		duprintf("checking: element %p size %u\n",
-			 e, e->next_offset);
+	    < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
 		return -EINVAL;
-	}
 
 	if (!ip_checkentry(&e->ip))
 		return -EINVAL;
@@ -759,12 +654,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 		if ((unsigned char *)e - base == hook_entries[h])
 			newinfo->hook_entry[h] = hook_entries[h];
 		if ((unsigned char *)e - base == underflows[h]) {
-			if (!check_underflow(e)) {
-				pr_debug("Underflows must be unconditional and "
-					 "use the STANDARD target with "
-					 "ACCEPT/DROP\n");
+			if (!check_underflow(e))
 				return -EINVAL;
-			}
+
 			newinfo->underflow[h] = underflows[h];
 		}
 	}
@@ -816,7 +708,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		newinfo->underflow[i] = 0xFFFFFFFF;
 	}
 
-	duprintf("translate_table: size %u\n", newinfo->size);
 	i = 0;
 	/* Walk through entries, checking offsets. */
 	xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -833,27 +724,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 			++newinfo->stacksize;
 	}
 
-	if (i != repl->num_entries) {
-		duprintf("translate_table: %u not %u entries\n",
-			 i, repl->num_entries);
+	if (i != repl->num_entries)
 		return -EINVAL;
-	}
 
 	/* Check hooks all assigned */
 	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
 		/* Only hooks which are valid */
 		if (!(repl->valid_hooks & (1 << i)))
 			continue;
-		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
-			duprintf("Invalid hook entry %u %u\n",
-				 i, repl->hook_entry[i]);
+		if (newinfo->hook_entry[i] == 0xFFFFFFFF)
 			return -EINVAL;
-		}
-		if (newinfo->underflow[i] == 0xFFFFFFFF) {
-			duprintf("Invalid underflow %u %u\n",
-				 i, repl->underflow[i]);
+		if (newinfo->underflow[i] == 0xFFFFFFFF)
 			return -EINVAL;
-		}
 	}
 
 	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1081,11 +963,8 @@ static int get_info(struct net *net, void __user *user,
 	struct xt_table *t;
 	int ret;
 
-	if (*len != sizeof(struct ipt_getinfo)) {
-		duprintf("length %u != %zu\n", *len,
-			 sizeof(struct ipt_getinfo));
+	if (*len != sizeof(struct ipt_getinfo))
 		return -EINVAL;
-	}
 
 	if (copy_from_user(name, user, sizeof(name)) != 0)
 		return -EFAULT;
@@ -1143,31 +1022,23 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
 	struct ipt_get_entries get;
 	struct xt_table *t;
 
-	if (*len < sizeof(get)) {
-		duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+	if (*len < sizeof(get))
 		return -EINVAL;
-	}
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
-	if (*len != sizeof(struct ipt_get_entries) + get.size) {
-		duprintf("get_entries: %u != %zu\n",
-			 *len, sizeof(get) + get.size);
+	if (*len != sizeof(struct ipt_get_entries) + get.size)
 		return -EINVAL;
-	}
 	get.name[sizeof(get.name) - 1] = '\0';
 
 	t = xt_find_table_lock(net, AF_INET, get.name);
 	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
-		duprintf("t->private->number = %u\n", private->number);
 		if (get.size == private->size)
 			ret = copy_entries_to_user(private->size,
 						   t, uptr->entrytable);
-		else {
-			duprintf("get_entries: I've got %u not %u!\n",
-				 private->size, get.size);
+		else
 			ret = -EAGAIN;
-		}
+
 		module_put(t->me);
 		xt_table_unlock(t);
 	} else
@@ -1203,8 +1074,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
 	/* You lied! */
 	if (valid_hooks != t->valid_hooks) {
-		duprintf("Valid hook crap: %08X vs %08X\n",
-			 valid_hooks, t->valid_hooks);
 		ret = -EINVAL;
 		goto put_module;
 	}
@@ -1214,8 +1083,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 		goto put_module;
 
 	/* Update module usage count based on number of rules */
-	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
-		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
 	if ((oldinfo->number > oldinfo->initial_entries) ||
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
@@ -1284,8 +1151,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	if (ret != 0)
 		goto free_newinfo;
 
-	duprintf("Translated table\n");
-
 	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
 			   tmp.num_counters, tmp.counters);
 	if (ret)
@@ -1411,11 +1276,9 @@ compat_find_calc_match(struct xt_entry_match *m,
 
 	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
 				      m->u.user.revision);
-	if (IS_ERR(match)) {
-		duprintf("compat_check_calc_match: `%s' not found\n",
-			 m->u.user.name);
+	if (IS_ERR(match))
 		return PTR_ERR(match);
-	}
+
 	m->u.kernel.match = match;
 	*size += xt_compat_match_offset(match);
 	return 0;
@@ -1447,20 +1310,14 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 	unsigned int j;
 	int ret, off;
 
-	duprintf("check_compat_entry_size_and_hooks %p\n", e);
 	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
 	    (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
-	    (unsigned char *)e + e->next_offset > limit) {
-		duprintf("Bad offset %p, limit = %p\n", e, limit);
+	    (unsigned char *)e + e->next_offset > limit)
 		return -EINVAL;
-	}
 
 	if (e->next_offset < sizeof(struct compat_ipt_entry) +
-	    sizeof(struct compat_xt_entry_target)) {
-		duprintf("checking: element %p size %u\n",
-			 e, e->next_offset);
+	    sizeof(struct compat_xt_entry_target))
 		return -EINVAL;
-	}
 
 	if (!ip_checkentry(&e->ip))
 		return -EINVAL;
@@ -1484,8 +1341,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
 					t->u.user.revision);
 	if (IS_ERR(target)) {
-		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
-			 t->u.user.name);
 		ret = PTR_ERR(target);
 		goto release_matches;
 	}
@@ -1567,7 +1422,6 @@ translate_compat_table(struct net *net,
 	size = compatr->size;
 	info->number = compatr->num_entries;
 
-	duprintf("translate_compat_table: size %u\n", info->size);
 	j = 0;
 	xt_compat_lock(AF_INET);
 	xt_compat_init_offsets(AF_INET, compatr->num_entries);
@@ -1582,11 +1436,8 @@ translate_compat_table(struct net *net,
 	}
 
 	ret = -EINVAL;
-	if (j != compatr->num_entries) {
-		duprintf("translate_compat_table: %u not %u entries\n",
-			 j, compatr->num_entries);
+	if (j != compatr->num_entries)
 		goto out_unlock;
-	}
 
 	ret = -ENOMEM;
 	newinfo = xt_alloc_table_info(size);
@@ -1683,8 +1534,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 	if (ret != 0)
 		goto free_newinfo;
 
-	duprintf("compat_do_replace: Translated table\n");
-
 	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
 			   tmp.num_counters, compat_ptr(tmp.counters));
 	if (ret)
@@ -1718,7 +1567,6 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
 		break;
 
 	default:
-		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -1768,19 +1616,15 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
 	struct compat_ipt_get_entries get;
 	struct xt_table *t;
 
-	if (*len < sizeof(get)) {
-		duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+	if (*len < sizeof(get))
 		return -EINVAL;
-	}
 
 	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
 		return -EFAULT;
 
-	if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
-		duprintf("compat_get_entries: %u != %zu\n",
-			 *len, sizeof(get) + get.size);
+	if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
 		return -EINVAL;
-	}
+
 	get.name[sizeof(get.name) - 1] = '\0';
 
 	xt_compat_lock(AF_INET);
@@ -1788,16 +1632,13 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
 	if (!IS_ERR_OR_NULL(t)) {
 		const struct xt_table_info *private = t->private;
 		struct xt_table_info info;
-		duprintf("t->private->number = %u\n", private->number);
 		ret = compat_table_info(private, &info);
-		if (!ret && get.size == info.size) {
+		if (!ret && get.size == info.size)
 			ret = compat_copy_entries_to_user(private->size,
 							  t, uptr->entrytable);
-		} else if (!ret) {
-			duprintf("compat_get_entries: I've got %u not %u!\n",
-				 private->size, get.size);
+		else if (!ret)
 			ret = -EAGAIN;
-		}
+
 		xt_compat_flush_offsets(AF_INET);
 		module_put(t->me);
 		xt_table_unlock(t);
@@ -1850,7 +1691,6 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		break;
 
 	default:
-		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -1902,7 +1742,6 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	}
 
 	default:
-		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
 		ret = -EINVAL;
 	}
 
@@ -2004,7 +1843,6 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't. Hence, no choice but to drop.
 		 */
-		duprintf("Dropping evil ICMP tinygram.\n");
 		par->hotdrop = true;
 		return false;
 	}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e3c46e8e2762..ae1a71a97132 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -360,7 +360,7 @@ static int ipv4_init_net(struct net *net)
 
 	in->ctl_table[0].data = &nf_conntrack_max;
 	in->ctl_table[1].data = &net->ct.count;
-	in->ctl_table[2].data = &net->ct.htable_size;
+	in->ctl_table[2].data = &nf_conntrack_htable_size;
 	in->ctl_table[3].data = &net->ct.sysctl_checksum;
 	in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
 #endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f0dfe92a00d6..c6f3c406f707 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -31,15 +31,14 @@ struct ct_iter_state {
31 31
32static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 32static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
33{ 33{
34 struct net *net = seq_file_net(seq);
35 struct ct_iter_state *st = seq->private; 34 struct ct_iter_state *st = seq->private;
36 struct hlist_nulls_node *n; 35 struct hlist_nulls_node *n;
37 36
38 for (st->bucket = 0; 37 for (st->bucket = 0;
39 st->bucket < net->ct.htable_size; 38 st->bucket < nf_conntrack_htable_size;
40 st->bucket++) { 39 st->bucket++) {
41 n = rcu_dereference( 40 n = rcu_dereference(
42 hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 41 hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
43 if (!is_a_nulls(n)) 42 if (!is_a_nulls(n))
44 return n; 43 return n;
45 } 44 }
@@ -49,17 +48,16 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
49static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 48static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
50 struct hlist_nulls_node *head) 49 struct hlist_nulls_node *head)
51{ 50{
52 struct net *net = seq_file_net(seq);
53 struct ct_iter_state *st = seq->private; 51 struct ct_iter_state *st = seq->private;
54 52
55 head = rcu_dereference(hlist_nulls_next_rcu(head)); 53 head = rcu_dereference(hlist_nulls_next_rcu(head));
56 while (is_a_nulls(head)) { 54 while (is_a_nulls(head)) {
57 if (likely(get_nulls_value(head) == st->bucket)) { 55 if (likely(get_nulls_value(head) == st->bucket)) {
58 if (++st->bucket >= net->ct.htable_size) 56 if (++st->bucket >= nf_conntrack_htable_size)
59 return NULL; 57 return NULL;
60 } 58 }
61 head = rcu_dereference( 59 head = rcu_dereference(
62 hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 60 hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
63 } 61 }
64 return head; 62 return head;
65} 63}
@@ -114,6 +112,23 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
114} 112}
115#endif 113#endif
116 114
115static bool ct_seq_should_skip(const struct nf_conn *ct,
116 const struct net *net,
117 const struct nf_conntrack_tuple_hash *hash)
118{
119 /* we only want to print DIR_ORIGINAL */
120 if (NF_CT_DIRECTION(hash))
121 return true;
122
123 if (nf_ct_l3num(ct) != AF_INET)
124 return true;
125
126 if (!net_eq(nf_ct_net(ct), net))
127 return true;
128
129 return false;
130}
131
117static int ct_seq_show(struct seq_file *s, void *v) 132static int ct_seq_show(struct seq_file *s, void *v)
118{ 133{
119 struct nf_conntrack_tuple_hash *hash = v; 134 struct nf_conntrack_tuple_hash *hash = v;
@@ -123,14 +138,15 @@ static int ct_seq_show(struct seq_file *s, void *v)
123 int ret = 0; 138 int ret = 0;
124 139
125 NF_CT_ASSERT(ct); 140 NF_CT_ASSERT(ct);
126 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 141 if (ct_seq_should_skip(ct, seq_file_net(s), hash))
127 return 0; 142 return 0;
128 143
144 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
145 return 0;
129 146
130 /* we only want to print DIR_ORIGINAL */ 147 /* check if we raced with object reuse */
131 if (NF_CT_DIRECTION(hash)) 148 if (!nf_ct_is_confirmed(ct) ||
132 goto release; 149 ct_seq_should_skip(ct, seq_file_net(s), hash))
133 if (nf_ct_l3num(ct) != AF_INET)
134 goto release; 150 goto release;
135 151
136 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 152 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
@@ -220,13 +236,12 @@ struct ct_expect_iter_state {
220 236
221static struct hlist_node *ct_expect_get_first(struct seq_file *seq) 237static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
222{ 238{
223 struct net *net = seq_file_net(seq);
224 struct ct_expect_iter_state *st = seq->private; 239 struct ct_expect_iter_state *st = seq->private;
225 struct hlist_node *n; 240 struct hlist_node *n;
226 241
227 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 242 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
228 n = rcu_dereference( 243 n = rcu_dereference(
229 hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 244 hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
230 if (n) 245 if (n)
231 return n; 246 return n;
232 } 247 }
@@ -236,7 +251,6 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
236static struct hlist_node *ct_expect_get_next(struct seq_file *seq, 251static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
237 struct hlist_node *head) 252 struct hlist_node *head)
238{ 253{
239 struct net *net = seq_file_net(seq);
240 struct ct_expect_iter_state *st = seq->private; 254 struct ct_expect_iter_state *st = seq->private;
241 255
242 head = rcu_dereference(hlist_next_rcu(head)); 256 head = rcu_dereference(hlist_next_rcu(head));
@@ -244,7 +258,7 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
244 if (++st->bucket >= nf_ct_expect_hsize) 258 if (++st->bucket >= nf_ct_expect_hsize)
245 return NULL; 259 return NULL;
246 head = rcu_dereference( 260 head = rcu_dereference(
247 hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 261 hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
248 } 262 }
249 return head; 263 return head;
250} 264}
@@ -285,6 +299,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
285 299
286 exp = hlist_entry(n, struct nf_conntrack_expect, hnode); 300 exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
287 301
302 if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
303 return 0;
304
288 if (exp->tuple.src.l3num != AF_INET) 305 if (exp->tuple.src.l3num != AF_INET)
289 return 0; 306 return 0;
290 307
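The reworked ct_seq_show() above shows the lookup pattern for objects whose memory can be recycled under RCU: take a reference only if the count is still non-zero, then repeat the validity checks (confirmed flag plus the original skip conditions), because the slab object may have been reused for a different entry in between. A hedged userspace analogue using C11 atomics, with illustrative names:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct obj {
        atomic_int use;    /* reference count; 0 means the object is dying */
        bool confirmed;    /* set once the object is fully inserted */
        int key;
    };

    /* Take a reference only if the object is not already being freed. */
    static bool get_ref_not_zero(struct obj *o)
    {
        int c = atomic_load(&o->use);

        while (c != 0)
            if (atomic_compare_exchange_weak(&o->use, &c, c + 1))
                return true;
        return false;
    }

    /* The memory may be recycled for a new entry between finding the
     * object and referencing it, so every check must be repeated after
     * the reference is taken. */
    static struct obj *lookup_validated(struct obj *candidate, int want)
    {
        if (!get_ref_not_zero(candidate))
            return NULL;                           /* raced with free */

        if (!candidate->confirmed || candidate->key != want) {
            atomic_fetch_sub(&candidate->use, 1);  /* raced with reuse */
            return NULL;
        }
        return candidate;
    }

The second round of checks after the increment is what closes the window flagged by the "raced with object reuse" comment in the diff.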
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73e606c719ef..63e06c3dd319 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -39,34 +39,12 @@ MODULE_LICENSE("GPL");
39MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 39MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
40MODULE_DESCRIPTION("IPv6 packet filter"); 40MODULE_DESCRIPTION("IPv6 packet filter");
41 41
42/*#define DEBUG_IP_FIREWALL*/
43/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
44/*#define DEBUG_IP_FIREWALL_USER*/
45
46#ifdef DEBUG_IP_FIREWALL
47#define dprintf(format, args...) pr_info(format , ## args)
48#else
49#define dprintf(format, args...)
50#endif
51
52#ifdef DEBUG_IP_FIREWALL_USER
53#define duprintf(format, args...) pr_info(format , ## args)
54#else
55#define duprintf(format, args...)
56#endif
57
58#ifdef CONFIG_NETFILTER_DEBUG 42#ifdef CONFIG_NETFILTER_DEBUG
59#define IP_NF_ASSERT(x) WARN_ON(!(x)) 43#define IP_NF_ASSERT(x) WARN_ON(!(x))
60#else 44#else
61#define IP_NF_ASSERT(x) 45#define IP_NF_ASSERT(x)
62#endif 46#endif
63 47
64#if 0
65/* All the better to debug you with... */
66#define static
67#define inline
68#endif
69
70void *ip6t_alloc_initial_table(const struct xt_table *info) 48void *ip6t_alloc_initial_table(const struct xt_table *info)
71{ 49{
72 return xt_alloc_initial_table(ip6t, IP6T); 50 return xt_alloc_initial_table(ip6t, IP6T);
@@ -100,35 +78,18 @@ ip6_packet_match(const struct sk_buff *skb,
100 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, 78 if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk,
101 &ip6info->src), IP6T_INV_SRCIP) || 79 &ip6info->src), IP6T_INV_SRCIP) ||
102 FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, 80 FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk,
103 &ip6info->dst), IP6T_INV_DSTIP)) { 81 &ip6info->dst), IP6T_INV_DSTIP))
104 dprintf("Source or dest mismatch.\n");
105/*
106 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
107 ipinfo->smsk.s_addr, ipinfo->src.s_addr,
108 ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
109 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
110 ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
111 ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
112 return false; 82 return false;
113 }
114 83
115 ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); 84 ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
116 85
117 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { 86 if (FWINV(ret != 0, IP6T_INV_VIA_IN))
118 dprintf("VIA in mismatch (%s vs %s).%s\n",
119 indev, ip6info->iniface,
120 ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
121 return false; 87 return false;
122 }
123 88
124 ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); 89 ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
125 90
126 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { 91 if (FWINV(ret != 0, IP6T_INV_VIA_OUT))
127 dprintf("VIA out mismatch (%s vs %s).%s\n",
128 outdev, ip6info->outiface,
129 ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
130 return false; 92 return false;
131 }
132 93
133/* ... might want to do something with class and flowlabel here ... */ 94/* ... might want to do something with class and flowlabel here ... */
134 95
@@ -145,11 +106,6 @@ ip6_packet_match(const struct sk_buff *skb,
145 } 106 }
146 *fragoff = _frag_off; 107 *fragoff = _frag_off;
147 108
148 dprintf("Packet protocol %hi ?= %s%hi.\n",
149 protohdr,
150 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
151 ip6info->proto);
152
153 if (ip6info->proto == protohdr) { 109 if (ip6info->proto == protohdr) {
154 if (ip6info->invflags & IP6T_INV_PROTO) 110 if (ip6info->invflags & IP6T_INV_PROTO)
155 return false; 111 return false;
@@ -169,16 +125,11 @@ ip6_packet_match(const struct sk_buff *skb,
169static bool 125static bool
170ip6_checkentry(const struct ip6t_ip6 *ipv6) 126ip6_checkentry(const struct ip6t_ip6 *ipv6)
171{ 127{
172 if (ipv6->flags & ~IP6T_F_MASK) { 128 if (ipv6->flags & ~IP6T_F_MASK)
173 duprintf("Unknown flag bits set: %08X\n",
174 ipv6->flags & ~IP6T_F_MASK);
175 return false; 129 return false;
176 } 130 if (ipv6->invflags & ~IP6T_INV_MASK)
177 if (ipv6->invflags & ~IP6T_INV_MASK) {
178 duprintf("Unknown invflag bits set: %08X\n",
179 ipv6->invflags & ~IP6T_INV_MASK);
180 return false; 131 return false;
181 } 132
182 return true; 133 return true;
183} 134}
184 135
@@ -446,13 +397,9 @@ ip6t_do_table(struct sk_buff *skb,
446 xt_write_recseq_end(addend); 397 xt_write_recseq_end(addend);
447 local_bh_enable(); 398 local_bh_enable();
448 399
449#ifdef DEBUG_ALLOW_ALL
450 return NF_ACCEPT;
451#else
452 if (acpar.hotdrop) 400 if (acpar.hotdrop)
453 return NF_DROP; 401 return NF_DROP;
454 else return verdict; 402 else return verdict;
455#endif
456} 403}
457 404
458static bool find_jump_target(const struct xt_table_info *t, 405static bool find_jump_target(const struct xt_table_info *t,
@@ -492,11 +439,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
492 = (void *)ip6t_get_target_c(e); 439 = (void *)ip6t_get_target_c(e);
493 int visited = e->comefrom & (1 << hook); 440 int visited = e->comefrom & (1 << hook);
494 441
495 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 442 if (e->comefrom & (1 << NF_INET_NUMHOOKS))
496 pr_err("iptables: loop hook %u pos %u %08X.\n",
497 hook, pos, e->comefrom);
498 return 0; 443 return 0;
499 } 444
500 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); 445 e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
501 446
502 /* Unconditional return/END. */ 447 /* Unconditional return/END. */
@@ -508,26 +453,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
508 453
509 if ((strcmp(t->target.u.user.name, 454 if ((strcmp(t->target.u.user.name,
510 XT_STANDARD_TARGET) == 0) && 455 XT_STANDARD_TARGET) == 0) &&
511 t->verdict < -NF_MAX_VERDICT - 1) { 456 t->verdict < -NF_MAX_VERDICT - 1)
512 duprintf("mark_source_chains: bad "
513 "negative verdict (%i)\n",
514 t->verdict);
515 return 0; 457 return 0;
516 }
517 458
518 /* Return: backtrack through the last 459 /* Return: backtrack through the last
519 big jump. */ 460 big jump. */
520 do { 461 do {
521 e->comefrom ^= (1<<NF_INET_NUMHOOKS); 462 e->comefrom ^= (1<<NF_INET_NUMHOOKS);
522#ifdef DEBUG_IP_FIREWALL_USER
523 if (e->comefrom
524 & (1 << NF_INET_NUMHOOKS)) {
525 duprintf("Back unset "
526 "on hook %u "
527 "rule %u\n",
528 hook, pos);
529 }
530#endif
531 oldpos = pos; 463 oldpos = pos;
532 pos = e->counters.pcnt; 464 pos = e->counters.pcnt;
533 e->counters.pcnt = 0; 465 e->counters.pcnt = 0;
@@ -555,8 +487,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
555 XT_STANDARD_TARGET) == 0 && 487 XT_STANDARD_TARGET) == 0 &&
556 newpos >= 0) { 488 newpos >= 0) {
557 /* This is a jump; chase it. */ 489 /* This is a jump; chase it. */
558 duprintf("Jump rule %u -> %u\n",
559 pos, newpos);
560 e = (struct ip6t_entry *) 490 e = (struct ip6t_entry *)
561 (entry0 + newpos); 491 (entry0 + newpos);
562 if (!find_jump_target(newinfo, e)) 492 if (!find_jump_target(newinfo, e))
@@ -573,8 +503,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
573 pos = newpos; 503 pos = newpos;
574 } 504 }
575 } 505 }
576next: 506next: ;
577 duprintf("Finished chain %u\n", hook);
578 } 507 }
579 return 1; 508 return 1;
580} 509}
@@ -595,19 +524,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
595static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) 524static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
596{ 525{
597 const struct ip6t_ip6 *ipv6 = par->entryinfo; 526 const struct ip6t_ip6 *ipv6 = par->entryinfo;
598 int ret;
599 527
600 par->match = m->u.kernel.match; 528 par->match = m->u.kernel.match;
601 par->matchinfo = m->data; 529 par->matchinfo = m->data;
602 530
603 ret = xt_check_match(par, m->u.match_size - sizeof(*m), 531 return xt_check_match(par, m->u.match_size - sizeof(*m),
604 ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); 532 ipv6->proto, ipv6->invflags & IP6T_INV_PROTO);
605 if (ret < 0) {
606 duprintf("ip_tables: check failed for `%s'.\n",
607 par.match->name);
608 return ret;
609 }
610 return 0;
611} 533}
612 534
613static int 535static int
@@ -618,10 +540,9 @@ find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
618 540
619 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, 541 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
620 m->u.user.revision); 542 m->u.user.revision);
621 if (IS_ERR(match)) { 543 if (IS_ERR(match))
622 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
623 return PTR_ERR(match); 544 return PTR_ERR(match);
624 } 545
625 m->u.kernel.match = match; 546 m->u.kernel.match = match;
626 547
627 ret = check_match(m, par); 548 ret = check_match(m, par);
@@ -646,17 +567,11 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
646 .hook_mask = e->comefrom, 567 .hook_mask = e->comefrom,
647 .family = NFPROTO_IPV6, 568 .family = NFPROTO_IPV6,
648 }; 569 };
649 int ret;
650 570
651 t = ip6t_get_target(e); 571 t = ip6t_get_target(e);
652 ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 572 return xt_check_target(&par, t->u.target_size - sizeof(*t),
653 e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); 573 e->ipv6.proto,
654 if (ret < 0) { 574 e->ipv6.invflags & IP6T_INV_PROTO);
655 duprintf("ip_tables: check failed for `%s'.\n",
656 t->u.kernel.target->name);
657 return ret;
658 }
659 return 0;
660} 575}
661 576
662static int 577static int
@@ -669,10 +584,12 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
669 unsigned int j; 584 unsigned int j;
670 struct xt_mtchk_param mtpar; 585 struct xt_mtchk_param mtpar;
671 struct xt_entry_match *ematch; 586 struct xt_entry_match *ematch;
587 unsigned long pcnt;
672 588
673 e->counters.pcnt = xt_percpu_counter_alloc(); 589 pcnt = xt_percpu_counter_alloc();
674 if (IS_ERR_VALUE(e->counters.pcnt)) 590 if (IS_ERR_VALUE(pcnt))
675 return -ENOMEM; 591 return -ENOMEM;
592 e->counters.pcnt = pcnt;
676 593
677 j = 0; 594 j = 0;
678 mtpar.net = net; 595 mtpar.net = net;
@@ -691,7 +608,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
691 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, 608 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
692 t->u.user.revision); 609 t->u.user.revision);
693 if (IS_ERR(target)) { 610 if (IS_ERR(target)) {
694 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
695 ret = PTR_ERR(target); 611 ret = PTR_ERR(target);
696 goto cleanup_matches; 612 goto cleanup_matches;
697 } 613 }
@@ -744,17 +660,12 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
744 660
745 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || 661 if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
746 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || 662 (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
747 (unsigned char *)e + e->next_offset > limit) { 663 (unsigned char *)e + e->next_offset > limit)
748 duprintf("Bad offset %p\n", e);
749 return -EINVAL; 664 return -EINVAL;
750 }
751 665
752 if (e->next_offset 666 if (e->next_offset
753 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) { 667 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target))
754 duprintf("checking: element %p size %u\n",
755 e, e->next_offset);
756 return -EINVAL; 668 return -EINVAL;
757 }
758 669
759 if (!ip6_checkentry(&e->ipv6)) 670 if (!ip6_checkentry(&e->ipv6))
760 return -EINVAL; 671 return -EINVAL;
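The entry validation above (alignment, header fits below the limit, next_offset both large enough and in-bounds) is the standard defence when walking attacker-supplied variable-length records. A compact sketch of the same checks, with illustrative types standing in for struct ip6t_entry:

    #include <stdint.h>
    #include <stddef.h>

    struct entry {
        uint16_t next_offset;   /* distance to the next entry in the blob */
    };

    static int entry_ok(const unsigned char *e, const unsigned char *limit,
                        size_t min_size)
    {
        const struct entry *ent = (const void *)e;

        if ((uintptr_t)e % _Alignof(struct entry))  /* misaligned pointer */
            return 0;
        if (e + sizeof(struct entry) > limit)       /* header must fit */
            return 0;
        if (ent->next_offset < min_size ||          /* room for a target */
            e + ent->next_offset > limit)           /* stays inside blob */
            return 0;
        return 1;
    }

Note the ordering: next_offset is only read after the header itself is known to lie inside the blob.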
@@ -771,12 +682,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
771 if ((unsigned char *)e - base == hook_entries[h]) 682 if ((unsigned char *)e - base == hook_entries[h])
772 newinfo->hook_entry[h] = hook_entries[h]; 683 newinfo->hook_entry[h] = hook_entries[h];
773 if ((unsigned char *)e - base == underflows[h]) { 684 if ((unsigned char *)e - base == underflows[h]) {
774 if (!check_underflow(e)) { 685 if (!check_underflow(e))
775 pr_debug("Underflows must be unconditional and "
776 "use the STANDARD target with "
777 "ACCEPT/DROP\n");
778 return -EINVAL; 686 return -EINVAL;
779 } 687
780 newinfo->underflow[h] = underflows[h]; 688 newinfo->underflow[h] = underflows[h];
781 } 689 }
782 } 690 }
@@ -828,7 +736,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
828 newinfo->underflow[i] = 0xFFFFFFFF; 736 newinfo->underflow[i] = 0xFFFFFFFF;
829 } 737 }
830 738
831 duprintf("translate_table: size %u\n", newinfo->size);
832 i = 0; 739 i = 0;
833 /* Walk through entries, checking offsets. */ 740 /* Walk through entries, checking offsets. */
834 xt_entry_foreach(iter, entry0, newinfo->size) { 741 xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -845,27 +752,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
845 ++newinfo->stacksize; 752 ++newinfo->stacksize;
846 } 753 }
847 754
848 if (i != repl->num_entries) { 755 if (i != repl->num_entries)
849 duprintf("translate_table: %u not %u entries\n",
850 i, repl->num_entries);
851 return -EINVAL; 756 return -EINVAL;
852 }
853 757
854 /* Check hooks all assigned */ 758 /* Check hooks all assigned */
855 for (i = 0; i < NF_INET_NUMHOOKS; i++) { 759 for (i = 0; i < NF_INET_NUMHOOKS; i++) {
856 /* Only hooks which are valid */ 760 /* Only hooks which are valid */
857 if (!(repl->valid_hooks & (1 << i))) 761 if (!(repl->valid_hooks & (1 << i)))
858 continue; 762 continue;
859 if (newinfo->hook_entry[i] == 0xFFFFFFFF) { 763 if (newinfo->hook_entry[i] == 0xFFFFFFFF)
860 duprintf("Invalid hook entry %u %u\n",
861 i, repl->hook_entry[i]);
862 return -EINVAL; 764 return -EINVAL;
863 } 765 if (newinfo->underflow[i] == 0xFFFFFFFF)
864 if (newinfo->underflow[i] == 0xFFFFFFFF) {
865 duprintf("Invalid underflow %u %u\n",
866 i, repl->underflow[i]);
867 return -EINVAL; 766 return -EINVAL;
868 }
869 } 767 }
870 768
871 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) 769 if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
@@ -1093,11 +991,8 @@ static int get_info(struct net *net, void __user *user,
1093 struct xt_table *t; 991 struct xt_table *t;
1094 int ret; 992 int ret;
1095 993
1096 if (*len != sizeof(struct ip6t_getinfo)) { 994 if (*len != sizeof(struct ip6t_getinfo))
1097 duprintf("length %u != %zu\n", *len,
1098 sizeof(struct ip6t_getinfo));
1099 return -EINVAL; 995 return -EINVAL;
1100 }
1101 996
1102 if (copy_from_user(name, user, sizeof(name)) != 0) 997 if (copy_from_user(name, user, sizeof(name)) != 0)
1103 return -EFAULT; 998 return -EFAULT;
@@ -1155,31 +1050,24 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
1155 struct ip6t_get_entries get; 1050 struct ip6t_get_entries get;
1156 struct xt_table *t; 1051 struct xt_table *t;
1157 1052
1158 if (*len < sizeof(get)) { 1053 if (*len < sizeof(get))
1159 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1160 return -EINVAL; 1054 return -EINVAL;
1161 }
1162 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1055 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1163 return -EFAULT; 1056 return -EFAULT;
1164 if (*len != sizeof(struct ip6t_get_entries) + get.size) { 1057 if (*len != sizeof(struct ip6t_get_entries) + get.size)
1165 duprintf("get_entries: %u != %zu\n",
1166 *len, sizeof(get) + get.size);
1167 return -EINVAL; 1058 return -EINVAL;
1168 } 1059
1169 get.name[sizeof(get.name) - 1] = '\0'; 1060 get.name[sizeof(get.name) - 1] = '\0';
1170 1061
1171 t = xt_find_table_lock(net, AF_INET6, get.name); 1062 t = xt_find_table_lock(net, AF_INET6, get.name);
1172 if (!IS_ERR_OR_NULL(t)) { 1063 if (!IS_ERR_OR_NULL(t)) {
1173 struct xt_table_info *private = t->private; 1064 struct xt_table_info *private = t->private;
1174 duprintf("t->private->number = %u\n", private->number);
1175 if (get.size == private->size) 1065 if (get.size == private->size)
1176 ret = copy_entries_to_user(private->size, 1066 ret = copy_entries_to_user(private->size,
1177 t, uptr->entrytable); 1067 t, uptr->entrytable);
1178 else { 1068 else
1179 duprintf("get_entries: I've got %u not %u!\n",
1180 private->size, get.size);
1181 ret = -EAGAIN; 1069 ret = -EAGAIN;
1182 } 1070
1183 module_put(t->me); 1071 module_put(t->me);
1184 xt_table_unlock(t); 1072 xt_table_unlock(t);
1185 } else 1073 } else
@@ -1215,8 +1103,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1215 1103
1216 /* You lied! */ 1104 /* You lied! */
1217 if (valid_hooks != t->valid_hooks) { 1105 if (valid_hooks != t->valid_hooks) {
1218 duprintf("Valid hook crap: %08X vs %08X\n",
1219 valid_hooks, t->valid_hooks);
1220 ret = -EINVAL; 1106 ret = -EINVAL;
1221 goto put_module; 1107 goto put_module;
1222 } 1108 }
@@ -1226,8 +1112,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1226 goto put_module; 1112 goto put_module;
1227 1113
1228 /* Update module usage count based on number of rules */ 1114 /* Update module usage count based on number of rules */
1229 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1230 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1231 if ((oldinfo->number > oldinfo->initial_entries) || 1115 if ((oldinfo->number > oldinfo->initial_entries) ||
1232 (newinfo->number <= oldinfo->initial_entries)) 1116 (newinfo->number <= oldinfo->initial_entries))
1233 module_put(t->me); 1117 module_put(t->me);
@@ -1296,8 +1180,6 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1296 if (ret != 0) 1180 if (ret != 0)
1297 goto free_newinfo; 1181 goto free_newinfo;
1298 1182
1299 duprintf("ip_tables: Translated table\n");
1300
1301 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1183 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1302 tmp.num_counters, tmp.counters); 1184 tmp.num_counters, tmp.counters);
1303 if (ret) 1185 if (ret)
@@ -1422,11 +1304,9 @@ compat_find_calc_match(struct xt_entry_match *m,
1422 1304
1423 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name, 1305 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
1424 m->u.user.revision); 1306 m->u.user.revision);
1425 if (IS_ERR(match)) { 1307 if (IS_ERR(match))
1426 duprintf("compat_check_calc_match: `%s' not found\n",
1427 m->u.user.name);
1428 return PTR_ERR(match); 1308 return PTR_ERR(match);
1429 } 1309
1430 m->u.kernel.match = match; 1310 m->u.kernel.match = match;
1431 *size += xt_compat_match_offset(match); 1311 *size += xt_compat_match_offset(match);
1432 return 0; 1312 return 0;
@@ -1458,20 +1338,14 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1458 unsigned int j; 1338 unsigned int j;
1459 int ret, off; 1339 int ret, off;
1460 1340
1461 duprintf("check_compat_entry_size_and_hooks %p\n", e);
1462 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || 1341 if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
1463 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || 1342 (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
1464 (unsigned char *)e + e->next_offset > limit) { 1343 (unsigned char *)e + e->next_offset > limit)
1465 duprintf("Bad offset %p, limit = %p\n", e, limit);
1466 return -EINVAL; 1344 return -EINVAL;
1467 }
1468 1345
1469 if (e->next_offset < sizeof(struct compat_ip6t_entry) + 1346 if (e->next_offset < sizeof(struct compat_ip6t_entry) +
1470 sizeof(struct compat_xt_entry_target)) { 1347 sizeof(struct compat_xt_entry_target))
1471 duprintf("checking: element %p size %u\n",
1472 e, e->next_offset);
1473 return -EINVAL; 1348 return -EINVAL;
1474 }
1475 1349
1476 if (!ip6_checkentry(&e->ipv6)) 1350 if (!ip6_checkentry(&e->ipv6))
1477 return -EINVAL; 1351 return -EINVAL;
@@ -1495,8 +1369,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1495 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name, 1369 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
1496 t->u.user.revision); 1370 t->u.user.revision);
1497 if (IS_ERR(target)) { 1371 if (IS_ERR(target)) {
1498 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1499 t->u.user.name);
1500 ret = PTR_ERR(target); 1372 ret = PTR_ERR(target);
1501 goto release_matches; 1373 goto release_matches;
1502 } 1374 }
@@ -1575,7 +1447,6 @@ translate_compat_table(struct net *net,
1575 size = compatr->size; 1447 size = compatr->size;
1576 info->number = compatr->num_entries; 1448 info->number = compatr->num_entries;
1577 1449
1578 duprintf("translate_compat_table: size %u\n", info->size);
1579 j = 0; 1450 j = 0;
1580 xt_compat_lock(AF_INET6); 1451 xt_compat_lock(AF_INET6);
1581 xt_compat_init_offsets(AF_INET6, compatr->num_entries); 1452 xt_compat_init_offsets(AF_INET6, compatr->num_entries);
@@ -1590,11 +1461,8 @@ translate_compat_table(struct net *net,
1590 } 1461 }
1591 1462
1592 ret = -EINVAL; 1463 ret = -EINVAL;
1593 if (j != compatr->num_entries) { 1464 if (j != compatr->num_entries)
1594 duprintf("translate_compat_table: %u not %u entries\n",
1595 j, compatr->num_entries);
1596 goto out_unlock; 1465 goto out_unlock;
1597 }
1598 1466
1599 ret = -ENOMEM; 1467 ret = -ENOMEM;
1600 newinfo = xt_alloc_table_info(size); 1468 newinfo = xt_alloc_table_info(size);
@@ -1685,8 +1553,6 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
1685 if (ret != 0) 1553 if (ret != 0)
1686 goto free_newinfo; 1554 goto free_newinfo;
1687 1555
1688 duprintf("compat_do_replace: Translated table\n");
1689
1690 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1556 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1691 tmp.num_counters, compat_ptr(tmp.counters)); 1557 tmp.num_counters, compat_ptr(tmp.counters));
1692 if (ret) 1558 if (ret)
@@ -1720,7 +1586,6 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1720 break; 1586 break;
1721 1587
1722 default: 1588 default:
1723 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
1724 ret = -EINVAL; 1589 ret = -EINVAL;
1725 } 1590 }
1726 1591
@@ -1770,19 +1635,15 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1770 struct compat_ip6t_get_entries get; 1635 struct compat_ip6t_get_entries get;
1771 struct xt_table *t; 1636 struct xt_table *t;
1772 1637
1773 if (*len < sizeof(get)) { 1638 if (*len < sizeof(get))
1774 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1775 return -EINVAL; 1639 return -EINVAL;
1776 }
1777 1640
1778 if (copy_from_user(&get, uptr, sizeof(get)) != 0) 1641 if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1779 return -EFAULT; 1642 return -EFAULT;
1780 1643
1781 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size) { 1644 if (*len != sizeof(struct compat_ip6t_get_entries) + get.size)
1782 duprintf("compat_get_entries: %u != %zu\n",
1783 *len, sizeof(get) + get.size);
1784 return -EINVAL; 1645 return -EINVAL;
1785 } 1646
1786 get.name[sizeof(get.name) - 1] = '\0'; 1647 get.name[sizeof(get.name) - 1] = '\0';
1787 1648
1788 xt_compat_lock(AF_INET6); 1649 xt_compat_lock(AF_INET6);
@@ -1790,16 +1651,13 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
1790 if (!IS_ERR_OR_NULL(t)) { 1651 if (!IS_ERR_OR_NULL(t)) {
1791 const struct xt_table_info *private = t->private; 1652 const struct xt_table_info *private = t->private;
1792 struct xt_table_info info; 1653 struct xt_table_info info;
1793 duprintf("t->private->number = %u\n", private->number);
1794 ret = compat_table_info(private, &info); 1654 ret = compat_table_info(private, &info);
1795 if (!ret && get.size == info.size) { 1655 if (!ret && get.size == info.size)
1796 ret = compat_copy_entries_to_user(private->size, 1656 ret = compat_copy_entries_to_user(private->size,
1797 t, uptr->entrytable); 1657 t, uptr->entrytable);
1798 } else if (!ret) { 1658 else if (!ret)
1799 duprintf("compat_get_entries: I've got %u not %u!\n",
1800 private->size, get.size);
1801 ret = -EAGAIN; 1659 ret = -EAGAIN;
1802 } 1660
1803 xt_compat_flush_offsets(AF_INET6); 1661 xt_compat_flush_offsets(AF_INET6);
1804 module_put(t->me); 1662 module_put(t->me);
1805 xt_table_unlock(t); 1663 xt_table_unlock(t);
@@ -1852,7 +1710,6 @@ do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1852 break; 1710 break;
1853 1711
1854 default: 1712 default:
1855 duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
1856 ret = -EINVAL; 1713 ret = -EINVAL;
1857 } 1714 }
1858 1715
@@ -1904,7 +1761,6 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1904 } 1761 }
1905 1762
1906 default: 1763 default:
1907 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1908 ret = -EINVAL; 1764 ret = -EINVAL;
1909 } 1765 }
1910 1766
@@ -2006,7 +1862,6 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
2006 /* We've been asked to examine this packet, and we 1862 /* We've been asked to examine this packet, and we
2007 * can't. Hence, no choice but to drop. 1863 * can't. Hence, no choice but to drop.
2008 */ 1864 */
2009 duprintf("Dropping evil ICMP tinygram.\n");
2010 par->hotdrop = true; 1865 par->hotdrop = true;
2011 return false; 1866 return false;
2012 } 1867 }
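The pcnt temporary introduced above exists because IS_ERR_VALUE() is only well-defined on a full-width unsigned long; testing a differently-typed struct field can silently lose the error encoding. A userspace demonstration of one failure mode in that class, assuming an LP64 build, with the macro body inlined for illustration:

    #include <stdio.h>

    #define MAX_ERRNO 4095
    #define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

    int main(void)
    {
        unsigned long ret = (unsigned long)-12;  /* an allocator's -ENOMEM */
        unsigned int narrow = (unsigned int)ret; /* stored in a narrower field */

        printf("%d\n", (int)IS_ERR_VALUE(ret));    /* 1: error detected */
        printf("%d\n", (int)IS_ERR_VALUE(narrow)); /* 0 on LP64: error lost */
        return 0;
    }

Testing the allocator's raw unsigned long return before assigning it to the counter field sidesteps the whole class of width and signedness surprises.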
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5d778dd11f66..06bed74cf5ee 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -60,7 +60,7 @@ synproxy_send_tcp(struct net *net,
60 fl6.fl6_dport = nth->dest; 60 fl6.fl6_dport = nth->dest;
61 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6)); 61 security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
62 dst = ip6_route_output(net, NULL, &fl6); 62 dst = ip6_route_output(net, NULL, &fl6);
63 if (dst == NULL || dst->error) { 63 if (dst->error) {
64 dst_release(dst); 64 dst_release(dst);
65 goto free_nskb; 65 goto free_nskb;
66 } 66 }
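The SYNPROXY change above relies on ip6_route_output() never returning NULL: failure is reported in-band through dst->error, and the dst must still be released on that path. A self-contained sketch of the same calling convention, with stand-in types:

    #include <stdio.h>

    struct dst { int error; };

    /* Stand-in with the same contract as ip6_route_output(): it always
     * returns an object and signals failure via ->error, never NULL. */
    static struct dst *route_output(int fail)
    {
        static struct dst d;

        d.error = fail ? -113 /* ~ -EHOSTUNREACH */ : 0;
        return &d;
    }

    static void dst_release(struct dst *d) { (void)d; }

    int main(void)
    {
        struct dst *d = route_output(1);

        /* No NULL check is needed; the error path must still release d. */
        if (d->error) {
            printf("route lookup failed: %d\n", d->error);
            dst_release(d);
            return 1;
        }
        dst_release(d);
        return 0;
    }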
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 85ca189bdc3d..2cb3c626cd43 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -104,6 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); 104 spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
105} 105}
106 106
107static void ip_vs_conn_expire(unsigned long data);
107 108
108/* 109/*
109 * Returns hash value for IPVS connection entry 110 * Returns hash value for IPVS connection entry
@@ -453,10 +454,16 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
453} 454}
454EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 455EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
455 456
457static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp)
458{
459 __ip_vs_conn_put(cp);
460 ip_vs_conn_expire((unsigned long)cp);
461}
462
456/* 463/*
457 * Put back the conn and restart its timer with its timeout 464 * Put back the conn and restart its timer with its timeout
458 */ 465 */
459void ip_vs_conn_put(struct ip_vs_conn *cp) 466static void __ip_vs_conn_put_timer(struct ip_vs_conn *cp)
460{ 467{
461 unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ? 468 unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
462 0 : cp->timeout; 469 0 : cp->timeout;
@@ -465,6 +472,16 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
465 __ip_vs_conn_put(cp); 472 __ip_vs_conn_put(cp);
466} 473}
467 474
475void ip_vs_conn_put(struct ip_vs_conn *cp)
476{
477 if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) &&
478 (atomic_read(&cp->refcnt) == 1) &&
479 !timer_pending(&cp->timer))
480 /* expire connection immediately */
481 __ip_vs_conn_put_notimer(cp);
482 else
483 __ip_vs_conn_put_timer(cp);
484}
468 485
469/* 486/*
470 * Fill a no_client_port connection with a client port number 487 * Fill a no_client_port connection with a client port number
@@ -819,7 +836,8 @@ static void ip_vs_conn_expire(unsigned long data)
819 if (cp->control) 836 if (cp->control)
820 ip_vs_control_del(cp); 837 ip_vs_control_del(cp);
821 838
822 if (cp->flags & IP_VS_CONN_F_NFCT) { 839 if ((cp->flags & IP_VS_CONN_F_NFCT) &&
840 !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) {
823 /* Do not access conntracks during subsys cleanup 841 /* Do not access conntracks during subsys cleanup
824 * because nf_conntrack_find_get can not be used after 842 * because nf_conntrack_find_get can not be used after
825 * conntrack cleanup for the net. 843 * conntrack cleanup for the net.
@@ -834,7 +852,10 @@ static void ip_vs_conn_expire(unsigned long data)
834 ip_vs_unbind_dest(cp); 852 ip_vs_unbind_dest(cp);
835 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 853 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
836 atomic_dec(&ip_vs_conn_no_cport_cnt); 854 atomic_dec(&ip_vs_conn_no_cport_cnt);
837 call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free); 855 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
856 ip_vs_conn_rcu_free(&cp->rcu_head);
857 else
858 call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
838 atomic_dec(&ipvs->conn_count); 859 atomic_dec(&ipvs->conn_count);
839 return; 860 return;
840 } 861 }
@@ -850,7 +871,7 @@ static void ip_vs_conn_expire(unsigned long data)
850 if (ipvs->sync_state & IP_VS_STATE_MASTER) 871 if (ipvs->sync_state & IP_VS_STATE_MASTER)
851 ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); 872 ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
852 873
853 ip_vs_conn_put(cp); 874 __ip_vs_conn_put_timer(cp);
854} 875}
855 876
856/* Modify timer, so that it expires as soon as possible. 877/* Modify timer, so that it expires as soon as possible.
@@ -1240,6 +1261,16 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
1240 return 1; 1261 return 1;
1241} 1262}
1242 1263
1264static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp)
1265{
1266 struct ip_vs_service *svc;
1267
1268 if (!cp->dest)
1269 return false;
1270 svc = rcu_dereference(cp->dest->svc);
1271 return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET);
1272}
1273
1243/* Called from keventd and must protect itself from softirqs */ 1274/* Called from keventd and must protect itself from softirqs */
1244void ip_vs_random_dropentry(struct netns_ipvs *ipvs) 1275void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1245{ 1276{
@@ -1254,11 +1285,16 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1254 unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask; 1285 unsigned int hash = prandom_u32() & ip_vs_conn_tab_mask;
1255 1286
1256 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 1287 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
1257 if (cp->flags & IP_VS_CONN_F_TEMPLATE)
1258 /* connection template */
1259 continue;
1260 if (cp->ipvs != ipvs) 1288 if (cp->ipvs != ipvs)
1261 continue; 1289 continue;
1290 if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
1291 if (atomic_read(&cp->n_control) ||
1292 !ip_vs_conn_ops_mode(cp))
1293 continue;
1294 else
1295 /* connection template of OPS */
1296 goto try_drop;
1297 }
1262 if (cp->protocol == IPPROTO_TCP) { 1298 if (cp->protocol == IPPROTO_TCP) {
1263 switch(cp->state) { 1299 switch(cp->state) {
1264 case IP_VS_TCP_S_SYN_RECV: 1300 case IP_VS_TCP_S_SYN_RECV:
@@ -1286,6 +1322,7 @@ void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
1286 continue; 1322 continue;
1287 } 1323 }
1288 } else { 1324 } else {
1325try_drop:
1289 if (!todrop_entry(cp)) 1326 if (!todrop_entry(cp))
1290 continue; 1327 continue;
1291 } 1328 }
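The split of ip_vs_conn_put() above gives one-packet (OPS) connections a synchronous teardown path instead of always bouncing through the timer. The guard condition, as a hedged stand-alone predicate (illustrative only, not the kernel API):

    #include <stdbool.h>

    /* Guard for the synchronous teardown path in the new ip_vs_conn_put(). */
    static bool can_expire_now(bool one_packet, int refcnt, bool timer_pending)
    {
        return one_packet && refcnt == 1 && !timer_pending;
    }

Only the last reference holder with no timer queued may tear the connection down directly; every other caller still re-arms the timer, which for OPS connections already uses a zero timeout.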
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b9a4082afa3a..1207f20d24e4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -68,6 +68,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
68#ifdef CONFIG_IP_VS_DEBUG 68#ifdef CONFIG_IP_VS_DEBUG
69EXPORT_SYMBOL(ip_vs_get_debug_level); 69EXPORT_SYMBOL(ip_vs_get_debug_level);
70#endif 70#endif
71EXPORT_SYMBOL(ip_vs_new_conn_out);
71 72
72static int ip_vs_net_id __read_mostly; 73static int ip_vs_net_id __read_mostly;
73/* netns cnt used for uniqueness */ 74/* netns cnt used for uniqueness */
@@ -611,7 +612,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
611 ret = cp->packet_xmit(skb, cp, pd->pp, iph); 612 ret = cp->packet_xmit(skb, cp, pd->pp, iph);
612 /* do not touch skb anymore */ 613 /* do not touch skb anymore */
613 614
614 atomic_inc(&cp->in_pkts); 615 if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
616 atomic_inc(&cp->control->in_pkts);
617 else
618 atomic_inc(&cp->in_pkts);
615 ip_vs_conn_put(cp); 619 ip_vs_conn_put(cp);
616 return ret; 620 return ret;
617 } 621 }
@@ -1100,6 +1104,143 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
1100 } 1104 }
1101} 1105}
1102 1106
1107/* Generic function to create new connections for outgoing RS packets
1108 *
1109 * Pre-requisites for successful connection creation:
1110 * 1) Virtual Service is NOT fwmark based:
1111 * In fwmark-VS actual vaddr and vport are unknown to IPVS
1112 * 2) Real Server and Virtual Service were NOT configured without port:
1113 * This is to allow match of different VS to the same RS ip-addr
1114 */
1115struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc,
1116 struct ip_vs_dest *dest,
1117 struct sk_buff *skb,
1118 const struct ip_vs_iphdr *iph,
1119 __be16 dport,
1120 __be16 cport)
1121{
1122 struct ip_vs_conn_param param;
1123 struct ip_vs_conn *ct = NULL, *cp = NULL;
1124 const union nf_inet_addr *vaddr, *daddr, *caddr;
1125 union nf_inet_addr snet;
1126 __be16 vport;
1127 unsigned int flags;
1128
1129 EnterFunction(12);
1130 vaddr = &svc->addr;
1131 vport = svc->port;
1132 daddr = &iph->saddr;
1133 caddr = &iph->daddr;
1134
1135 /* check pre-requisites are satisfied */
1136 if (svc->fwmark)
1137 return NULL;
1138 if (!vport || !dport)
1139 return NULL;
1140
1141 /* for persistent service first create connection template */
1142 if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
1143 /* apply netmask the same way ingress-side does */
1144#ifdef CONFIG_IP_VS_IPV6
1145 if (svc->af == AF_INET6)
1146 ipv6_addr_prefix(&snet.in6, &caddr->in6,
1147 (__force __u32)svc->netmask);
1148 else
1149#endif
1150 snet.ip = caddr->ip & svc->netmask;
1151 /* fill params and create template if not existent */
1152 if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol,
1153 &snet, 0, vaddr,
1154 vport, &param) < 0)
1155 return NULL;
1156 ct = ip_vs_ct_in_get(&param);
1157 if (!ct) {
1158 ct = ip_vs_conn_new(&param, dest->af, daddr, dport,
1159 IP_VS_CONN_F_TEMPLATE, dest, 0);
1160 if (!ct) {
1161 kfree(param.pe_data);
1162 return NULL;
1163 }
1164 ct->timeout = svc->timeout;
1165 } else {
1166 kfree(param.pe_data);
1167 }
1168 }
1169
1170 /* connection flags */
1171 flags = ((svc->flags & IP_VS_SVC_F_ONEPACKET) &&
1172 iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
1173 /* create connection */
1174 ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
1175 caddr, cport, vaddr, vport, &param);
1176 cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
1177 if (!cp) {
1178 if (ct)
1179 ip_vs_conn_put(ct);
1180 return NULL;
1181 }
1182 if (ct) {
1183 ip_vs_control_add(cp, ct);
1184 ip_vs_conn_put(ct);
1185 }
1186 ip_vs_conn_stats(cp, svc);
1187
1188 /* return connection (will be used to handle outgoing packet) */
1189 IP_VS_DBG_BUF(6, "New connection RS-initiated:%c c:%s:%u v:%s:%u "
1190 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
1191 ip_vs_fwd_tag(cp),
1192 IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
1193 IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
1194 IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
1195 cp->flags, atomic_read(&cp->refcnt));
1196 LeaveFunction(12);
1197 return cp;
1198}
1199
1200/* Handle outgoing packets which are considered requests initiated by
1201 * real servers, so that subsequent responses from external client can be
1202 * routed to the right real server.
1203 * Used also for outgoing responses in OPS mode.
1204 *
1205 * Connection management is handled by persistent-engine specific callback.
1206 */
1207static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum,
1208 struct netns_ipvs *ipvs,
1209 int af, struct sk_buff *skb,
1210 const struct ip_vs_iphdr *iph)
1211{
1212 struct ip_vs_dest *dest;
1213 struct ip_vs_conn *cp = NULL;
1214 __be16 _ports[2], *pptr;
1215
1216 if (hooknum == NF_INET_LOCAL_IN)
1217 return NULL;
1218
1219 pptr = frag_safe_skb_hp(skb, iph->len,
1220 sizeof(_ports), _ports, iph);
1221 if (!pptr)
1222 return NULL;
1223
1224 rcu_read_lock();
1225 dest = ip_vs_find_real_service(ipvs, af, iph->protocol,
1226 &iph->saddr, pptr[0]);
1227 if (dest) {
1228 struct ip_vs_service *svc;
1229 struct ip_vs_pe *pe;
1230
1231 svc = rcu_dereference(dest->svc);
1232 if (svc) {
1233 pe = rcu_dereference(svc->pe);
1234 if (pe && pe->conn_out)
1235 cp = pe->conn_out(svc, dest, skb, iph,
1236 pptr[0], pptr[1]);
1237 }
1238 }
1239 rcu_read_unlock();
1240
1241 return cp;
1242}
1243
1103/* Handle response packets: rewrite addresses and send away... 1244/* Handle response packets: rewrite addresses and send away...
1104 */ 1245 */
1105static unsigned int 1246static unsigned int
@@ -1245,6 +1386,22 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
1245 1386
1246 if (likely(cp)) 1387 if (likely(cp))
1247 return handle_response(af, skb, pd, cp, &iph, hooknum); 1388 return handle_response(af, skb, pd, cp, &iph, hooknum);
1389
1390 /* Check for real-server-started requests */
1391 if (atomic_read(&ipvs->conn_out_counter)) {
1392 /* Currently only for UDP:
1393 * connection oriented protocols typically use
1394 * ephemeral ports for outgoing connections, so
1395 * related incoming responses would not match any VS
1396 */
1397 if (pp->protocol == IPPROTO_UDP) {
1398 cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
1399 if (likely(cp))
1400 return handle_response(af, skb, pd, cp, &iph,
1401 hooknum);
1402 }
1403 }
1404
1248 if (sysctl_nat_icmp_send(ipvs) && 1405 if (sysctl_nat_icmp_send(ipvs) &&
1249 (pp->protocol == IPPROTO_TCP || 1406 (pp->protocol == IPPROTO_TCP ||
1250 pp->protocol == IPPROTO_UDP || 1407 pp->protocol == IPPROTO_UDP ||
@@ -1837,6 +1994,9 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
1837 1994
1838 if (ipvs->sync_state & IP_VS_STATE_MASTER) 1995 if (ipvs->sync_state & IP_VS_STATE_MASTER)
1839 ip_vs_sync_conn(ipvs, cp, pkts); 1996 ip_vs_sync_conn(ipvs, cp, pkts);
1997 else if ((cp->flags & IP_VS_CONN_F_ONE_PACKET) && cp->control)
1998 /* increment is done inside ip_vs_sync_conn too */
1999 atomic_inc(&cp->control->in_pkts);
1840 2000
1841 ip_vs_conn_put(cp); 2001 ip_vs_conn_put(cp);
1842 return ret; 2002 return ret;
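ip_vs_new_conn_out() above masks the client address before the persistence-template lookup, so every client in the same masked network shares one template. A minimal IPv4-only sketch of that grouping; addresses are illustrative:

    #include <stdio.h>
    #include <arpa/inet.h>

    int main(void)
    {
        in_addr_t caddr   = inet_addr("192.0.2.77");     /* client address */
        in_addr_t netmask = inet_addr("255.255.255.0");  /* svc->netmask   */
        struct in_addr snet = { .s_addr = caddr & netmask };

        /* every client in 192.0.2.0/24 maps to the same template key */
        printf("template source net: %s\n", inet_ntoa(snet));
        return 0;
    }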
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index f35ebc02fa5c..c3c809b2e712 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -567,6 +567,36 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
567 return false; 567 return false;
568} 568}
569 569
570/* Find real service record by <proto,addr,port>.
571 * In case of multiple records with the same <proto,addr,port>, only
572 * the first found record is returned.
573 *
574 * To be called under RCU lock.
575 */
576struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
577 __u16 protocol,
578 const union nf_inet_addr *daddr,
579 __be16 dport)
580{
581 unsigned int hash;
582 struct ip_vs_dest *dest;
583
584 /* Check for "full" addressed entries */
585 hash = ip_vs_rs_hashkey(af, daddr, dport);
586
587 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
588 if (dest->port == dport &&
589 dest->af == af &&
590 ip_vs_addr_equal(af, &dest->addr, daddr) &&
591 (dest->protocol == protocol || dest->vfwmark)) {
592 /* HIT */
593 return dest;
594 }
595 }
596
597 return NULL;
598}
599
570/* Lookup destination by {addr,port} in the given service 600/* Lookup destination by {addr,port} in the given service
571 * Called under RCU lock. 601 * Called under RCU lock.
572 */ 602 */
@@ -1253,6 +1283,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1253 atomic_inc(&ipvs->ftpsvc_counter); 1283 atomic_inc(&ipvs->ftpsvc_counter);
1254 else if (svc->port == 0) 1284 else if (svc->port == 0)
1255 atomic_inc(&ipvs->nullsvc_counter); 1285 atomic_inc(&ipvs->nullsvc_counter);
1286 if (svc->pe && svc->pe->conn_out)
1287 atomic_inc(&ipvs->conn_out_counter);
1256 1288
1257 ip_vs_start_estimator(ipvs, &svc->stats); 1289 ip_vs_start_estimator(ipvs, &svc->stats);
1258 1290
@@ -1293,6 +1325,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1293 struct ip_vs_scheduler *sched = NULL, *old_sched; 1325 struct ip_vs_scheduler *sched = NULL, *old_sched;
1294 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1326 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1295 int ret = 0; 1327 int ret = 0;
1328 bool new_pe_conn_out, old_pe_conn_out;
1296 1329
1297 /* 1330 /*
1298 * Lookup the scheduler, by 'u->sched_name' 1331 * Lookup the scheduler, by 'u->sched_name'
@@ -1355,8 +1388,16 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1355 svc->netmask = u->netmask; 1388 svc->netmask = u->netmask;
1356 1389
1357 old_pe = rcu_dereference_protected(svc->pe, 1); 1390 old_pe = rcu_dereference_protected(svc->pe, 1);
1358 if (pe != old_pe) 1391 if (pe != old_pe) {
1359 rcu_assign_pointer(svc->pe, pe); 1392 rcu_assign_pointer(svc->pe, pe);
1393 /* check for optional methods in new pe */
1394 new_pe_conn_out = (pe && pe->conn_out) ? true : false;
1395 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
1396 if (new_pe_conn_out && !old_pe_conn_out)
1397 atomic_inc(&svc->ipvs->conn_out_counter);
1398 if (old_pe_conn_out && !new_pe_conn_out)
1399 atomic_dec(&svc->ipvs->conn_out_counter);
1400 }
1360 1401
1361out: 1402out:
1362 ip_vs_scheduler_put(old_sched); 1403 ip_vs_scheduler_put(old_sched);
@@ -1389,6 +1430,8 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1389 1430
1390 /* Unbind persistence engine, keep svc->pe */ 1431 /* Unbind persistence engine, keep svc->pe */
1391 old_pe = rcu_dereference_protected(svc->pe, 1); 1432 old_pe = rcu_dereference_protected(svc->pe, 1);
1433 if (old_pe && old_pe->conn_out)
1434 atomic_dec(&ipvs->conn_out_counter);
1392 ip_vs_pe_put(old_pe); 1435 ip_vs_pe_put(old_pe);
1393 1436
1394 /* 1437 /*
@@ -3969,6 +4012,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
3969 (unsigned long) ipvs); 4012 (unsigned long) ipvs);
3970 atomic_set(&ipvs->ftpsvc_counter, 0); 4013 atomic_set(&ipvs->ftpsvc_counter, 0);
3971 atomic_set(&ipvs->nullsvc_counter, 0); 4014 atomic_set(&ipvs->nullsvc_counter, 0);
4015 atomic_set(&ipvs->conn_out_counter, 0);
3972 4016
3973 /* procfs stats */ 4017 /* procfs stats */
3974 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4018 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
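The conn_out_counter updates above keep a global gate balanced across service add, edit and delete, so the per-packet output path can skip the real-server lookup entirely while no persistence engine provides a conn_out method. A hedged sketch of the gating idea with C11 atomics; names are illustrative:

    #include <stdatomic.h>

    static atomic_int conn_out_counter;   /* services with the hook */

    /* Keep the gate balanced when a service's persistence engine changes. */
    static void pe_swap(int old_has_conn_out, int new_has_conn_out)
    {
        if (new_has_conn_out && !old_has_conn_out)
            atomic_fetch_add(&conn_out_counter, 1);
        if (old_has_conn_out && !new_has_conn_out)
            atomic_fetch_sub(&conn_out_counter, 1);
    }

    /* Per-packet fast path: do nothing unless some service needs the hook. */
    static int rs_conn_out_possible(void)
    {
        return atomic_load(&conn_out_counter) != 0;
    }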
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 30434fb133df..f04fd8df210b 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -93,6 +93,10 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return; 94 return;
95 95
96 /* Never alter conntrack for OPS conns (no reply is expected) */
97 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
98 return;
99
96 /* Alter reply only in original direction */ 100 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 101 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return; 102 return;
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0a6eb5c0d9e9..d07ef9e31c12 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -143,6 +143,20 @@ static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
143 return cp->pe_data_len; 143 return cp->pe_data_len;
144} 144}
145 145
146static struct ip_vs_conn *
147ip_vs_sip_conn_out(struct ip_vs_service *svc,
148 struct ip_vs_dest *dest,
149 struct sk_buff *skb,
150 const struct ip_vs_iphdr *iph,
151 __be16 dport,
152 __be16 cport)
153{
154 if (likely(iph->protocol == IPPROTO_UDP))
155 return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
 156 /* currently no need to handle protocols other than UDP */
157 return NULL;
158}
159
146static struct ip_vs_pe ip_vs_sip_pe = 160static struct ip_vs_pe ip_vs_sip_pe =
147{ 161{
148 .name = "sip", 162 .name = "sip",
@@ -153,6 +167,7 @@ static struct ip_vs_pe ip_vs_sip_pe =
153 .ct_match = ip_vs_sip_ct_match, 167 .ct_match = ip_vs_sip_ct_match,
154 .hashkey_raw = ip_vs_sip_hashkey_raw, 168 .hashkey_raw = ip_vs_sip_hashkey_raw,
155 .show_pe_data = ip_vs_sip_show_pe_data, 169 .show_pe_data = ip_vs_sip_show_pe_data,
170 .conn_out = ip_vs_sip_conn_out,
156}; 171};
157 172
158static int __init ip_vs_sip_init(void) 173static int __init ip_vs_sip_init(void)
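struct ip_vs_pe gains conn_out as an optional method above, so callers such as __ip_vs_rs_conn_out() must gate on the pointer before invoking it. The pattern in isolation, with stand-in types:

    /* Optional-method table: conn_out may legitimately be NULL. */
    struct pe_ops {
        const char *name;
        int (*conn_out)(void *svc, void *dest);   /* optional hook */
    };

    static int try_conn_out(const struct pe_ops *pe, void *svc, void *dest)
    {
        if (pe && pe->conn_out)
            return pe->conn_out(svc, dest);
        return 0;   /* no hook: caller falls back to default handling */
    }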
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2fd607408998..0cd29365004f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -54,6 +54,7 @@
54#include <net/netfilter/nf_nat.h> 54#include <net/netfilter/nf_nat.h>
55#include <net/netfilter/nf_nat_core.h> 55#include <net/netfilter/nf_nat_core.h>
56#include <net/netfilter/nf_nat_helper.h> 56#include <net/netfilter/nf_nat_helper.h>
57#include <net/netns/hash.h>
57 58
58#define NF_CONNTRACK_VERSION "0.5.0" 59#define NF_CONNTRACK_VERSION "0.5.0"
59 60
@@ -68,7 +69,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
68__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 69__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
69EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 70EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
70 71
72struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
73EXPORT_SYMBOL_GPL(nf_conntrack_hash);
74
75static __read_mostly struct kmem_cache *nf_conntrack_cachep;
71static __read_mostly spinlock_t nf_conntrack_locks_all_lock; 76static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
77static __read_mostly seqcount_t nf_conntrack_generation;
72static __read_mostly bool nf_conntrack_locks_all; 78static __read_mostly bool nf_conntrack_locks_all;
73 79
74void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) 80void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -107,7 +113,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
107 spin_lock_nested(&nf_conntrack_locks[h1], 113 spin_lock_nested(&nf_conntrack_locks[h1],
108 SINGLE_DEPTH_NESTING); 114 SINGLE_DEPTH_NESTING);
109 } 115 }
110 if (read_seqcount_retry(&net->ct.generation, sequence)) { 116 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
111 nf_conntrack_double_unlock(h1, h2); 117 nf_conntrack_double_unlock(h1, h2);
112 return true; 118 return true;
113 } 119 }
@@ -141,43 +147,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
141DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 147DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
142EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 148EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
143 149
144unsigned int nf_conntrack_hash_rnd __read_mostly; 150static unsigned int nf_conntrack_hash_rnd __read_mostly;
145EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
146 151
147static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) 152static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
153 const struct net *net)
148{ 154{
149 unsigned int n; 155 unsigned int n;
156 u32 seed;
157
158 get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
150 159
151 /* The direction must be ignored, so we hash everything up to the 160 /* The direction must be ignored, so we hash everything up to the
152 * destination ports (which is a multiple of 4) and treat the last 161 * destination ports (which is a multiple of 4) and treat the last
153 * three bytes manually. 162 * three bytes manually.
154 */ 163 */
164 seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
155 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 165 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
156 return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ 166 return jhash2((u32 *)tuple, n, seed ^
157 (((__force __u16)tuple->dst.u.all << 16) | 167 (((__force __u16)tuple->dst.u.all << 16) |
158 tuple->dst.protonum)); 168 tuple->dst.protonum));
159} 169}
160 170
161static u32 __hash_bucket(u32 hash, unsigned int size) 171static u32 scale_hash(u32 hash)
162{
163 return reciprocal_scale(hash, size);
164}
165
166static u32 hash_bucket(u32 hash, const struct net *net)
167{ 172{
168 return __hash_bucket(hash, net->ct.htable_size); 173 return reciprocal_scale(hash, nf_conntrack_htable_size);
169} 174}
170 175
171static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 176static u32 __hash_conntrack(const struct net *net,
172 unsigned int size) 177 const struct nf_conntrack_tuple *tuple,
178 unsigned int size)
173{ 179{
174 return __hash_bucket(hash_conntrack_raw(tuple), size); 180 return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
175} 181}
176 182
177static inline u_int32_t hash_conntrack(const struct net *net, 183static u32 hash_conntrack(const struct net *net,
178 const struct nf_conntrack_tuple *tuple) 184 const struct nf_conntrack_tuple *tuple)
179{ 185{
180 return __hash_conntrack(tuple, net->ct.htable_size); 186 return scale_hash(hash_conntrack_raw(tuple, net));
181} 187}
182 188
183bool 189bool
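The hashing rework above derives the seed from a lazily initialised random value XORed with a per-namespace mix, then maps the hash onto a bucket with reciprocal_scale() rather than a modulo, which lets one global table serve all namespaces without predictable cross-namespace collisions. A runnable userspace analogue; the kernel uses jhash2(), the mixer below is a toy stand-in, and all constants are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy mixer standing in for jhash2(). */
    static uint32_t mix(const uint32_t *k, unsigned int n, uint32_t seed)
    {
        uint32_t h = seed;

        while (n--)
            h = (h ^ *k++) * 0x9e3779b1u;
        return h;
    }

    /* reciprocal_scale(): map a 32-bit hash onto [0, size) without '%'. */
    static uint32_t scale(uint32_t hash, uint32_t size)
    {
        return (uint32_t)(((uint64_t)hash * size) >> 32);
    }

    int main(void)
    {
        uint32_t tuple[4] = { 0xc0000201, 0xc0000202, 0x1f900050, 6 };
        uint32_t rnd = 0xdeadbeef;       /* get_random_once() result   */
        uint32_t ns_mix = 0x12345678;    /* net_hash_mix(net) stand-in */

        printf("bucket = %u\n", scale(mix(tuple, 4, rnd ^ ns_mix), 16384));
        return 0;
    }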
@@ -358,7 +364,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	}
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	if (l4proto && l4proto->destroy)
+	if (l4proto->destroy)
 		l4proto->destroy(ct);
 
 	rcu_read_unlock();
@@ -393,7 +399,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
 	local_bh_disable();
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		hash = hash_conntrack(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		reply_hash = hash_conntrack(net,
@@ -445,7 +451,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 		const struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_zone *zone)
+		const struct nf_conntrack_zone *zone,
+		const struct net *net)
 {
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 
@@ -454,7 +461,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 	 */
 	return nf_ct_tuple_equal(tuple, &h->tuple) &&
 	       nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
-	       nf_ct_is_confirmed(ct);
+	       nf_ct_is_confirmed(ct) &&
+	       net_eq(net, nf_ct_net(ct));
 }
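Adding the net argument here is the heart of the shared-table conversion: once all namespaces insert into the same buckets, tuple and zone alone no longer identify an entry, so the owning namespace becomes part of the key. A toy version of the composite check, assuming namespaces reduced to plain integer ids:

#include <stdbool.h>
#include <stdint.h>

struct toy_entry {
	uint32_t tuple;		/* stands in for the conntrack tuple */
	uint16_t zone;
	uint32_t netns;		/* owning namespace, new in the key */
	bool confirmed;
};

static bool toy_key_equal(const struct toy_entry *e, uint32_t tuple,
			  uint16_t zone, uint32_t netns)
{
	return e->tuple == tuple &&
	       e->zone == zone &&
	       e->confirmed &&
	       e->netns == netns;	/* the net_eq() test added above */
}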
 
 /*
@@ -467,21 +475,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
 	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_head *ct_hash;
 	struct hlist_nulls_node *n;
-	unsigned int bucket = hash_bucket(hash, net);
+	unsigned int bucket, sequence;
 
-	/* Disable BHs the entire time since we normally need to disable them
-	 * at least once for the stats anyway.
-	 */
-	local_bh_disable();
 begin:
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
-		if (nf_ct_key_equal(h, tuple, zone)) {
-			NF_CT_STAT_INC(net, found);
-			local_bh_enable();
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		bucket = scale_hash(hash);
+		ct_hash = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+		if (nf_ct_key_equal(h, tuple, zone, net)) {
+			NF_CT_STAT_INC_ATOMIC(net, found);
 			return h;
 		}
-		NF_CT_STAT_INC(net, searched);
+		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
 	/*
 	 * if the nulls value we got at the end of this lookup is
@@ -489,10 +499,9 @@ begin:
 	 * We probably met an item that was moved to another chain.
 	 */
 	if (get_nulls_value(n) != bucket) {
-		NF_CT_STAT_INC(net, search_restart);
+		NF_CT_STAT_INC_ATOMIC(net, search_restart);
 		goto begin;
 	}
-	local_bh_enable();
 
 	return NULL;
 }
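The lookup no longer disables bottom halves; it snapshots the hash table pointer and bucket under nf_conntrack_generation, retrying if a resize ran in between, and the statistics switch to the _ATOMIC increments that are safe without BH protection. A compact userspace rendering of the reader pattern in C11 atomics; the names mirror the kernel API, but this is an illustration, not the kernel implementation:

#include <stdatomic.h>

struct seqcount { _Atomic unsigned int sequence; };

static unsigned int read_seqcount_begin(const struct seqcount *s)
{
	unsigned int seq;

	/* An odd value means a writer is mid-update; wait it out. */
	while ((seq = atomic_load_explicit(&s->sequence,
					   memory_order_acquire)) & 1u)
		;
	return seq;
}

static int read_seqcount_retry(const struct seqcount *s, unsigned int start)
{
	return atomic_load_explicit(&s->sequence,
				    memory_order_acquire) != start;
}

/* Usage shaped like the lookup above: recompute the bucket until no
 * writer (i.e. no table resize) slipped in between. */
static unsigned int stable_bucket(const struct seqcount *gen,
				  unsigned int hash,
				  const unsigned int *htable_size)
{
	unsigned int seq, bucket;

	do {
		seq = read_seqcount_begin(gen);
		bucket = hash % *htable_size; /* reciprocal_scale() in-kernel */
	} while (read_seqcount_retry(gen, seq));
	return bucket;
}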
@@ -514,7 +523,7 @@ begin:
 		     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
+			if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
 				nf_ct_put(ct);
 				goto begin;
 			}
@@ -530,7 +539,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
 		      const struct nf_conntrack_tuple *tuple)
 {
 	return __nf_conntrack_find_get(net, zone, tuple,
-				       hash_conntrack_raw(tuple));
+				       hash_conntrack_raw(tuple, net));
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
@@ -538,12 +547,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 				       unsigned int hash,
 				       unsigned int reply_hash)
 {
-	struct net *net = nf_ct_net(ct);
-
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-				 &net->ct.hash[hash]);
+				 &nf_conntrack_hash[hash]);
 	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
-				 &net->ct.hash[reply_hash]);
+				 &nf_conntrack_hash[reply_hash]);
 }
 
 int
@@ -560,7 +567,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 
 	local_bh_disable();
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		hash = hash_conntrack(net,
 				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		reply_hash = hash_conntrack(net,
@@ -568,17 +575,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 	} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
 	/* See if there's one in the list already, including reverse */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				    zone, net))
 			goto out;
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				    zone, net))
 			goto out;
 
 	add_timer(&ct->timeout);
@@ -599,6 +603,63 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
+static inline void nf_ct_acct_update(struct nf_conn *ct,
+				     enum ip_conntrack_info ctinfo,
+				     unsigned int len)
+{
+	struct nf_conn_acct *acct;
+
+	acct = nf_conn_acct_find(ct);
+	if (acct) {
+		struct nf_conn_counter *counter = acct->counter;
+
+		atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+		atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+	}
+}
+
+static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			     const struct nf_conn *loser_ct)
+{
+	struct nf_conn_acct *acct;
+
+	acct = nf_conn_acct_find(loser_ct);
+	if (acct) {
+		struct nf_conn_counter *counter = acct->counter;
+		unsigned int bytes;
+
+		/* u32 should be fine since we must have seen one packet. */
+		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+		nf_ct_acct_update(ct, ctinfo, bytes);
+	}
+}
+
+/* Resolve race on insertion if this protocol allows this. */
+static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       struct nf_conntrack_tuple_hash *h)
+{
+	/* This is the conntrack entry already in hashes that won race. */
+	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+	struct nf_conntrack_l4proto *l4proto;
+
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (l4proto->allow_clash &&
+	    !nf_ct_is_dying(ct) &&
+	    atomic_inc_not_zero(&ct->ct_general.use)) {
+		nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
+		nf_conntrack_put(skb->nfct);
+		/* Assign conntrack already in hashes to this skbuff. Don't
+		 * modify skb->nfctinfo to ensure consistent stateful filtering.
+		 */
+		skb->nfct = &ct->ct_general;
+		return NF_ACCEPT;
+	}
+	NF_CT_STAT_INC(net, drop);
+	return NF_DROP;
+}
+
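This is the clash-resolution core of the series: when two packets of the same new flow race through conntrack, the loser used to be dropped; now, if the L4 tracker opts in via allow_clash (set for UDP and UDP-lite further down in this batch), the losing skb adopts the entry that won. The enabling trick is the conditional reference grab, atomic_inc_not_zero(), which takes a reference only while the object is still live. A freestanding sketch of that pattern in C11 atomics, with struct entry standing in for struct nf_conn:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct entry {
	_Atomic int use;	/* reference count */
	bool dying;
};

/* atomic_inc_not_zero(): take a reference unless the count already hit 0,
 * in which case the object is on its way to being freed. */
static bool get_unless_zero(struct entry *e)
{
	int old = atomic_load(&e->use);

	while (old != 0)
		if (atomic_compare_exchange_weak(&e->use, &old, old + 1))
			return true;
	return false;
}

/* Loser of the insert race: drop our private entry, reuse the winner's. */
static struct entry *resolve_clash(struct entry *winner)
{
	if (!winner->dying && get_unless_zero(winner))
		return winner;	/* caller merges counters, frees its own */
	return NULL;		/* no luck: the packet is dropped */
}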
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
@@ -613,6 +674,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
 	unsigned int sequence;
+	int ret = NF_DROP;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	net = nf_ct_net(ct);
@@ -628,10 +690,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	local_bh_disable();
 
 	do {
-		sequence = read_seqcount_begin(&net->ct.generation);
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
 		/* reuse the hash saved before */
 		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
-		hash = hash_bucket(hash, net);
+		hash = scale_hash(hash);
 		reply_hash = hash_conntrack(net,
 					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
@@ -655,23 +717,22 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	 */
 	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-	if (unlikely(nf_ct_is_dying(ct)))
-		goto out;
+	if (unlikely(nf_ct_is_dying(ct))) {
+		nf_ct_add_to_dying_list(ct);
+		goto dying;
+	}
 
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				    zone, net))
 			goto out;
-	hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple) &&
-		    nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-				     NF_CT_DIRECTION(h)))
+
+	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+				    zone, net))
 			goto out;
 
 	/* Timer relative to confirmation time, not original
@@ -710,10 +771,12 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 
 out:
 	nf_ct_add_to_dying_list(ct);
+	ret = nf_ct_resolve_clash(net, skb, ctinfo, h);
+dying:
 	nf_conntrack_double_unlock(hash, reply_hash);
 	NF_CT_STAT_INC(net, insert_failed);
 	local_bh_enable();
-	return NF_DROP;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
 
@@ -726,29 +789,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple_hash *h;
+	struct hlist_nulls_head *ct_hash;
+	unsigned int hash, sequence;
 	struct hlist_nulls_node *n;
 	struct nf_conn *ct;
-	unsigned int hash;
 
 	zone = nf_ct_zone(ignored_conntrack);
-	hash = hash_conntrack(net, tuple);
 
-	/* Disable BHs the entire time since we need to disable them at
-	 * least once for the stats anyway.
-	 */
-	rcu_read_lock_bh();
-	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+	rcu_read_lock();
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		hash = hash_conntrack(net, tuple);
+		ct_hash = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (ct != ignored_conntrack &&
-		    nf_ct_tuple_equal(tuple, &h->tuple) &&
-		    nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
-			NF_CT_STAT_INC(net, found);
-			rcu_read_unlock_bh();
+		    nf_ct_key_equal(h, tuple, zone, net)) {
+			NF_CT_STAT_INC_ATOMIC(net, found);
+			rcu_read_unlock();
 			return 1;
 		}
-		NF_CT_STAT_INC(net, searched);
+		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
-	rcu_read_unlock_bh();
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -762,71 +827,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 {
 	/* Use oldest entry, which is roughly LRU */
 	struct nf_conntrack_tuple_hash *h;
-	struct nf_conn *ct = NULL, *tmp;
+	struct nf_conn *tmp;
 	struct hlist_nulls_node *n;
-	unsigned int i = 0, cnt = 0;
-	int dropped = 0;
-	unsigned int hash, sequence;
+	unsigned int i, hash, sequence;
+	struct nf_conn *ct = NULL;
 	spinlock_t *lockp;
+	bool ret = false;
+
+	i = 0;
 
 	local_bh_disable();
 restart:
-	sequence = read_seqcount_begin(&net->ct.generation);
-	hash = hash_bucket(_hash, net);
-	for (; i < net->ct.htable_size; i++) {
+	sequence = read_seqcount_begin(&nf_conntrack_generation);
+	for (; i < NF_CT_EVICTION_RANGE; i++) {
+		hash = scale_hash(_hash++);
 		lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
-		if (read_seqcount_retry(&net->ct.generation, sequence)) {
+		if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
 			spin_unlock(lockp);
 			goto restart;
 		}
-		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
+		hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
 					       hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
-			if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
-			    !nf_ct_is_dying(tmp) &&
-			    atomic_inc_not_zero(&tmp->ct_general.use)) {
+
+			if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+			    !net_eq(nf_ct_net(tmp), net) ||
+			    nf_ct_is_dying(tmp))
+				continue;
+
+			if (atomic_inc_not_zero(&tmp->ct_general.use)) {
 				ct = tmp;
 				break;
 			}
-			cnt++;
 		}
 
-		hash = (hash + 1) % net->ct.htable_size;
 		spin_unlock(lockp);
-
-		if (ct || cnt >= NF_CT_EVICTION_RANGE)
+		if (ct)
 			break;
-
 	}
+
 	local_bh_enable();
 
 	if (!ct)
-		return dropped;
+		return false;
 
-	if (del_timer(&ct->timeout)) {
+	/* kill only if in same netns -- might have moved due to
+	 * SLAB_DESTROY_BY_RCU rules
+	 */
+	if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
 		if (nf_ct_delete(ct, 0, 0)) {
-			dropped = 1;
 			NF_CT_STAT_INC_ATOMIC(net, early_drop);
+			ret = true;
 		}
 	}
-	nf_ct_put(ct);
-	return dropped;
-}
-
-void init_nf_conntrack_hash_rnd(void)
-{
-	unsigned int rand;
 
-	/*
-	 * Why not initialize nf_conntrack_rnd in a "init()" function ?
-	 * Because there isn't enough entropy when system initializing,
-	 * and we initialize it as late as possible.
-	 */
-	do {
-		get_random_bytes(&rand, sizeof(rand));
-	} while (!rand);
-	cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+	nf_ct_put(ct);
+	return ret;
 }
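early_drop() now probes a fixed window of NF_CT_EVICTION_RANGE buckets starting at the insertion hash, skipping assured, dying and foreign-namespace entries, instead of potentially walking the whole table; the netns re-check after the scan guards against SLAB_DESTROY_BY_RCU reuse. Roughly, with locking and RCU stripped out, the walk looks like this sketch:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct entry { bool assured; bool dying; uint32_t netns; struct entry *next; };
struct table { struct entry **buckets; uint32_t size; };

#define EVICTION_RANGE 8	/* stand-in for NF_CT_EVICTION_RANGE */

/* Return the first droppable entry within a bounded bucket window, or
 * NULL to signal "give up" rather than scan the entire table. */
static struct entry *pick_victim(const struct table *t, uint32_t hash,
				 uint32_t netns)
{
	for (uint32_t i = 0; i < EVICTION_RANGE; i++) {
		struct entry *e = t->buckets[(hash + i) % t->size];

		for (; e; e = e->next)
			if (!e->assured && !e->dying && e->netns == netns)
				return e;
	}
	return NULL;
}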
 
 static struct nf_conn *
@@ -838,12 +895,6 @@ __nf_conntrack_alloc(struct net *net,
 {
 	struct nf_conn *ct;
 
-	if (unlikely(!nf_conntrack_hash_rnd)) {
-		init_nf_conntrack_hash_rnd();
-		/* recompute the hash as nf_conntrack_hash_rnd is initialized */
-		hash = hash_conntrack_raw(orig);
-	}
-
 	/* We don't want any race condition at early drop stage */
 	atomic_inc(&net->ct.count);
 
@@ -860,7 +911,7 @@ __nf_conntrack_alloc(struct net *net,
 	 * Do not use kmem_cache_zalloc(), as this cache uses
 	 * SLAB_DESTROY_BY_RCU.
 	 */
-	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
+	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
 	if (ct == NULL)
 		goto out;
 
@@ -887,7 +938,7 @@ __nf_conntrack_alloc(struct net *net,
 	atomic_set(&ct->ct_general.use, 0);
 	return ct;
 out_free:
-	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	kmem_cache_free(nf_conntrack_cachep, ct);
 out:
 	atomic_dec(&net->ct.count);
 	return ERR_PTR(-ENOMEM);
@@ -914,7 +965,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
-	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	kmem_cache_free(nf_conntrack_cachep, ct);
 	smp_mb__before_atomic();
 	atomic_dec(&net->ct.count);
 }
@@ -1061,7 +1112,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 
 	/* look for tuple match */
 	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-	hash = hash_conntrack_raw(&tuple);
+	hash = hash_conntrack_raw(&tuple, net);
 	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1270,17 +1321,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	}
 
 acct:
-	if (do_acct) {
-		struct nf_conn_acct *acct;
-
-		acct = nf_conn_acct_find(ct);
-		if (acct) {
-			struct nf_conn_counter *counter = acct->counter;
-
-			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-			atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
-		}
-	}
+	if (do_acct)
+		nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -1289,18 +1331,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       int do_acct)
 {
-	if (do_acct) {
-		struct nf_conn_acct *acct;
-
-		acct = nf_conn_acct_find(ct);
-		if (acct) {
-			struct nf_conn_counter *counter = acct->counter;
-
-			atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-			atomic64_add(skb->len - skb_network_offset(skb),
-				     &counter[CTINFO2DIR(ctinfo)].bytes);
-		}
-	}
+	if (do_acct)
+		nf_ct_acct_update(ct, ctinfo, skb->len);
 
 	if (del_timer(&ct->timeout)) {
 		ct->timeout.function((unsigned long)ct);
@@ -1396,16 +1428,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	int cpu;
 	spinlock_t *lockp;
 
-	for (; *bucket < net->ct.htable_size; (*bucket)++) {
+	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
 		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
 		local_bh_disable();
 		nf_conntrack_lock(lockp);
-		if (*bucket < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+		if (*bucket < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
 				if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 					continue;
 				ct = nf_ct_tuplehash_to_ctrack(h);
-				if (iter(ct, data))
+				if (net_eq(nf_ct_net(ct), net) &&
+				    iter(ct, data))
 					goto found;
 			}
 		}
@@ -1443,6 +1476,9 @@ void nf_ct_iterate_cleanup(struct net *net,
 
 	might_sleep();
 
+	if (atomic_read(&net->ct.count) == 0)
+		return;
+
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
 		if (del_timer(&ct->timeout))
@@ -1494,6 +1530,8 @@ void nf_conntrack_cleanup_end(void)
 	while (untrack_refs() > 0)
 		schedule();
 
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
@@ -1544,15 +1582,12 @@ i_see_dead_people:
 	}
 
 	list_for_each_entry(net, net_exit_list, exit_list) {
-		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 		nf_conntrack_proto_pernet_fini(net);
 		nf_conntrack_helper_pernet_fini(net);
 		nf_conntrack_ecache_pernet_fini(net);
 		nf_conntrack_tstamp_pernet_fini(net);
 		nf_conntrack_acct_pernet_fini(net);
 		nf_conntrack_expect_pernet_fini(net);
-		kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-		kfree(net->ct.slabname);
 		free_percpu(net->ct.stat);
 		free_percpu(net->ct.pcpu_lists);
 	}
@@ -1607,7 +1642,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 
 	local_bh_disable();
 	nf_conntrack_all_lock();
-	write_seqcount_begin(&init_net.ct.generation);
+	write_seqcount_begin(&nf_conntrack_generation);
 
 	/* Lookups in the old hash might happen in parallel, which means we
 	 * might get false negatives during connection lookup. New connections
@@ -1615,26 +1650,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	 * though since that required taking the locks.
 	 */
 
-	for (i = 0; i < init_net.ct.htable_size; i++) {
-		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
-			h = hlist_nulls_entry(init_net.ct.hash[i].first,
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
+		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
 					      struct nf_conntrack_tuple_hash, hnnode);
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize);
+			bucket = __hash_conntrack(nf_ct_net(ct),
+						  &h->tuple, hashsize);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = init_net.ct.htable_size;
-	old_hash = init_net.ct.hash;
+	old_size = nf_conntrack_htable_size;
+	old_hash = nf_conntrack_hash;
 
-	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-	init_net.ct.hash = hash;
+	nf_conntrack_hash = hash;
+	nf_conntrack_htable_size = hashsize;
 
-	write_seqcount_end(&init_net.ct.generation);
+	write_seqcount_end(&nf_conntrack_generation);
 	nf_conntrack_all_unlock();
 	local_bh_enable();
 
+	synchronize_net();
 	nf_ct_free_hashtable(old_hash, old_size);
 	return 0;
 }
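The resize scheme: with all bucket locks held and the generation seqcount bumped, every entry is rehashed into the new array, the new pointer and size are published, and the old array is freed only after synchronize_net() so RCU lookups still traversing it stay safe (they will fail, notice the generation change, and retry). The shape of the rehash, as a single-threaded C sketch without the locking and RCU deferral:

#include <stdint.h>
#include <stdlib.h>

struct node { struct node *next; uint32_t hash; };
struct table { struct node **buckets; uint32_t size; };

/* new_buckets must be zero-initialized (e.g. from calloc()). In the kernel
 * the old array is freed only after an RCU grace period. */
static void rehash(struct table *t, struct node **new_buckets,
		   uint32_t new_size)
{
	for (uint32_t i = 0; i < t->size; i++) {
		while (t->buckets[i]) {
			struct node *n = t->buckets[i];
			uint32_t b = n->hash % new_size;

			t->buckets[i] = n->next;
			n->next = new_buckets[b];
			new_buckets[b] = n;
		}
	}
	free(t->buckets);
	t->buckets = new_buckets;
	t->size = new_size;
}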
@@ -1655,7 +1692,10 @@ EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 int nf_conntrack_init_start(void)
 {
 	int max_factor = 8;
-	int i, ret, cpu;
+	int ret = -ENOMEM;
+	int i, cpu;
+
+	seqcount_init(&nf_conntrack_generation);
 
 	for (i = 0; i < CONNTRACK_LOCKS; i++)
 		spin_lock_init(&nf_conntrack_locks[i]);
@@ -1682,8 +1722,19 @@ int nf_conntrack_init_start(void)
 		 * entries. */
 		max_factor = 4;
 	}
+
+	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+	if (!nf_conntrack_hash)
+		return -ENOMEM;
+
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
+	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
+						sizeof(struct nf_conn), 0,
+						SLAB_DESTROY_BY_RCU, NULL);
+	if (!nf_conntrack_cachep)
+		goto err_cachep;
+
 	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
@@ -1760,6 +1811,9 @@ err_tstamp:
 err_acct:
 	nf_conntrack_expect_fini();
 err_expect:
+	kmem_cache_destroy(nf_conntrack_cachep);
+err_cachep:
+	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 	return ret;
 }
 
@@ -1783,7 +1837,6 @@ int nf_conntrack_init_net(struct net *net)
 	int cpu;
 
 	atomic_set(&net->ct.count, 0);
-	seqcount_init(&net->ct.generation);
 
 	net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
 	if (!net->ct.pcpu_lists)
@@ -1801,24 +1854,6 @@ int nf_conntrack_init_net(struct net *net)
 	if (!net->ct.stat)
 		goto err_pcpu_lists;
 
-	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
-	if (!net->ct.slabname)
-		goto err_slabname;
-
-	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
-							sizeof(struct nf_conn), 0,
-							SLAB_DESTROY_BY_RCU, NULL);
-	if (!net->ct.nf_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		goto err_cache;
-	}
-
-	net->ct.htable_size = nf_conntrack_htable_size;
-	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
-	if (!net->ct.hash) {
-		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-		goto err_hash;
-	}
 	ret = nf_conntrack_expect_pernet_init(net);
 	if (ret < 0)
 		goto err_expect;
@@ -1850,12 +1885,6 @@ err_tstamp:
 err_acct:
 	nf_conntrack_expect_pernet_fini(net);
 err_expect:
-	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
-	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
-err_cache:
-	kfree(net->ct.slabname);
-err_slabname:
 	free_percpu(net->ct.stat);
 err_pcpu_lists:
 	free_percpu(net->ct.pcpu_lists);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 278927ab0948..9e3693128313 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -24,6 +24,7 @@
 #include <linux/moduleparam.h>
 #include <linux/export.h>
 #include <net/net_namespace.h>
+#include <net/netns/hash.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -35,9 +36,13 @@
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
+
 unsigned int nf_ct_expect_max __read_mostly;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static unsigned int nf_ct_expect_hashrnd __read_mostly;
 
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -72,21 +77,32 @@ static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 	nf_ct_expect_put(exp);
 }
 
-static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
-	unsigned int hash;
+	unsigned int hash, seed;
 
-	if (unlikely(!nf_conntrack_hash_rnd)) {
-		init_nf_conntrack_hash_rnd();
-	}
+	get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
+
+	seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
 
 	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
 		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
-		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
+		       (__force __u16)tuple->dst.u.all) ^ seed);
 
 	return reciprocal_scale(hash, nf_ct_expect_hsize);
 }
 
+static bool
+nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
+		const struct nf_conntrack_expect *i,
+		const struct nf_conntrack_zone *zone,
+		const struct net *net)
+{
+	return nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+	       net_eq(net, nf_ct_net(i->master)) &&
+	       nf_ct_zone_equal_any(i->master, zone);
+}
+
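get_random_once() replaces the old hand-rolled lazy seeding (the removed init_nf_conntrack_hash_rnd() earlier in this patch): the seed is drawn once, on the first packet that needs it, when the entropy pool is in better shape than at boot. A userspace analogue built on C11 call_once(); rand() is only a stand-in for real entropy and threads.h is an optional C11 header, so treat this as an illustration:

#include <stdint.h>
#include <stdlib.h>
#include <threads.h>

static uint32_t expect_hashrnd;
static once_flag hashrnd_once = ONCE_FLAG_INIT;

static void hashrnd_init(void)
{
	expect_hashrnd = (uint32_t)rand();	/* real code uses the RNG */
}

static uint32_t expect_hash(uint32_t dst, uint32_t ns_mix, uint32_t hsize)
{
	/* Runs hashrnd_init() exactly once, on first use. */
	call_once(&hashrnd_once, hashrnd_init);
	return (dst ^ expect_hashrnd ^ ns_mix) % hsize;
}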
 struct nf_conntrack_expect *
 __nf_ct_expect_find(struct net *net,
 		    const struct nf_conntrack_zone *zone,
@@ -98,10 +114,9 @@ __nf_ct_expect_find(struct net *net,
 	if (!net->ct.expect_count)
 		return NULL;
 
-	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
-		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
-		    nf_ct_zone_equal_any(i->master, zone))
+	h = nf_ct_expect_dst_hash(net, tuple);
+	hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
+		if (nf_ct_exp_equal(tuple, i, zone, net))
 			return i;
 	}
 	return NULL;
@@ -139,11 +154,10 @@ nf_ct_find_expectation(struct net *net,
 	if (!net->ct.expect_count)
 		return NULL;
 
-	h = nf_ct_expect_dst_hash(tuple);
-	hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
+	h = nf_ct_expect_dst_hash(net, tuple);
+	hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
-		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
-		    nf_ct_zone_equal_any(i->master, zone)) {
+		    nf_ct_exp_equal(tuple, i, zone, net)) {
 			exp = i;
 			break;
 		}
@@ -223,6 +237,7 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 	}
 
 	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
@@ -232,6 +247,7 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 	return a->master == b->master && a->class == b->class &&
 	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
 	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
@@ -342,7 +358,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(exp);
-	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
+	unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
 
 	/* two references : one for hash insert, one for the timer */
 	atomic_add(2, &exp->use);
@@ -350,7 +366,7 @@ static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	hlist_add_head(&exp->lnode, &master_help->expectations);
 	master_help->expecting[exp->class]++;
 
-	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
+	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
 	net->ct.expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
@@ -401,8 +417,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		ret = -ESHUTDOWN;
 		goto out;
 	}
-	h = nf_ct_expect_dst_hash(&expect->tuple);
-	hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
+	h = nf_ct_expect_dst_hash(net, &expect->tuple);
+	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
 		if (expect_matches(i, expect)) {
 			if (del_timer(&i->timeout)) {
 				nf_ct_unlink_expect(i);
@@ -468,12 +484,11 @@ struct ct_expect_iter_state {
 
 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 	struct hlist_node *n;
 
 	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+		n = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
 		if (n)
 			return n;
 	}
@@ -483,14 +498,13 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 					     struct hlist_node *head)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_expect_iter_state *st = seq->private;
 
 	head = rcu_dereference(hlist_next_rcu(head));
 	while (head == NULL) {
 		if (++st->bucket >= nf_ct_expect_hsize)
 			return NULL;
-		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+		head = rcu_dereference(hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
 	}
 	return head;
 }
@@ -623,28 +637,13 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
 int nf_conntrack_expect_pernet_init(struct net *net)
 {
-	int err = -ENOMEM;
-
 	net->ct.expect_count = 0;
-	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
-	if (net->ct.expect_hash == NULL)
-		goto err1;
-
-	err = exp_proc_init(net);
-	if (err < 0)
-		goto err2;
-
-	return 0;
-err2:
-	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
-err1:
-	return err;
+	return exp_proc_init(net);
 }
 
 void nf_conntrack_expect_pernet_fini(struct net *net)
 {
 	exp_proc_remove(net);
-	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
 
 int nf_conntrack_expect_init(void)
@@ -660,6 +659,13 @@ int nf_conntrack_expect_init(void)
 			0, 0, NULL);
 	if (!nf_ct_expect_cachep)
 		return -ENOMEM;
+
+	nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
+	if (!nf_ct_expect_hash) {
+		kmem_cache_destroy(nf_ct_expect_cachep);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -667,4 +673,5 @@ void nf_conntrack_expect_fini(void)
 {
 	rcu_barrier(); /* Wait for call_rcu() before destroy */
 	kmem_cache_destroy(nf_ct_expect_cachep);
+	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
 }
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3b40ec575cd5..f703adb7e5f7 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -38,10 +38,10 @@ unsigned int nf_ct_helper_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_helper_hsize);
 static unsigned int nf_ct_helper_count __read_mostly;
 
-static bool nf_ct_auto_assign_helper __read_mostly = true;
+static bool nf_ct_auto_assign_helper __read_mostly = false;
 module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
 MODULE_PARM_DESC(nf_conntrack_helper,
-		 "Enable automatic conntrack helper assignment (default 1)");
+		 "Enable automatic conntrack helper assignment (default 0)");
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table helper_sysctl_table[] = {
@@ -400,7 +400,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	spin_lock_bh(&nf_conntrack_expect_lock);
 	for (i = 0; i < nf_ct_expect_hsize; i++) {
 		hlist_for_each_entry_safe(exp, next,
-					  &net->ct.expect_hash[i], hnode) {
+					  &nf_ct_expect_hash[i], hnode) {
 			struct nf_conn_help *help = nfct_help(exp->master);
 			if ((rcu_dereference_protected(
 					help->helper,
@@ -424,10 +424,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 		spin_unlock_bh(&pcpu->lock);
 	}
 	local_bh_disable();
-	for (i = 0; i < net->ct.htable_size; i++) {
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
 		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
-		if (i < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+		if (i < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
 				unhelp(h, me);
 		}
 		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 294a8e28cec4..a18d1ceabad5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -824,19 +824,22 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 
 	local_bh_disable();
-	for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
 		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
-		if (cb->args[0] >= net->ct.htable_size) {
+		if (cb->args[0] >= nf_conntrack_htable_size) {
 			spin_unlock(lockp);
 			goto out;
 		}
-		hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]],
+		hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]],
 					   hnnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (!net_eq(net, nf_ct_net(ct)))
+				continue;
+
 			/* Dump entries of a given L3 protocol number.
 			 * If it is not specified, ie. l3proto == 0,
 			 * then dump everything. */
@@ -2629,10 +2632,14 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conntrack_expect *)cb->args[1];
 	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry(exp, &net->ct.expect_hash[cb->args[0]],
+		hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
 				     hnode) {
 			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
+
+			if (!net_eq(nf_ct_net(exp->master), net))
+				continue;
+
 			if (cb->args[1]) {
 				if (exp != last)
 					continue;
@@ -2883,8 +2890,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 		spin_lock_bh(&nf_conntrack_expect_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, next,
-						  &net->ct.expect_hash[i],
+						  &nf_ct_expect_hash[i],
 						  hnode) {
+
+				if (!net_eq(nf_ct_exp_net(exp), net))
+					continue;
+
 				m_help = nfct_help(exp->master);
 				if (!strcmp(m_help->helper->name, name) &&
 				    del_timer(&exp->timeout)) {
@@ -2901,8 +2912,12 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
 		spin_lock_bh(&nf_conntrack_expect_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, next,
-						  &net->ct.expect_hash[i],
+						  &nf_ct_expect_hash[i],
 						  hnode) {
+
+				if (!net_eq(nf_ct_exp_net(exp), net))
+					continue;
+
 				if (del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect_report(exp,
 							NETLINK_CB(skb).portid,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 478f92f834b6..4fd040575ffe 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -309,6 +309,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDP,
 	.name			= "udp",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
@@ -341,6 +342,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDP,
 	.name			= "udp",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
 	.invert_tuple		= udp_invert_tuple,
 	.print_tuple		= udp_print_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 1ac8ee13a873..9d692f5adb94 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -274,6 +274,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
 	.l3proto		= PF_INET,
 	.l4proto		= IPPROTO_UDPLITE,
 	.name			= "udplite",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
@@ -306,6 +307,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.l3proto		= PF_INET6,
 	.l4proto		= IPPROTO_UDPLITE,
 	.name			= "udplite",
+	.allow_clash		= true,
 	.pkt_to_tuple		= udplite_pkt_to_tuple,
 	.invert_tuple		= udplite_invert_tuple,
 	.print_tuple		= udplite_print_tuple,
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0f1a45bcacb2..f87e84ebcec3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -54,14 +54,13 @@ struct ct_iter_state {
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < net->ct.htable_size;
+	     st->bucket < nf_conntrack_htable_size;
 	     st->bucket++) {
-		n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+		n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
 		if (!is_a_nulls(n))
 			return n;
 	}
@@ -71,18 +70,17 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 					    struct hlist_nulls_node *head)
 {
-	struct net *net = seq_file_net(seq);
 	struct ct_iter_state *st = seq->private;
 
 	head = rcu_dereference(hlist_nulls_next_rcu(head));
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= net->ct.htable_size)
+			if (++st->bucket >= nf_conntrack_htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(
 				hlist_nulls_first_rcu(
-					&net->ct.hash[st->bucket]));
+					&nf_conntrack_hash[st->bucket]));
 	}
 	return head;
 }
@@ -458,7 +456,7 @@ static struct ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname	= "nf_conntrack_buckets",
-		.data		= &init_net.ct.htable_size,
+		.data		= &nf_conntrack_htable_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
@@ -512,7 +510,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
-	table[2].data = &net->ct.htable_size;
 	table[3].data = &net->ct.sysctl_checksum;
 	table[4].data = &net->ct.sysctl_log_invalid;
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 06a9f45771ab..6877a396f8fc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -38,6 +38,9 @@ static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
 static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
 						__read_mostly;
 
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
 
 inline const struct nf_nat_l3proto *
 __nf_nat_l3proto_find(u8 family)
@@ -118,15 +121,17 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
 
+	get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+
 	/* Original src, to ensure we map it consistently if poss. */
 	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
-		      tuple->dst.protonum ^ nf_conntrack_hash_rnd);
+		      tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
 
-	return reciprocal_scale(hash, net->ct.nat_htable_size);
+	return reciprocal_scale(hash, nf_nat_htable_size);
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -196,9 +201,10 @@ find_appropriate_src(struct net *net,
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 
-	hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
+	hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
 		ct = nat->ct;
 		if (same_src(ct, tuple) &&
+		    net_eq(net, nf_ct_net(ct)) &&
 		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
 			/* Copy source part from reply tuple. */
 			nf_ct_invert_tuplepr(result,
@@ -431,7 +437,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		nat = nfct_nat(ct);
 		nat->ct = ct;
 		hlist_add_head_rcu(&nat->bysource,
-				   &net->ct.nat_bysource[srchash]);
+				   &nf_nat_bysource[srchash]);
 		spin_unlock_bh(&nf_nat_lock);
 	}
 
@@ -819,27 +825,14 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
819} 825}
820#endif 826#endif
821 827
822static int __net_init nf_nat_net_init(struct net *net)
823{
824 /* Leave them the same for the moment. */
825 net->ct.nat_htable_size = net->ct.htable_size;
826 net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
827 if (!net->ct.nat_bysource)
828 return -ENOMEM;
829 return 0;
830}
831
832static void __net_exit nf_nat_net_exit(struct net *net) 828static void __net_exit nf_nat_net_exit(struct net *net)
833{ 829{
834 struct nf_nat_proto_clean clean = {}; 830 struct nf_nat_proto_clean clean = {};
835 831
836 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0); 832 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean, 0, 0);
837 synchronize_rcu();
838 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
839} 833}
840 834
841static struct pernet_operations nf_nat_net_ops = { 835static struct pernet_operations nf_nat_net_ops = {
842 .init = nf_nat_net_init,
843 .exit = nf_nat_net_exit, 836 .exit = nf_nat_net_exit,
844}; 837};
845 838
@@ -852,8 +845,16 @@ static int __init nf_nat_init(void)
852{ 845{
853 int ret; 846 int ret;
854 847
848 /* Leave them the same for the moment. */
849 nf_nat_htable_size = nf_conntrack_htable_size;
850
851 nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
852 if (!nf_nat_bysource)
853 return -ENOMEM;
854
855 ret = nf_ct_extend_register(&nat_extend); 855 ret = nf_ct_extend_register(&nat_extend);
856 if (ret < 0) { 856 if (ret < 0) {
857 nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
857 printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); 858 printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
858 return ret; 859 return ret;
859 } 860 }
@@ -877,6 +878,7 @@ static int __init nf_nat_init(void)
 	return 0;
 
  cleanup_extend:
+	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 	nf_ct_extend_unregister(&nat_extend);
 	return ret;
 }
@@ -895,6 +897,7 @@ static void __exit nf_nat_cleanup(void)
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
 	synchronize_net();
+	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7a85a9dd37ad..4d292b933b5c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2317,7 +2317,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
 static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_TABLE]	= { .type = NLA_STRING },
 	[NFTA_SET_NAME]		= { .type = NLA_STRING,
-				    .len = IFNAMSIZ - 1 },
+				    .len = NFT_SET_MAXNAMELEN - 1 },
 	[NFTA_SET_FLAGS]	= { .type = NLA_U32 },
 	[NFTA_SET_KEY_TYPE]	= { .type = NLA_U32 },
 	[NFTA_SET_KEY_LEN]	= { .type = NLA_U32 },
@@ -2401,7 +2401,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
 	unsigned long *inuse;
 	unsigned int n = 0, min = 0;
 
-	p = strnchr(name, IFNAMSIZ, '%');
+	p = strnchr(name, NFT_SET_MAXNAMELEN, '%');
 	if (p != NULL) {
 		if (p[1] != 'd' || strchr(p + 2, '%'))
 			return -EINVAL;
@@ -2696,7 +2696,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	struct nft_table *table;
 	struct nft_set *set;
 	struct nft_ctx ctx;
-	char name[IFNAMSIZ];
+	char name[NFT_SET_MAXNAMELEN];
 	unsigned int size;
 	bool create;
 	u64 timeout;
@@ -3375,6 +3375,22 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem)
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
 
+static int nft_setelem_parse_flags(const struct nft_set *set,
+				   const struct nlattr *attr, u32 *flags)
+{
+	if (attr == NULL)
+		return 0;
+
+	*flags = ntohl(nla_get_be32(attr));
+	if (*flags & ~NFT_SET_ELEM_INTERVAL_END)
+		return -EINVAL;
+	if (!(set->flags & NFT_SET_INTERVAL) &&
+	    *flags & NFT_SET_ELEM_INTERVAL_END)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
@@ -3388,8 +3404,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_data data;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
+	u32 flags = 0;
 	u64 timeout;
-	u32 flags;
 	u8 ulen;
 	int err;
 
@@ -3403,17 +3419,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 
 	nft_set_ext_prepare(&tmpl);
 
-	flags = 0;
-	if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
-		flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
-		if (flags & ~NFT_SET_ELEM_INTERVAL_END)
-			return -EINVAL;
-		if (!(set->flags & NFT_SET_INTERVAL) &&
-		    flags & NFT_SET_ELEM_INTERVAL_END)
-			return -EINVAL;
-		if (flags != 0)
-			nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
-	}
+	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+	if (err < 0)
+		return err;
+	if (flags != 0)
+		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
 
 	if (set->flags & NFT_SET_MAP) {
 		if (nla[NFTA_SET_ELEM_DATA] == NULL &&
@@ -3582,9 +3592,13 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+	struct nft_set_ext_tmpl tmpl;
 	struct nft_data_desc desc;
 	struct nft_set_elem elem;
+	struct nft_set_ext *ext;
 	struct nft_trans *trans;
+	u32 flags = 0;
+	void *priv;
 	int err;
 
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -3596,6 +3610,14 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (nla[NFTA_SET_ELEM_KEY] == NULL)
 		goto err1;
 
+	nft_set_ext_prepare(&tmpl);
+
+	err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags);
+	if (err < 0)
+		return err;
+	if (flags != 0)
+		nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+
 	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
 			    nla[NFTA_SET_ELEM_KEY]);
 	if (err < 0)
@@ -3605,24 +3627,40 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
 		goto err2;
 
+	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);
+
+	err = -ENOMEM;
+	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
+				      GFP_KERNEL);
+	if (elem.priv == NULL)
+		goto err2;
+
+	ext = nft_set_elem_ext(set, elem.priv);
+	if (flags)
+		*nft_set_ext_flags(ext) = flags;
+
 	trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
 	if (trans == NULL) {
 		err = -ENOMEM;
-		goto err2;
+		goto err3;
 	}
 
-	elem.priv = set->ops->deactivate(set, &elem);
-	if (elem.priv == NULL) {
+	priv = set->ops->deactivate(set, &elem);
+	if (priv == NULL) {
 		err = -ENOENT;
-		goto err3;
+		goto err4;
 	}
+	kfree(elem.priv);
+	elem.priv = priv;
 
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
 	return 0;
 
-err3:
+err4:
 	kfree(trans);
+err3:
+	kfree(elem.priv);
 err2:
 	nft_data_uninit(&elem.key.val, desc.type);
 err1:
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 2671b9deb103..3c84f14326f5 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -306,10 +306,10 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
 	int i;
 
 	local_bh_disable();
-	for (i = 0; i < net->ct.htable_size; i++) {
+	for (i = 0; i < nf_conntrack_htable_size; i++) {
 		nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
-		if (i < net->ct.htable_size) {
-			hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
+		if (i < nf_conntrack_htable_size) {
+			hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
 				untimeout(h, timeout);
 		}
 		spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 25998facefd0..137e308d5b24 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -198,6 +198,14 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
 		}
 		break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	case NFT_CT_LABELS:
+		nf_connlabels_replace(ct,
+				      &regs->data[priv->sreg],
+				      &regs->data[priv->sreg],
+				      NF_CT_LABELS_MAX_SIZE / sizeof(u32));
+		break;
+#endif
 	default:
 		break;
 	}
@@ -365,6 +373,16 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 		len = FIELD_SIZEOF(struct nf_conn, mark);
 		break;
 #endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	case NFT_CT_LABELS:
+		if (tb[NFTA_CT_DIRECTION])
+			return -EINVAL;
+		len = NF_CT_LABELS_MAX_SIZE;
+		err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
+		if (err)
+			return err;
+		break;
+#endif
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -384,6 +402,18 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 static void nft_ct_destroy(const struct nft_ctx *ctx,
 			   const struct nft_expr *expr)
 {
+	struct nft_ct *priv = nft_expr_priv(expr);
+
+	switch (priv->key) {
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+	case NFT_CT_LABELS:
+		nf_connlabels_put(ctx->net);
+		break;
+#endif
+	default:
+		break;
+	}
+
 	nft_ct_l3proto_module_put(ctx->afi->family);
 }
 
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 1c30f41cff5b..f762094af7c1 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -29,6 +29,17 @@ struct nft_rbtree_elem {
 	struct nft_set_ext	ext;
 };
 
+static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
+{
+	return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+	       (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
+}
+
+static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
+			     const struct nft_rbtree_elem *interval)
+{
+	return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+}
 
 static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 			      const struct nft_set_ext **ext)
@@ -37,6 +48,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 	const struct nft_rbtree_elem *rbe, *interval = NULL;
 	const struct rb_node *parent;
 	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
+	const void *this;
 	int d;
 
 	spin_lock_bh(&nft_rbtree_lock);
@@ -44,9 +56,16 @@ static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-		d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
+		this = nft_set_ext_key(&rbe->ext);
+		d = memcmp(this, key, set->klen);
 		if (d < 0) {
 			parent = parent->rb_left;
+			/* In case of adjacent ranges, we always see the high
+			 * part of the range in first place, before the low one.
+			 * So don't update interval if the keys are equal.
+			 */
+			if (interval && nft_rbtree_equal(set, this, interval))
+				continue;
 			interval = rbe;
 		} else if (d > 0)
 			parent = parent->rb_right;
@@ -56,9 +75,7 @@ found:
 			parent = parent->rb_left;
 			continue;
 		}
-		if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
-		    *nft_set_ext_flags(&rbe->ext) &
-		    NFT_SET_ELEM_INTERVAL_END)
+		if (nft_rbtree_interval_end(rbe))
 			goto out;
 		spin_unlock_bh(&nft_rbtree_lock);
 
@@ -98,9 +115,16 @@ static int __nft_rbtree_insert(const struct nft_set *set,
 		else if (d > 0)
 			p = &parent->rb_right;
 		else {
-			if (nft_set_elem_active(&rbe->ext, genmask))
-				return -EEXIST;
-			p = &parent->rb_left;
+			if (nft_set_elem_active(&rbe->ext, genmask)) {
+				if (nft_rbtree_interval_end(rbe) &&
+				    !nft_rbtree_interval_end(new))
+					p = &parent->rb_left;
+				else if (!nft_rbtree_interval_end(rbe) &&
+					 nft_rbtree_interval_end(new))
+					p = &parent->rb_right;
+				else
+					return -EEXIST;
+			}
 		}
 	}
 	rb_link_node(&new->node, parent, p);
@@ -145,7 +169,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
 {
 	const struct nft_rbtree *priv = nft_set_priv(set);
 	const struct rb_node *parent = priv->root.rb_node;
-	struct nft_rbtree_elem *rbe;
+	struct nft_rbtree_elem *rbe, *this = elem->priv;
 	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int d;
 
@@ -163,6 +187,15 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
 			parent = parent->rb_left;
 			continue;
 		}
+		if (nft_rbtree_interval_end(rbe) &&
+		    !nft_rbtree_interval_end(this)) {
+			parent = parent->rb_left;
+			continue;
+		} else if (!nft_rbtree_interval_end(rbe) &&
+			   nft_rbtree_interval_end(this)) {
+			parent = parent->rb_right;
+			continue;
+		}
 		nft_set_elem_change_active(set, &rbe->ext);
 		return rbe;
 	}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 9741a76c7405..9f0bc49fa969 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -439,20 +439,12 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	u8 protonum;
 
 	l3proto = __nf_ct_l3proto_find(l3num);
-	if (!l3proto) {
-		pr_debug("ovs_ct_find_existing: Can't get l3proto\n");
-		return NULL;
-	}
 	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
 				 &protonum) <= 0) {
 		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
 		return NULL;
 	}
 	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-	if (!l4proto) {
-		pr_debug("ovs_ct_find_existing: Can't get l4proto\n");
-		return NULL;
-	}
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
 			     protonum, net, &tuple, l3proto, l4proto)) {
 		pr_debug("ovs_ct_find_existing: Can't get tuple\n");