aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-08-02 18:07:58 -0400
committerDavid S. Miller <davem@davemloft.net>2010-08-02 18:07:58 -0400
commit83bf2e4089bebc2c7fd14a79de5954b26fe8d4af (patch)
treeab2cb1f229ba4c2d7236406c997e41a223daf74d /net
parentde38483010bae523f533bb6bf9f7b7353772f6eb (diff)
parent6661481d5a8975657742c7ed40ae16bdaa7d0a6e (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/netfilter/arp_tables.c15
-rw-r--r--net/ipv4/netfilter/ip_tables.c12
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c27
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c12
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c4
-rw-r--r--net/ipv6/netfilter/ip6_tables.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c9
-rw-r--r--net/netfilter/Kconfig35
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/ipvs/Kconfig11
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c43
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c37
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c176
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c50
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c56
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c29
-rw-r--r--net/netfilter/nf_conntrack_core.c3
-rw-r--r--net/netfilter/nf_conntrack_extend.c22
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/xt_CHECKSUM.c70
-rw-r--r--net/netfilter/xt_TPROXY.c6
-rw-r--r--net/netfilter/xt_cpu.c63
-rw-r--r--net/netfilter/xt_ipvs.c189
-rw-r--r--net/netfilter/xt_quota.c12
35 files changed, 717 insertions, 352 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 16c0ba0a2728..6bccba31d132 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb,
283 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
284 do { 284 do {
285 const struct arpt_entry_target *t; 285 const struct arpt_entry_target *t;
286 int hdr_len;
287 286
288 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
289 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
290 continue; 289 continue;
291 } 290 }
292 291
293 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + 292 ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
294 (2 * skb->dev->addr_len);
295 ADD_COUNTER(e->counters, hdr_len, 1);
296 293
297 t = arpt_get_target_c(e); 294 t = arpt_get_target_c(e);
298 295
@@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t,
713 struct arpt_entry *iter; 710 struct arpt_entry *iter;
714 unsigned int cpu; 711 unsigned int cpu;
715 unsigned int i; 712 unsigned int i;
716 unsigned int curcpu; 713 unsigned int curcpu = get_cpu();
717 714
718 /* Instead of clearing (by a previous call to memset()) 715 /* Instead of clearing (by a previous call to memset())
719 * the counters and using adds, we set the counters 716 * the counters and using adds, we set the counters
@@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t,
723 * if new softirq were to run and call ipt_do_table 720 * if new softirq were to run and call ipt_do_table
724 */ 721 */
725 local_bh_disable(); 722 local_bh_disable();
726 curcpu = smp_processor_id();
727
728 i = 0; 723 i = 0;
729 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 724 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
730 SET_COUNTER(counters[i], iter->counters.bcnt, 725 SET_COUNTER(counters[i], iter->counters.bcnt,
731 iter->counters.pcnt); 726 iter->counters.pcnt);
732 ++i; 727 ++i;
733 } 728 }
729 local_bh_enable();
730 /* Processing counters from other cpus, we can let bottom half enabled,
731 * (preemption is disabled)
732 */
734 733
735 for_each_possible_cpu(cpu) { 734 for_each_possible_cpu(cpu) {
736 if (cpu == curcpu) 735 if (cpu == curcpu)
@@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t,
744 } 743 }
745 xt_info_wrunlock(cpu); 744 xt_info_wrunlock(cpu);
746 } 745 }
747 local_bh_enable(); 746 put_cpu();
748} 747}
749 748
750static struct xt_counters *alloc_counters(const struct xt_table *table) 749static struct xt_counters *alloc_counters(const struct xt_table *table)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b38c11810c65..c439721b165a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb,
364 goto no_match; 364 goto no_match;
365 } 365 }
366 366
367 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 367 ADD_COUNTER(e->counters, skb->len, 1);
368 368
369 t = ipt_get_target(e); 369 t = ipt_get_target(e);
370 IP_NF_ASSERT(t->u.kernel.target); 370 IP_NF_ASSERT(t->u.kernel.target);
@@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t,
884 struct ipt_entry *iter; 884 struct ipt_entry *iter;
885 unsigned int cpu; 885 unsigned int cpu;
886 unsigned int i; 886 unsigned int i;
887 unsigned int curcpu; 887 unsigned int curcpu = get_cpu();
888 888
889 /* Instead of clearing (by a previous call to memset()) 889 /* Instead of clearing (by a previous call to memset())
890 * the counters and using adds, we set the counters 890 * the counters and using adds, we set the counters
@@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t,
894 * if new softirq were to run and call ipt_do_table 894 * if new softirq were to run and call ipt_do_table
895 */ 895 */
896 local_bh_disable(); 896 local_bh_disable();
897 curcpu = smp_processor_id();
898
899 i = 0; 897 i = 0;
900 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 898 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
901 SET_COUNTER(counters[i], iter->counters.bcnt, 899 SET_COUNTER(counters[i], iter->counters.bcnt,
902 iter->counters.pcnt); 900 iter->counters.pcnt);
903 ++i; 901 ++i;
904 } 902 }
903 local_bh_enable();
904 /* Processing counters from other cpus, we can let bottom half enabled,
905 * (preemption is disabled)
906 */
905 907
906 for_each_possible_cpu(cpu) { 908 for_each_possible_cpu(cpu) {
907 if (cpu == curcpu) 909 if (cpu == curcpu)
@@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t,
915 } 917 }
916 xt_info_wrunlock(cpu); 918 xt_info_wrunlock(cpu);
917 } 919 }
918 local_bh_enable(); 920 put_cpu();
919} 921}
920 922
921static struct xt_counters *alloc_counters(const struct xt_table *table) 923static struct xt_counters *alloc_counters(const struct xt_table *table)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index bbbd2736c549..b254dafaf429 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
95 } 95 }
96 96
97 tcph->rst = 1; 97 tcph->rst = 1;
98 tcph->check = tcp_v4_check(sizeof(struct tcphdr), 98 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
99 niph->saddr, niph->daddr, 99 niph->daddr, 0);
100 csum_partial(tcph, 100 nskb->ip_summed = CHECKSUM_PARTIAL;
101 sizeof(struct tcphdr), 0)); 101 nskb->csum_start = (unsigned char *)tcph - nskb->head;
102 nskb->csum_offset = offsetof(struct tcphdr, check);
102 103
103 addr_type = RTN_UNSPEC; 104 addr_type = RTN_UNSPEC;
104 if (hook != NF_INET_FORWARD 105 if (hook != NF_INET_FORWARD
@@ -115,7 +116,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
115 goto free_nskb; 116 goto free_nskb;
116 117
117 niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); 118 niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
118 nskb->ip_summed = CHECKSUM_NONE;
119 119
120 /* "Never happens" */ 120 /* "Never happens" */
121 if (nskb->len > dst_mtu(skb_dst(nskb))) 121 if (nskb->len > dst_mtu(skb_dst(nskb)))
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index c7719b283ada..8c8632d9b93c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
261 rcu_read_lock(); 261 rcu_read_lock();
262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
263 263
264 /* Change protocol info to have some randomization */
265 if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
266 proto->unique_tuple(tuple, range, maniptype, ct);
267 goto out;
268 }
269
270 /* Only bother mapping if it's not already in range and unique */ 264 /* Only bother mapping if it's not already in range and unique */
271 if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 265 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
266 (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
272 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 267 proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
273 !nf_nat_used_tuple(tuple, ct)) 268 !nf_nat_used_tuple(tuple, ct))
274 goto out; 269 goto out;
@@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
440 if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) 435 if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
441 return 0; 436 return 0;
442 437
443 inside = (void *)skb->data + ip_hdrlen(skb); 438 inside = (void *)skb->data + hdrlen;
444 439
445 /* We're actually going to mangle it beyond trivial checksum 440 /* We're actually going to mangle it beyond trivial checksum
446 adjustment, so make sure the current checksum is correct. */ 441 adjustment, so make sure the current checksum is correct. */
@@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
470 /* rcu_read_lock()ed by nf_hook_slow */ 465 /* rcu_read_lock()ed by nf_hook_slow */
471 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); 466 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
472 467
473 if (!nf_ct_get_tuple(skb, 468 if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
474 ip_hdrlen(skb) + sizeof(struct icmphdr), 469 (hdrlen +
475 (ip_hdrlen(skb) +
476 sizeof(struct icmphdr) + inside->ip.ihl * 4), 470 sizeof(struct icmphdr) + inside->ip.ihl * 4),
477 (u_int16_t)AF_INET, 471 (u_int16_t)AF_INET, inside->ip.protocol,
478 inside->ip.protocol,
479 &inner, l3proto, l4proto)) 472 &inner, l3proto, l4proto))
480 return 0; 473 return 0;
481 474
@@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
484 pass all hooks (locally-generated ICMP). Consider incoming 477 pass all hooks (locally-generated ICMP). Consider incoming
485 packet: PREROUTING (DST manip), routing produces ICMP, goes 478 packet: PREROUTING (DST manip), routing produces ICMP, goes
486 through POSTROUTING (which must correct the DST manip). */ 479 through POSTROUTING (which must correct the DST manip). */
487 if (!manip_pkt(inside->ip.protocol, skb, 480 if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
488 ip_hdrlen(skb) + sizeof(inside->icmp), 481 &ct->tuplehash[!dir].tuple, !manip))
489 &ct->tuplehash[!dir].tuple,
490 !manip))
491 return 0; 482 return 0;
492 483
493 if (skb->ip_summed != CHECKSUM_PARTIAL) { 484 if (skb->ip_summed != CHECKSUM_PARTIAL) {
494 /* Reloading "inside" here since manip_pkt inner. */ 485 /* Reloading "inside" here since manip_pkt inner. */
495 inside = (void *)skb->data + ip_hdrlen(skb); 486 inside = (void *)skb->data + hdrlen;
496 inside->icmp.checksum = 0; 487 inside->icmp.checksum = 0;
497 inside->icmp.checksum = 488 inside->icmp.checksum =
498 csum_fold(skb_checksum(skb, hdrlen, 489 csum_fold(skb_checksum(skb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 6c4f11f51446..3e61faf23a9a 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
34} 34}
35EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); 35EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
36 36
37bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 37void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
38 const struct nf_nat_range *range, 38 const struct nf_nat_range *range,
39 enum nf_nat_manip_type maniptype, 39 enum nf_nat_manip_type maniptype,
40 const struct nf_conn *ct, 40 const struct nf_conn *ct,
@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
54 /* If it's dst rewrite, can't change port */ 54 /* If it's dst rewrite, can't change port */
55 if (maniptype == IP_NAT_MANIP_DST) 55 if (maniptype == IP_NAT_MANIP_DST)
56 return false; 56 return;
57 57
58 if (ntohs(*portptr) < 1024) { 58 if (ntohs(*portptr) < 1024) {
59 /* Loose convention: >> 512 is credential passing */ 59 /* Loose convention: >> 512 is credential passing */
@@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
81 else 81 else
82 off = *rover; 82 off = *rover;
83 83
84 for (i = 0; i < range_size; i++, off++) { 84 for (i = 0; ; ++off) {
85 *portptr = htons(min + off % range_size); 85 *portptr = htons(min + off % range_size);
86 if (nf_nat_used_tuple(tuple, ct)) 86 if (++i != range_size && nf_nat_used_tuple(tuple, ct))
87 continue; 87 continue;
88 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) 88 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
89 *rover = off; 89 *rover = off;
90 return true; 90 return;
91 } 91 }
92 return false; 92 return;
93} 93}
94EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); 94EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
95 95
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 22485ce306d4..570faf2667b2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -22,14 +22,14 @@
22 22
23static u_int16_t dccp_port_rover; 23static u_int16_t dccp_port_rover;
24 24
25static bool 25static void
26dccp_unique_tuple(struct nf_conntrack_tuple *tuple, 26dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
27 const struct nf_nat_range *range, 27 const struct nf_nat_range *range,
28 enum nf_nat_manip_type maniptype, 28 enum nf_nat_manip_type maniptype,
29 const struct nf_conn *ct) 29 const struct nf_conn *ct)
30{ 30{
31 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 31 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
32 &dccp_port_rover); 32 &dccp_port_rover);
33} 33}
34 34
35static bool 35static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d7e89201351e..bc8d83a31c73 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); 37MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
38 38
39/* generate unique tuple ... */ 39/* generate unique tuple ... */
40static bool 40static void
41gre_unique_tuple(struct nf_conntrack_tuple *tuple, 41gre_unique_tuple(struct nf_conntrack_tuple *tuple,
42 const struct nf_nat_range *range, 42 const struct nf_nat_range *range,
43 enum nf_nat_manip_type maniptype, 43 enum nf_nat_manip_type maniptype,
@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
50 /* If there is no master conntrack we are not PPTP, 50 /* If there is no master conntrack we are not PPTP,
51 do not change tuples */ 51 do not change tuples */
52 if (!ct->master) 52 if (!ct->master)
53 return false; 53 return;
54 54
55 if (maniptype == IP_NAT_MANIP_SRC) 55 if (maniptype == IP_NAT_MANIP_SRC)
56 keyptr = &tuple->src.u.gre.key; 56 keyptr = &tuple->src.u.gre.key;
@@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
68 68
69 pr_debug("min = %u, range_size = %u\n", min, range_size); 69 pr_debug("min = %u, range_size = %u\n", min, range_size);
70 70
71 for (i = 0; i < range_size; i++, key++) { 71 for (i = 0; ; ++key) {
72 *keyptr = htons(min + key % range_size); 72 *keyptr = htons(min + key % range_size);
73 if (!nf_nat_used_tuple(tuple, ct)) 73 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
74 return true; 74 return;
75 } 75 }
76 76
77 pr_debug("%p: no NAT mapping\n", ct); 77 pr_debug("%p: no NAT mapping\n", ct);
78 return false; 78 return;
79} 79}
80 80
81/* manipulate a GRE packet according to maniptype */ 81/* manipulate a GRE packet according to maniptype */
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 19a8b0b07d8e..5744c3ec847c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); 27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
28} 28}
29 29
30static bool 30static void
31icmp_unique_tuple(struct nf_conntrack_tuple *tuple, 31icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
32 const struct nf_nat_range *range, 32 const struct nf_nat_range *range,
33 enum nf_nat_manip_type maniptype, 33 enum nf_nat_manip_type maniptype,
@@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) 42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
43 range_size = 0xFFFF; 43 range_size = 0xFFFF;
44 44
45 for (i = 0; i < range_size; i++, id++) { 45 for (i = 0; ; ++id) {
46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + 46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
47 (id % range_size)); 47 (id % range_size));
48 if (!nf_nat_used_tuple(tuple, ct)) 48 if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
49 return true; 49 return;
50 } 50 }
51 return false; 51 return;
52} 52}
53 53
54static bool 54static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 3fc598eeeb1a..756331d42661 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -16,14 +16,14 @@
16 16
17static u_int16_t nf_sctp_port_rover; 17static u_int16_t nf_sctp_port_rover;
18 18
19static bool 19static void
20sctp_unique_tuple(struct nf_conntrack_tuple *tuple, 20sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
21 const struct nf_nat_range *range, 21 const struct nf_nat_range *range,
22 enum nf_nat_manip_type maniptype, 22 enum nf_nat_manip_type maniptype,
23 const struct nf_conn *ct) 23 const struct nf_conn *ct)
24{ 24{
25 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 25 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
26 &nf_sctp_port_rover); 26 &nf_sctp_port_rover);
27} 27}
28 28
29static bool 29static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 399e2cfa263b..aa460a595d5d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -20,14 +20,13 @@
20 20
21static u_int16_t tcp_port_rover; 21static u_int16_t tcp_port_rover;
22 22
23static bool 23static void
24tcp_unique_tuple(struct nf_conntrack_tuple *tuple, 24tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
25 const struct nf_nat_range *range, 25 const struct nf_nat_range *range,
26 enum nf_nat_manip_type maniptype, 26 enum nf_nat_manip_type maniptype,
27 const struct nf_conn *ct) 27 const struct nf_conn *ct)
28{ 28{
29 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 29 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
30 &tcp_port_rover);
31} 30}
32 31
33static bool 32static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 9e61c79492e4..dfe65c7e2925 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -19,14 +19,13 @@
19 19
20static u_int16_t udp_port_rover; 20static u_int16_t udp_port_rover;
21 21
22static bool 22static void
23udp_unique_tuple(struct nf_conntrack_tuple *tuple, 23udp_unique_tuple(struct nf_conntrack_tuple *tuple,
24 const struct nf_nat_range *range, 24 const struct nf_nat_range *range,
25 enum nf_nat_manip_type maniptype, 25 enum nf_nat_manip_type maniptype,
26 const struct nf_conn *ct) 26 const struct nf_conn *ct)
27{ 27{
28 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 28 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
29 &udp_port_rover);
30} 29}
31 30
32static bool 31static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index 440a229bbd87..3cc8c8af39ef 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -18,14 +18,14 @@
18 18
19static u_int16_t udplite_port_rover; 19static u_int16_t udplite_port_rover;
20 20
21static bool 21static void
22udplite_unique_tuple(struct nf_conntrack_tuple *tuple, 22udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
23 const struct nf_nat_range *range, 23 const struct nf_nat_range *range,
24 enum nf_nat_manip_type maniptype, 24 enum nf_nat_manip_type maniptype,
25 const struct nf_conn *ct) 25 const struct nf_conn *ct)
26{ 26{
27 return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 27 nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
28 &udplite_port_rover); 28 &udplite_port_rover);
29} 29}
30 30
31static bool 31static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index 14381c62acea..a50f2bc1c732 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
26 return true; 26 return true;
27} 27}
28 28
29static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 29static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
30 const struct nf_nat_range *range, 30 const struct nf_nat_range *range,
31 enum nf_nat_manip_type maniptype, 31 enum nf_nat_manip_type maniptype,
32 const struct nf_conn *ct) 32 const struct nf_conn *ct)
33{ 33{
34 /* Sorry: we can't help you; if it's not unique, we can't frob 34 /* Sorry: we can't help you; if it's not unique, we can't frob
35 anything. */ 35 anything. */
36 return false; 36 return;
37} 37}
38 38
39static bool 39static bool
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index dc41d6d3c6c6..5359ef4daac5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -387,9 +387,7 @@ ip6t_do_table(struct sk_buff *skb,
387 goto no_match; 387 goto no_match;
388 } 388 }
389 389
390 ADD_COUNTER(e->counters, 390 ADD_COUNTER(e->counters, skb->len, 1);
391 ntohs(ipv6_hdr(skb)->payload_len) +
392 sizeof(struct ipv6hdr), 1);
393 391
394 t = ip6t_get_target_c(e); 392 t = ip6t_get_target_c(e);
395 IP_NF_ASSERT(t->u.kernel.target); 393 IP_NF_ASSERT(t->u.kernel.target);
@@ -899,7 +897,7 @@ get_counters(const struct xt_table_info *t,
899 struct ip6t_entry *iter; 897 struct ip6t_entry *iter;
900 unsigned int cpu; 898 unsigned int cpu;
901 unsigned int i; 899 unsigned int i;
902 unsigned int curcpu; 900 unsigned int curcpu = get_cpu();
903 901
904 /* Instead of clearing (by a previous call to memset()) 902 /* Instead of clearing (by a previous call to memset())
905 * the counters and using adds, we set the counters 903 * the counters and using adds, we set the counters
@@ -909,14 +907,16 @@ get_counters(const struct xt_table_info *t,
909 * if new softirq were to run and call ipt_do_table 907 * if new softirq were to run and call ipt_do_table
910 */ 908 */
911 local_bh_disable(); 909 local_bh_disable();
912 curcpu = smp_processor_id();
913
914 i = 0; 910 i = 0;
915 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 911 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
916 SET_COUNTER(counters[i], iter->counters.bcnt, 912 SET_COUNTER(counters[i], iter->counters.bcnt,
917 iter->counters.pcnt); 913 iter->counters.pcnt);
918 ++i; 914 ++i;
919 } 915 }
916 local_bh_enable();
917 /* Processing counters from other cpus, we can let bottom half enabled,
918 * (preemption is disabled)
919 */
920 920
921 for_each_possible_cpu(cpu) { 921 for_each_possible_cpu(cpu) {
922 if (cpu == curcpu) 922 if (cpu == curcpu)
@@ -930,7 +930,7 @@ get_counters(const struct xt_table_info *t,
930 } 930 }
931 xt_info_wrunlock(cpu); 931 xt_info_wrunlock(cpu);
932 } 932 }
933 local_bh_enable(); 933 put_cpu();
934} 934}
935 935
936static struct xt_counters *alloc_counters(const struct xt_table *table) 936static struct xt_counters *alloc_counters(const struct xt_table *table)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 9254008602d4..098a050a20b0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -269,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
269 * in the chain of fragments so far. We must know where to put 269 * in the chain of fragments so far. We must know where to put
270 * this fragment, right? 270 * this fragment, right?
271 */ 271 */
272 prev = fq->q.fragments_tail;
273 if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
274 next = NULL;
275 goto found;
276 }
272 prev = NULL; 277 prev = NULL;
273 for (next = fq->q.fragments; next != NULL; next = next->next) { 278 for (next = fq->q.fragments; next != NULL; next = next->next) {
274 if (NFCT_FRAG6_CB(next)->offset >= offset) 279 if (NFCT_FRAG6_CB(next)->offset >= offset)
@@ -276,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
276 prev = next; 281 prev = next;
277 } 282 }
278 283
284found:
279 /* We found where to put this one. Check for overlap with 285 /* We found where to put this one. Check for overlap with
280 * preceding fragment, and, if needed, align things so that 286 * preceding fragment, and, if needed, align things so that
281 * any overlaps are eliminated. 287 * any overlaps are eliminated.
@@ -341,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
341 347
342 /* Insert this fragment in the chain of fragments. */ 348 /* Insert this fragment in the chain of fragments. */
343 skb->next = next; 349 skb->next = next;
350 if (!next)
351 fq->q.fragments_tail = skb;
344 if (prev) 352 if (prev)
345 prev->next = skb; 353 prev->next = skb;
346 else 354 else
@@ -464,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
464 head->csum); 472 head->csum);
465 473
466 fq->q.fragments = NULL; 474 fq->q.fragments = NULL;
475 fq->q.fragments_tail = NULL;
467 476
468 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ 477 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
469 fp = skb_shinfo(head)->frag_list; 478 fp = skb_shinfo(head)->frag_list;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index aa2f106347e4..43288259f4a1 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -326,6 +326,22 @@ config NETFILTER_XT_CONNMARK
326 326
327comment "Xtables targets" 327comment "Xtables targets"
328 328
329config NETFILTER_XT_TARGET_CHECKSUM
330 tristate "CHECKSUM target support"
331 depends on IP_NF_MANGLE || IP6_NF_MANGLE
332 depends on NETFILTER_ADVANCED
333 ---help---
334 This option adds a `CHECKSUM' target, which can be used in the iptables mangle
335 table.
336
337 You can use this target to compute and fill in the checksum in
338 a packet that lacks a checksum. This is particularly useful,
339 if you need to work around old applications such as dhcp clients,
340 that do not work well with checksum offloads, but don't want to disable
341 checksum offload in your device.
342
343 To compile it as a module, choose M here. If unsure, say N.
344
329config NETFILTER_XT_TARGET_CLASSIFY 345config NETFILTER_XT_TARGET_CLASSIFY
330 tristate '"CLASSIFY" target support' 346 tristate '"CLASSIFY" target support'
331 depends on NETFILTER_ADVANCED 347 depends on NETFILTER_ADVANCED
@@ -647,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK
647 663
648 To compile it as a module, choose M here. If unsure, say N. 664 To compile it as a module, choose M here. If unsure, say N.
649 665
666config NETFILTER_XT_MATCH_CPU
667 tristate '"cpu" match support'
668 depends on NETFILTER_ADVANCED
669 help
670 CPU matching allows you to match packets based on the CPU
671 currently handling the packet.
672
673 To compile it as a module, choose M here. If unsure, say N.
674
650config NETFILTER_XT_MATCH_DCCP 675config NETFILTER_XT_MATCH_DCCP
651 tristate '"dccp" protocol match support' 676 tristate '"dccp" protocol match support'
652 depends on NETFILTER_ADVANCED 677 depends on NETFILTER_ADVANCED
@@ -726,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE
726 751
727 If unsure, say M. 752 If unsure, say M.
728 753
754config NETFILTER_XT_MATCH_IPVS
755 tristate '"ipvs" match support'
756 depends on IP_VS
757 depends on NETFILTER_ADVANCED
758 depends on NF_CONNTRACK
759 help
760 This option allows you to match against IPVS properties of a packet.
761
762 If unsure, say N.
763
729config NETFILTER_XT_MATCH_LENGTH 764config NETFILTER_XT_MATCH_LENGTH
730 tristate '"length" match support' 765 tristate '"length" match support'
731 depends on NETFILTER_ADVANCED 766 depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e28420aac5ef..441050f31111 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
45obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 45obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
46 46
47# targets 47# targets
48obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 49obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 50obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o 51obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
@@ -69,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
69obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 70obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
70obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o 71obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
71obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o 72obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 74obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
74obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 76obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
@@ -76,6 +78,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
76obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o 78obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
77obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o 79obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
78obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o 80obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
81obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
79obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o 82obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
80obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o 83obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
81obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o 84obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 712ccad13344..46a77d5c3887 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER 6 depends on NET && INET && NETFILTER && NF_CONNTRACK
7 ---help--- 7 ---help---
8 IP Virtual Server support will let you build a high-performance 8 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 9 virtual server based on cluster of two or more real servers. This
@@ -26,7 +26,7 @@ if IP_VS
26 26
27config IP_VS_IPV6 27config IP_VS_IPV6
28 bool "IPv6 support for IPVS" 28 bool "IPv6 support for IPVS"
29 depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) 29 depends on IPV6 = y || IP_VS = IPV6
30 ---help--- 30 ---help---
31 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 31 Add IPv6 support to IPVS. This is incomplete and might be dangerous.
32 32
@@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP
87 protocol. Say Y if unsure. 87 protocol. Say Y if unsure.
88 88
89config IP_VS_PROTO_AH_ESP 89config IP_VS_PROTO_AH_ESP
90 bool 90 def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH
91 depends on UNDEFINED
92 91
93config IP_VS_PROTO_ESP 92config IP_VS_PROTO_ESP
94 bool "ESP load balancing support" 93 bool "ESP load balancing support"
95 select IP_VS_PROTO_AH_ESP
96 ---help--- 94 ---help---
97 This option enables support for load balancing ESP (Encapsulation 95 This option enables support for load balancing ESP (Encapsulation
98 Security Payload) transport protocol. Say Y if unsure. 96 Security Payload) transport protocol. Say Y if unsure.
99 97
100config IP_VS_PROTO_AH 98config IP_VS_PROTO_AH
101 bool "AH load balancing support" 99 bool "AH load balancing support"
102 select IP_VS_PROTO_AH_ESP
103 ---help--- 100 ---help---
104 This option enables support for load balancing AH (Authentication 101 This option enables support for load balancing AH (Authentication
105 Header) transport protocol. Say Y if unsure. 102 Header) transport protocol. Say Y if unsure.
@@ -238,7 +235,7 @@ comment 'IPVS application helper'
238 235
239config IP_VS_FTP 236config IP_VS_FTP
240 tristate "FTP protocol helper" 237 tristate "FTP protocol helper"
241 depends on IP_VS_PROTO_TCP 238 depends on IP_VS_PROTO_TCP && NF_NAT
242 ---help--- 239 ---help---
243 FTP is a protocol that transfers IP address and/or port number in 240 FTP is a protocol that transfers IP address and/or port number in
244 the payload. In the virtual server via Network Address Translation, 241 the payload. In the virtual server via Network Address Translation,
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 1cb0e834f8ff..e76f87f4aca8 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = {
569}; 569};
570#endif 570#endif
571 571
572
573/*
574 * Replace a segment of data with a new segment
575 */
576int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
577 char *o_buf, int o_len, char *n_buf, int n_len)
578{
579 int diff;
580 int o_offset;
581 int o_left;
582
583 EnterFunction(9);
584
585 diff = n_len - o_len;
586 o_offset = o_buf - (char *)skb->data;
587 /* The length of left data after o_buf+o_len in the skb data */
588 o_left = skb->len - (o_offset + o_len);
589
590 if (diff <= 0) {
591 memmove(o_buf + n_len, o_buf + o_len, o_left);
592 memcpy(o_buf, n_buf, n_len);
593 skb_trim(skb, skb->len + diff);
594 } else if (diff <= skb_tailroom(skb)) {
595 skb_put(skb, diff);
596 memmove(o_buf + n_len, o_buf + o_len, o_left);
597 memcpy(o_buf, n_buf, n_len);
598 } else {
599 if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
600 return -ENOMEM;
601 skb_put(skb, diff);
602 memmove(skb->data + o_offset + n_len,
603 skb->data + o_offset + o_len, o_left);
604 skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
605 }
606
607 /* must update the iph total length here */
608 ip_hdr(skb)->tot_len = htons(skb->len);
609
610 LeaveFunction(9);
611 return 0;
612}
613
614
615int __init ip_vs_app_init(void) 572int __init ip_vs_app_init(void)
616{ 573{
617 /* we will replace it with proc_net_ipvs_create() soon */ 574 /* we will replace it with proc_net_ipvs_create() soon */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 654544e72264..b71c69a2db13 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -271,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get
271 return cp; 271 return cp;
272} 272}
273 273
274struct ip_vs_conn *
275ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
276 struct ip_vs_protocol *pp,
277 const struct ip_vs_iphdr *iph,
278 unsigned int proto_off, int inverse)
279{
280 __be16 _ports[2], *pptr;
281
282 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
283 if (pptr == NULL)
284 return NULL;
285
286 if (likely(!inverse))
287 return ip_vs_conn_in_get(af, iph->protocol,
288 &iph->saddr, pptr[0],
289 &iph->daddr, pptr[1]);
290 else
291 return ip_vs_conn_in_get(af, iph->protocol,
292 &iph->daddr, pptr[1],
293 &iph->saddr, pptr[0]);
294}
295EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
296
274/* Get reference to connection template */ 297/* Get reference to connection template */
275struct ip_vs_conn *ip_vs_ct_in_get 298struct ip_vs_conn *ip_vs_ct_in_get
276(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 299(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
@@ -356,6 +379,28 @@ struct ip_vs_conn *ip_vs_conn_out_get
356 return ret; 379 return ret;
357} 380}
358 381
382struct ip_vs_conn *
383ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
384 struct ip_vs_protocol *pp,
385 const struct ip_vs_iphdr *iph,
386 unsigned int proto_off, int inverse)
387{
388 __be16 _ports[2], *pptr;
389
390 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
391 if (pptr == NULL)
392 return NULL;
393
394 if (likely(!inverse))
395 return ip_vs_conn_out_get(af, iph->protocol,
396 &iph->saddr, pptr[0],
397 &iph->daddr, pptr[1]);
398 else
399 return ip_vs_conn_out_get(af, iph->protocol,
400 &iph->daddr, pptr[1],
401 &iph->saddr, pptr[0]);
402}
403EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
359 404
360/* 405/*
361 * Put back the conn and restart its timer with its timeout 406 * Put back the conn and restart its timer with its timeout
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 50907d8472a3..4f8ddba48011 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -54,7 +54,6 @@
54 54
55EXPORT_SYMBOL(register_ip_vs_scheduler); 55EXPORT_SYMBOL(register_ip_vs_scheduler);
56EXPORT_SYMBOL(unregister_ip_vs_scheduler); 56EXPORT_SYMBOL(unregister_ip_vs_scheduler);
57EXPORT_SYMBOL(ip_vs_skb_replace);
58EXPORT_SYMBOL(ip_vs_proto_name); 57EXPORT_SYMBOL(ip_vs_proto_name);
59EXPORT_SYMBOL(ip_vs_conn_new); 58EXPORT_SYMBOL(ip_vs_conn_new);
60EXPORT_SYMBOL(ip_vs_conn_in_get); 59EXPORT_SYMBOL(ip_vs_conn_in_get);
@@ -536,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
536 return NF_DROP; 535 return NF_DROP;
537} 536}
538 537
539
540/*
541 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
542 * chain, and is used for VS/NAT.
543 * It detects packets for VS/NAT connections and sends the packets
544 * immediately. This can avoid that iptable_nat mangles the packets
545 * for VS/NAT.
546 */
547static unsigned int ip_vs_post_routing(unsigned int hooknum,
548 struct sk_buff *skb,
549 const struct net_device *in,
550 const struct net_device *out,
551 int (*okfn)(struct sk_buff *))
552{
553 if (!skb->ipvs_property)
554 return NF_ACCEPT;
555 /* The packet was sent from IPVS, exit this chain */
556 return NF_STOP;
557}
558
559__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 538__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
560{ 539{
561 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 540 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -1499,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1499 .hooknum = NF_INET_FORWARD, 1478 .hooknum = NF_INET_FORWARD,
1500 .priority = 99, 1479 .priority = 99,
1501 }, 1480 },
1502 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1503 {
1504 .hook = ip_vs_post_routing,
1505 .owner = THIS_MODULE,
1506 .pf = PF_INET,
1507 .hooknum = NF_INET_POST_ROUTING,
1508 .priority = NF_IP_PRI_NAT_SRC-1,
1509 },
1510#ifdef CONFIG_IP_VS_IPV6 1481#ifdef CONFIG_IP_VS_IPV6
1511 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1482 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1512 * or VS/NAT(change destination), so that filtering rules can be 1483 * or VS/NAT(change destination), so that filtering rules can be
@@ -1535,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1535 .hooknum = NF_INET_FORWARD, 1506 .hooknum = NF_INET_FORWARD,
1536 .priority = 99, 1507 .priority = 99,
1537 }, 1508 },
1538 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1539 {
1540 .hook = ip_vs_post_routing,
1541 .owner = THIS_MODULE,
1542 .pf = PF_INET6,
1543 .hooknum = NF_INET_POST_ROUTING,
1544 .priority = NF_IP6_PRI_NAT_SRC-1,
1545 },
1546#endif 1509#endif
1547}; 1510};
1548 1511
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 2ae747a376a5..f228a17ec649 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,6 +20,17 @@
20 * 20 *
21 * Author: Wouter Gadeyne 21 * Author: Wouter Gadeyne
22 * 22 *
23 *
24 * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
25 * http://www.ssi.bg/~ja/nfct/:
26 *
27 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
28 *
29 * Portions Copyright (C) 2001-2002
30 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
31 *
32 * Portions Copyright (C) 2003-2008
33 * Julian Anastasov
23 */ 34 */
24 35
25#define KMSG_COMPONENT "IPVS" 36#define KMSG_COMPONENT "IPVS"
@@ -32,6 +43,9 @@
32#include <linux/in.h> 43#include <linux/in.h>
33#include <linux/ip.h> 44#include <linux/ip.h>
34#include <linux/netfilter.h> 45#include <linux/netfilter.h>
46#include <net/netfilter/nf_conntrack.h>
47#include <net/netfilter/nf_conntrack_expect.h>
48#include <net/netfilter/nf_nat_helper.h>
35#include <linux/gfp.h> 49#include <linux/gfp.h>
36#include <net/protocol.h> 50#include <net/protocol.h>
37#include <net/tcp.h> 51#include <net/tcp.h>
@@ -43,6 +57,16 @@
43#define SERVER_STRING "227 Entering Passive Mode (" 57#define SERVER_STRING "227 Entering Passive Mode ("
44#define CLIENT_STRING "PORT " 58#define CLIENT_STRING "PORT "
45 59
60#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
61#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
62 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
63 (T)->dst.protonum
64
65#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
66#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
67 &((C)->vaddr.ip), ntohs((C)->vport), \
68 &((C)->daddr.ip), ntohs((C)->dport), \
69 (C)->protocol, (C)->state
46 70
47/* 71/*
48 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 72 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
123 return 1; 147 return 1;
124} 148}
125 149
150/*
151 * Called from init_conntrack() as expectfn handler.
152 */
153static void
154ip_vs_expect_callback(struct nf_conn *ct,
155 struct nf_conntrack_expect *exp)
156{
157 struct nf_conntrack_tuple *orig, new_reply;
158 struct ip_vs_conn *cp;
159
160 if (exp->tuple.src.l3num != PF_INET)
161 return;
162
163 /*
164 * We assume that no NF locks are held before this callback.
165 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
166 * expectations even if they use wildcard values, now we provide the
167 * actual values from the newly created original conntrack direction.
168 * The conntrack is confirmed when packet reaches IPVS hooks.
169 */
170
171 /* RS->CLIENT */
172 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
173 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
174 &orig->src.u3, orig->src.u.tcp.port,
175 &orig->dst.u3, orig->dst.u.tcp.port);
176 if (cp) {
177 /* Change reply CLIENT->RS to CLIENT->VS */
178 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
179 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
180 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
181 __func__, ct, ct->status,
182 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
183 ARG_CONN(cp));
184 new_reply.dst.u3 = cp->vaddr;
185 new_reply.dst.u.tcp.port = cp->vport;
186 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
187 ", inout cp=" FMT_CONN "\n",
188 __func__, ct,
189 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
190 ARG_CONN(cp));
191 goto alter;
192 }
193
194 /* CLIENT->VS */
195 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
196 &orig->src.u3, orig->src.u.tcp.port,
197 &orig->dst.u3, orig->dst.u.tcp.port);
198 if (cp) {
199 /* Change reply VS->CLIENT to RS->CLIENT */
200 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
201 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
202 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
203 __func__, ct, ct->status,
204 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
205 ARG_CONN(cp));
206 new_reply.src.u3 = cp->daddr;
207 new_reply.src.u.tcp.port = cp->dport;
208 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
209 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
210 __func__, ct,
211 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
212 ARG_CONN(cp));
213 goto alter;
214 }
215
216 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
217 " - unknown expect\n",
218 __func__, ct, ct->status, ARG_TUPLE(orig));
219 return;
220
221alter:
222 /* Never alter conntrack for non-NAT conns */
223 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
224 nf_conntrack_alter_reply(ct, &new_reply);
225 ip_vs_conn_put(cp);
226 return;
227}
228
229/*
230 * Create NF conntrack expectation with wildcard (optional) source port.
231 * Then the default callback function will alter the reply and will confirm
232 * the conntrack entry when the first packet comes.
233 */
234static void
235ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
236 struct ip_vs_conn *cp, u_int8_t proto,
237 const __be16 *port, int from_rs)
238{
239 struct nf_conntrack_expect *exp;
240
241 BUG_ON(!ct || ct == &nf_conntrack_untracked);
242
243 exp = nf_ct_expect_alloc(ct);
244 if (!exp)
245 return;
246
247 if (from_rs)
248 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
249 nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
250 proto, port, &cp->cport);
251 else
252 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
253 nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
254 proto, port, &cp->vport);
255
256 exp->expectfn = ip_vs_expect_callback;
257
258 IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
259 __func__, ct, ARG_TUPLE(&exp->tuple));
260 nf_ct_expect_related(exp);
261 nf_ct_expect_put(exp);
262}
126 263
127/* 264/*
128 * Look at outgoing ftp packets to catch the response to a PASV command 265 * Look at outgoing ftp packets to catch the response to a PASV command
@@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
149 struct ip_vs_conn *n_cp; 286 struct ip_vs_conn *n_cp;
150 char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ 287 char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
151 unsigned buf_len; 288 unsigned buf_len;
152 int ret; 289 int ret = 0;
290 enum ip_conntrack_info ctinfo;
291 struct nf_conn *ct;
153 292
154#ifdef CONFIG_IP_VS_IPV6 293#ifdef CONFIG_IP_VS_IPV6
155 /* This application helper doesn't work with IPv6 yet, 294 /* This application helper doesn't work with IPv6 yet,
@@ -219,19 +358,26 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
219 358
220 buf_len = strlen(buf); 359 buf_len = strlen(buf);
221 360
361 ct = nf_ct_get(skb, &ctinfo);
362 if (ct && !nf_ct_is_untracked(ct)) {
363 /* If mangling fails this function will return 0
364 * which will cause the packet to be dropped.
365 * Mangling can only fail under memory pressure,
366 * hopefully it will succeed on the retransmitted
367 * packet.
368 */
369 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
370 start-data, end-start,
371 buf, buf_len);
372 if (ret)
373 ip_vs_expect_related(skb, ct, n_cp,
374 IPPROTO_TCP, NULL, 0);
375 }
376
222 /* 377 /*
223 * Calculate required delta-offset to keep TCP happy 378 * Not setting 'diff' is intentional, otherwise the sequence
379 * would be adjusted twice.
224 */ 380 */
225 *diff = buf_len - (end-start);
226
227 if (*diff == 0) {
228 /* simply replace it with new passive address */
229 memcpy(start, buf, buf_len);
230 ret = 1;
231 } else {
232 ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
233 end-start, buf, buf_len);
234 }
235 381
236 cp->app_data = NULL; 382 cp->app_data = NULL;
237 ip_vs_tcp_conn_listen(n_cp); 383 ip_vs_tcp_conn_listen(n_cp);
@@ -263,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
263 union nf_inet_addr to; 409 union nf_inet_addr to;
264 __be16 port; 410 __be16 port;
265 struct ip_vs_conn *n_cp; 411 struct ip_vs_conn *n_cp;
412 struct nf_conn *ct;
266 413
267#ifdef CONFIG_IP_VS_IPV6 414#ifdef CONFIG_IP_VS_IPV6
268 /* This application helper doesn't work with IPv6 yet, 415 /* This application helper doesn't work with IPv6 yet,
@@ -349,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
349 ip_vs_control_add(n_cp, cp); 496 ip_vs_control_add(n_cp, cp);
350 } 497 }
351 498
499 ct = (struct nf_conn *)skb->nfct;
500 if (ct && ct != &nf_conntrack_untracked)
501 ip_vs_expect_related(skb, ct, n_cp,
502 IPPROTO_TCP, &n_cp->dport, 1);
503
352 /* 504 /*
353 * Move tunnel to listen state 505 * Move tunnel to listen state
354 */ 506 */
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 2d3d5e4b35f8..027f654799fe 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
98 98
99 return NULL; 99 return NULL;
100} 100}
101EXPORT_SYMBOL(ip_vs_proto_get);
101 102
102 103
103/* 104/*
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index c9a3f7a21d53..4c0855cb006e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -8,55 +8,6 @@
8#include <net/sctp/checksum.h> 8#include <net/sctp/checksum.h>
9#include <net/ip_vs.h> 9#include <net/ip_vs.h>
10 10
11
12static struct ip_vs_conn *
13sctp_conn_in_get(int af,
14 const struct sk_buff *skb,
15 struct ip_vs_protocol *pp,
16 const struct ip_vs_iphdr *iph,
17 unsigned int proto_off,
18 int inverse)
19{
20 __be16 _ports[2], *pptr;
21
22 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
23 if (pptr == NULL)
24 return NULL;
25
26 if (likely(!inverse))
27 return ip_vs_conn_in_get(af, iph->protocol,
28 &iph->saddr, pptr[0],
29 &iph->daddr, pptr[1]);
30 else
31 return ip_vs_conn_in_get(af, iph->protocol,
32 &iph->daddr, pptr[1],
33 &iph->saddr, pptr[0]);
34}
35
36static struct ip_vs_conn *
37sctp_conn_out_get(int af,
38 const struct sk_buff *skb,
39 struct ip_vs_protocol *pp,
40 const struct ip_vs_iphdr *iph,
41 unsigned int proto_off,
42 int inverse)
43{
44 __be16 _ports[2], *pptr;
45
46 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
47 if (pptr == NULL)
48 return NULL;
49
50 if (likely(!inverse))
51 return ip_vs_conn_out_get(af, iph->protocol,
52 &iph->saddr, pptr[0],
53 &iph->daddr, pptr[1]);
54 else
55 return ip_vs_conn_out_get(af, iph->protocol,
56 &iph->daddr, pptr[1],
57 &iph->saddr, pptr[0]);
58}
59
60static int 11static int
61sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 12sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
62 int *verdict, struct ip_vs_conn **cpp) 13 int *verdict, struct ip_vs_conn **cpp)
@@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb,
173 return 0; 124 return 0;
174 125
175 /* Call application helper if needed */ 126 /* Call application helper if needed */
176 if (!ip_vs_app_pkt_out(cp, skb)) 127 if (!ip_vs_app_pkt_in(cp, skb))
177 return 0; 128 return 0;
178 } 129 }
179 130
@@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
1169 .register_app = sctp_register_app, 1120 .register_app = sctp_register_app,
1170 .unregister_app = sctp_unregister_app, 1121 .unregister_app = sctp_unregister_app,
1171 .conn_schedule = sctp_conn_schedule, 1122 .conn_schedule = sctp_conn_schedule,
1172 .conn_in_get = sctp_conn_in_get, 1123 .conn_in_get = ip_vs_conn_in_get_proto,
1173 .conn_out_get = sctp_conn_out_get, 1124 .conn_out_get = ip_vs_conn_out_get_proto,
1174 .snat_handler = sctp_snat_handler, 1125 .snat_handler = sctp_snat_handler,
1175 .dnat_handler = sctp_dnat_handler, 1126 .dnat_handler = sctp_dnat_handler,
1176 .csum_check = sctp_csum_check, 1127 .csum_check = sctp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 91d28e073742..282d24de8592 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -27,52 +27,6 @@
27 27
28#include <net/ip_vs.h> 28#include <net/ip_vs.h>
29 29
30
31static struct ip_vs_conn *
32tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
33 const struct ip_vs_iphdr *iph, unsigned int proto_off,
34 int inverse)
35{
36 __be16 _ports[2], *pptr;
37
38 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
39 if (pptr == NULL)
40 return NULL;
41
42 if (likely(!inverse)) {
43 return ip_vs_conn_in_get(af, iph->protocol,
44 &iph->saddr, pptr[0],
45 &iph->daddr, pptr[1]);
46 } else {
47 return ip_vs_conn_in_get(af, iph->protocol,
48 &iph->daddr, pptr[1],
49 &iph->saddr, pptr[0]);
50 }
51}
52
53static struct ip_vs_conn *
54tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
55 const struct ip_vs_iphdr *iph, unsigned int proto_off,
56 int inverse)
57{
58 __be16 _ports[2], *pptr;
59
60 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
61 if (pptr == NULL)
62 return NULL;
63
64 if (likely(!inverse)) {
65 return ip_vs_conn_out_get(af, iph->protocol,
66 &iph->saddr, pptr[0],
67 &iph->daddr, pptr[1]);
68 } else {
69 return ip_vs_conn_out_get(af, iph->protocol,
70 &iph->daddr, pptr[1],
71 &iph->saddr, pptr[0]);
72 }
73}
74
75
76static int 30static int
77tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 31tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
78 int *verdict, struct ip_vs_conn **cpp) 32 int *verdict, struct ip_vs_conn **cpp)
@@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
721 .register_app = tcp_register_app, 675 .register_app = tcp_register_app,
722 .unregister_app = tcp_unregister_app, 676 .unregister_app = tcp_unregister_app,
723 .conn_schedule = tcp_conn_schedule, 677 .conn_schedule = tcp_conn_schedule,
724 .conn_in_get = tcp_conn_in_get, 678 .conn_in_get = ip_vs_conn_in_get_proto,
725 .conn_out_get = tcp_conn_out_get, 679 .conn_out_get = ip_vs_conn_out_get_proto,
726 .snat_handler = tcp_snat_handler, 680 .snat_handler = tcp_snat_handler,
727 .dnat_handler = tcp_dnat_handler, 681 .dnat_handler = tcp_dnat_handler,
728 .csum_check = tcp_csum_check, 682 .csum_check = tcp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e7a6885e0167..8553231b5d41 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -27,58 +27,6 @@
27#include <net/ip.h> 27#include <net/ip.h>
28#include <net/ip6_checksum.h> 28#include <net/ip6_checksum.h>
29 29
30static struct ip_vs_conn *
31udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
32 const struct ip_vs_iphdr *iph, unsigned int proto_off,
33 int inverse)
34{
35 struct ip_vs_conn *cp;
36 __be16 _ports[2], *pptr;
37
38 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
39 if (pptr == NULL)
40 return NULL;
41
42 if (likely(!inverse)) {
43 cp = ip_vs_conn_in_get(af, iph->protocol,
44 &iph->saddr, pptr[0],
45 &iph->daddr, pptr[1]);
46 } else {
47 cp = ip_vs_conn_in_get(af, iph->protocol,
48 &iph->daddr, pptr[1],
49 &iph->saddr, pptr[0]);
50 }
51
52 return cp;
53}
54
55
56static struct ip_vs_conn *
57udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
58 const struct ip_vs_iphdr *iph, unsigned int proto_off,
59 int inverse)
60{
61 struct ip_vs_conn *cp;
62 __be16 _ports[2], *pptr;
63
64 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
65 if (pptr == NULL)
66 return NULL;
67
68 if (likely(!inverse)) {
69 cp = ip_vs_conn_out_get(af, iph->protocol,
70 &iph->saddr, pptr[0],
71 &iph->daddr, pptr[1]);
72 } else {
73 cp = ip_vs_conn_out_get(af, iph->protocol,
74 &iph->daddr, pptr[1],
75 &iph->saddr, pptr[0]);
76 }
77
78 return cp;
79}
80
81
82static int 30static int
83udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 31udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
84 int *verdict, struct ip_vs_conn **cpp) 32 int *verdict, struct ip_vs_conn **cpp)
@@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
520 .init = udp_init, 468 .init = udp_init,
521 .exit = udp_exit, 469 .exit = udp_exit,
522 .conn_schedule = udp_conn_schedule, 470 .conn_schedule = udp_conn_schedule,
523 .conn_in_get = udp_conn_in_get, 471 .conn_in_get = ip_vs_conn_in_get_proto,
524 .conn_out_get = udp_conn_out_get, 472 .conn_out_get = ip_vs_conn_out_get_proto,
525 .snat_handler = udp_snat_handler, 473 .snat_handler = udp_snat_handler,
526 .dnat_handler = udp_dnat_handler, 474 .dnat_handler = udp_dnat_handler,
527 .csum_check = udp_csum_check, 475 .csum_check = udp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 02b078e11cf3..21e1a5e9b9d3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -28,6 +28,7 @@
28#include <net/ip6_route.h> 28#include <net/ip6_route.h>
29#include <linux/icmpv6.h> 29#include <linux/icmpv6.h>
30#include <linux/netfilter.h> 30#include <linux/netfilter.h>
31#include <net/netfilter/nf_conntrack.h>
31#include <linux/netfilter_ipv4.h> 32#include <linux/netfilter_ipv4.h>
32 33
33#include <net/ip_vs.h> 34#include <net/ip_vs.h>
@@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
348} 349}
349#endif 350#endif
350 351
352static void
353ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
354{
355 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
356 struct nf_conntrack_tuple new_tuple;
357
358 if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
359 return;
360
361 /*
362 * The connection is not yet in the hashtable, so we update it.
363 * CIP->VIP will remain the same, so leave the tuple in
364 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
365 * real-server we will see RIP->DIP.
366 */
367 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
368 new_tuple.src.u3 = cp->daddr;
369 /*
370 * This will also take care of UDP and other protocols.
371 */
372 new_tuple.src.u.tcp.port = cp->dport;
373 nf_conntrack_alter_reply(ct, &new_tuple);
374}
375
351/* 376/*
352 * NAT transmitter (only for outside-to-inside nat forwarding) 377 * NAT transmitter (only for outside-to-inside nat forwarding)
353 * Not used for related ICMP 378 * Not used for related ICMP
@@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
403 428
404 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 429 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
405 430
431 ip_vs_update_conntrack(skb, cp);
432
406 /* FIXME: when application helper enlarges the packet and the length 433 /* FIXME: when application helper enlarges the packet and the length
407 is larger than the MTU of outgoing device, there will be still 434 is larger than the MTU of outgoing device, there will be still
408 MTU problem. */ 435 MTU problem. */
@@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
479 506
480 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 507 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
481 508
509 ip_vs_update_conntrack(skb, cp);
510
482 /* FIXME: when application helper enlarges the packet and the length 511 /* FIXME: when application helper enlarges the packet and the length
483 is larger than the MTU of outgoing device, there will be still 512 is larger than the MTU of outgoing device, there will be still
484 MTU problem. */ 513 MTU problem. */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 16b41b4e2a3c..df3eedb142ff 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -966,8 +966,7 @@ acct:
966 if (acct) { 966 if (acct) {
967 spin_lock_bh(&ct->lock); 967 spin_lock_bh(&ct->lock);
968 acct[CTINFO2DIR(ctinfo)].packets++; 968 acct[CTINFO2DIR(ctinfo)].packets++;
969 acct[CTINFO2DIR(ctinfo)].bytes += 969 acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
970 skb->len - skb_network_offset(skb);
971 spin_unlock_bh(&ct->lock); 970 spin_unlock_bh(&ct->lock);
972 } 971 }
973 } 972 }
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index fdc8fb4ae10f..7dcf7a404190 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
23{ 23{
24 unsigned int i; 24 unsigned int i;
25 struct nf_ct_ext_type *t; 25 struct nf_ct_ext_type *t;
26 struct nf_ct_ext *ext = ct->ext;
26 27
27 for (i = 0; i < NF_CT_EXT_NUM; i++) { 28 for (i = 0; i < NF_CT_EXT_NUM; i++) {
28 if (!nf_ct_ext_exist(ct, i)) 29 if (!__nf_ct_ext_exist(ext, i))
29 continue; 30 continue;
30 31
31 rcu_read_lock(); 32 rcu_read_lock();
@@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head)
73 74
74void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) 75void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
75{ 76{
76 struct nf_ct_ext *new; 77 struct nf_ct_ext *old, *new;
77 int i, newlen, newoff; 78 int i, newlen, newoff;
78 struct nf_ct_ext_type *t; 79 struct nf_ct_ext_type *t;
79 80
80 /* Conntrack must not be confirmed to avoid races on reallocation. */ 81 /* Conntrack must not be confirmed to avoid races on reallocation. */
81 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 82 NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
82 83
83 if (!ct->ext) 84 old = ct->ext;
85 if (!old)
84 return nf_ct_ext_create(&ct->ext, id, gfp); 86 return nf_ct_ext_create(&ct->ext, id, gfp);
85 87
86 if (nf_ct_ext_exist(ct, id)) 88 if (__nf_ct_ext_exist(old, id))
87 return NULL; 89 return NULL;
88 90
89 rcu_read_lock(); 91 rcu_read_lock();
90 t = rcu_dereference(nf_ct_ext_types[id]); 92 t = rcu_dereference(nf_ct_ext_types[id]);
91 BUG_ON(t == NULL); 93 BUG_ON(t == NULL);
92 94
93 newoff = ALIGN(ct->ext->len, t->align); 95 newoff = ALIGN(old->len, t->align);
94 newlen = newoff + t->len; 96 newlen = newoff + t->len;
95 rcu_read_unlock(); 97 rcu_read_unlock();
96 98
97 new = __krealloc(ct->ext, newlen, gfp); 99 new = __krealloc(old, newlen, gfp);
98 if (!new) 100 if (!new)
99 return NULL; 101 return NULL;
100 102
101 if (new != ct->ext) { 103 if (new != old) {
102 for (i = 0; i < NF_CT_EXT_NUM; i++) { 104 for (i = 0; i < NF_CT_EXT_NUM; i++) {
103 if (!nf_ct_ext_exist(ct, i)) 105 if (!__nf_ct_ext_exist(old, i))
104 continue; 106 continue;
105 107
106 rcu_read_lock(); 108 rcu_read_lock();
107 t = rcu_dereference(nf_ct_ext_types[i]); 109 t = rcu_dereference(nf_ct_ext_types[i]);
108 if (t && t->move) 110 if (t && t->move)
109 t->move((void *)new + new->offset[i], 111 t->move((void *)new + new->offset[i],
110 (void *)ct->ext + ct->ext->offset[i]); 112 (void *)old + old->offset[i]);
111 rcu_read_unlock(); 113 rcu_read_unlock();
112 } 114 }
113 call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); 115 call_rcu(&old->rcu, __nf_ct_ext_free_rcu);
114 ct->ext = new; 116 ct->ext = new;
115 } 117 }
116 118
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 802dbffae8b4..c4c885dca3bd 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct,
585 * Let's try to use the data from the packet. 585 * Let's try to use the data from the packet.
586 */ 586 */
587 sender->td_end = end; 587 sender->td_end = end;
588 win <<= sender->td_scale;
588 sender->td_maxwin = (win == 0 ? 1 : win); 589 sender->td_maxwin = (win == 0 ? 1 : win);
589 sender->td_maxend = end + sender->td_maxwin; 590 sender->td_maxend = end + sender->td_maxwin;
591 /*
592 * We haven't seen traffic in the other direction yet
593 * but we have to tweak window tracking to pass III
594 * and IV until that happens.
595 */
596 if (receiver->td_maxwin == 0)
597 receiver->td_end = receiver->td_maxend = sack;
590 } 598 }
591 } else if (((state->state == TCP_CONNTRACK_SYN_SENT 599 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
592 && dir == IP_CT_DIR_ORIGINAL) 600 && dir == IP_CT_DIR_ORIGINAL)
@@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
680 /* 688 /*
681 * Update receiver data. 689 * Update receiver data.
682 */ 690 */
683 if (after(end, sender->td_maxend)) 691 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
684 receiver->td_maxwin += end - sender->td_maxend; 692 receiver->td_maxwin += end - sender->td_maxend;
685 if (after(sack + win, receiver->td_maxend - 1)) { 693 if (after(sack + win, receiver->td_maxend - 1)) {
686 receiver->td_maxend = sack + win; 694 receiver->td_maxend = sack + win;
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
new file mode 100644
index 000000000000..0f642ef8cd26
--- /dev/null
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -0,0 +1,70 @@
1/* iptables module for the packet checksum mangling
2 *
3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
4 * (C) 2010 Red Hat, Inc.
5 *
6 * Author: Michael S. Tsirkin <mst@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11*/
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/skbuff.h>
15
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_CHECKSUM.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
21MODULE_DESCRIPTION("Xtables: checksum modification");
22MODULE_ALIAS("ipt_CHECKSUM");
23MODULE_ALIAS("ip6t_CHECKSUM");
24
25static unsigned int
26checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
27{
28 if (skb->ip_summed == CHECKSUM_PARTIAL)
29 skb_checksum_help(skb);
30
31 return XT_CONTINUE;
32}
33
34static int checksum_tg_check(const struct xt_tgchk_param *par)
35{
36 const struct xt_CHECKSUM_info *einfo = par->targinfo;
37
38 if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
39 pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
40 return -EINVAL;
41 }
42 if (!einfo->operation) {
43 pr_info("no CHECKSUM operation enabled\n");
44 return -EINVAL;
45 }
46 return 0;
47}
48
49static struct xt_target checksum_tg_reg __read_mostly = {
50 .name = "CHECKSUM",
51 .family = NFPROTO_UNSPEC,
52 .target = checksum_tg,
53 .targetsize = sizeof(struct xt_CHECKSUM_info),
54 .table = "mangle",
55 .checkentry = checksum_tg_check,
56 .me = THIS_MODULE,
57};
58
59static int __init checksum_tg_init(void)
60{
61 return xt_register_target(&checksum_tg_reg);
62}
63
64static void __exit checksum_tg_exit(void)
65{
66 xt_unregister_target(&checksum_tg_reg);
67}
68
69module_init(checksum_tg_init);
70module_exit(checksum_tg_exit);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index e1a0dedac258..c61294d85fda 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
37 return NF_DROP; 37 return NF_DROP;
38 38
39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
40 iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, 40 iph->saddr,
41 hp->source, tgi->lport ? tgi->lport : hp->dest, 41 tgi->laddr ? tgi->laddr : iph->daddr,
42 hp->source,
43 tgi->lport ? tgi->lport : hp->dest,
42 par->in, true); 44 par->in, true);
43 45
44 /* NOTE: assign_sock consumes our sk reference */ 46 /* NOTE: assign_sock consumes our sk reference */
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
new file mode 100644
index 000000000000..b39db8a5cbae
--- /dev/null
+++ b/net/netfilter/xt_cpu.c
@@ -0,0 +1,63 @@
1/* Kernel module to match running CPU */
2
3/*
4 * Might be used to distribute connections on several daemons, if
5 * RPS (Remote Packet Steering) is enabled or NIC is multiqueue capable,
6 * each RX queue IRQ affined to one CPU (1:1 mapping)
7 *
8 */
9
10/* (C) 2010 Eric Dumazet
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#include <linux/module.h>
18#include <linux/skbuff.h>
19#include <linux/netfilter/xt_cpu.h>
20#include <linux/netfilter/x_tables.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
24MODULE_DESCRIPTION("Xtables: CPU match");
25
26static int cpu_mt_check(const struct xt_mtchk_param *par)
27{
28 const struct xt_cpu_info *info = par->matchinfo;
29
30 if (info->invert & ~1)
31 return -EINVAL;
32 return 0;
33}
34
35static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
36{
37 const struct xt_cpu_info *info = par->matchinfo;
38
39 return (info->cpu == smp_processor_id()) ^ info->invert;
40}
41
42static struct xt_match cpu_mt_reg __read_mostly = {
43 .name = "cpu",
44 .revision = 0,
45 .family = NFPROTO_UNSPEC,
46 .checkentry = cpu_mt_check,
47 .match = cpu_mt,
48 .matchsize = sizeof(struct xt_cpu_info),
49 .me = THIS_MODULE,
50};
51
52static int __init cpu_mt_init(void)
53{
54 return xt_register_match(&cpu_mt_reg);
55}
56
57static void __exit cpu_mt_exit(void)
58{
59 xt_unregister_match(&cpu_mt_reg);
60}
61
62module_init(cpu_mt_init);
63module_exit(cpu_mt_exit);
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
new file mode 100644
index 000000000000..7a4d66db95ae
--- /dev/null
+++ b/net/netfilter/xt_ipvs.c
@@ -0,0 +1,189 @@
1/*
2 * xt_ipvs - kernel module to match IPVS connection properties
3 *
4 * Author: Hannes Eder <heder@google.com>
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/module.h>
10#include <linux/moduleparam.h>
11#include <linux/spinlock.h>
12#include <linux/skbuff.h>
13#ifdef CONFIG_IP_VS_IPV6
14#include <net/ipv6.h>
15#endif
16#include <linux/ip_vs.h>
17#include <linux/types.h>
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_ipvs.h>
21#include <net/netfilter/nf_conntrack.h>
22
23#include <net/ip_vs.h>
24
25MODULE_AUTHOR("Hannes Eder <heder@google.com>");
26MODULE_DESCRIPTION("Xtables: match IPVS connection properties");
27MODULE_LICENSE("GPL");
28MODULE_ALIAS("ipt_ipvs");
29MODULE_ALIAS("ip6t_ipvs");
30
31/* borrowed from xt_conntrack */
32static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
33 const union nf_inet_addr *uaddr,
34 const union nf_inet_addr *umask,
35 unsigned int l3proto)
36{
37 if (l3proto == NFPROTO_IPV4)
38 return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
39#ifdef CONFIG_IP_VS_IPV6
40 else if (l3proto == NFPROTO_IPV6)
41 return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
42 &uaddr->in6) == 0;
43#endif
44 else
45 return false;
46}
47
48static bool
49ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
50{
51 const struct xt_ipvs_mtinfo *data = par->matchinfo;
52 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
53 const u_int8_t family = par->family;
54 struct ip_vs_iphdr iph;
55 struct ip_vs_protocol *pp;
56 struct ip_vs_conn *cp;
57 bool match = true;
58
59 if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
60 match = skb->ipvs_property ^
61 !!(data->invert & XT_IPVS_IPVS_PROPERTY);
62 goto out;
63 }
64
65 /* other flags than XT_IPVS_IPVS_PROPERTY are set */
66 if (!skb->ipvs_property) {
67 match = false;
68 goto out;
69 }
70
71 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);
72
73 if (data->bitmask & XT_IPVS_PROTO)
74 if ((iph.protocol == data->l4proto) ^
75 !(data->invert & XT_IPVS_PROTO)) {
76 match = false;
77 goto out;
78 }
79
80 pp = ip_vs_proto_get(iph.protocol);
81 if (unlikely(!pp)) {
82 match = false;
83 goto out;
84 }
85
86 /*
87 * Check if the packet belongs to an existing entry
88 */
89 cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
90 if (unlikely(cp == NULL)) {
91 match = false;
92 goto out;
93 }
94
95 /*
96 * We found a connection, i.e. ct != 0, make sure to call
97 * __ip_vs_conn_put before returning. In our case jump to out_put_con.
98 */
99
100 if (data->bitmask & XT_IPVS_VPORT)
101 if ((cp->vport == data->vport) ^
102 !(data->invert & XT_IPVS_VPORT)) {
103 match = false;
104 goto out_put_cp;
105 }
106
107 if (data->bitmask & XT_IPVS_VPORTCTL)
108 if ((cp->control != NULL &&
109 cp->control->vport == data->vportctl) ^
110 !(data->invert & XT_IPVS_VPORTCTL)) {
111 match = false;
112 goto out_put_cp;
113 }
114
115 if (data->bitmask & XT_IPVS_DIR) {
116 enum ip_conntrack_info ctinfo;
117 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
118
119 if (ct == NULL || nf_ct_is_untracked(ct)) {
120 match = false;
121 goto out_put_cp;
122 }
123
124 if ((ctinfo >= IP_CT_IS_REPLY) ^
125 !!(data->invert & XT_IPVS_DIR)) {
126 match = false;
127 goto out_put_cp;
128 }
129 }
130
131 if (data->bitmask & XT_IPVS_METHOD)
132 if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
133 !(data->invert & XT_IPVS_METHOD)) {
134 match = false;
135 goto out_put_cp;
136 }
137
138 if (data->bitmask & XT_IPVS_VADDR) {
139 if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
140 &data->vmask, family) ^
141 !(data->invert & XT_IPVS_VADDR)) {
142 match = false;
143 goto out_put_cp;
144 }
145 }
146
147out_put_cp:
148 __ip_vs_conn_put(cp);
149out:
150 pr_debug("match=%d\n", match);
151 return match;
152}
153
154static int ipvs_mt_check(const struct xt_mtchk_param *par)
155{
156 if (par->family != NFPROTO_IPV4
157#ifdef CONFIG_IP_VS_IPV6
158 && par->family != NFPROTO_IPV6
159#endif
160 ) {
161 pr_info("protocol family %u not supported\n", par->family);
162 return -EINVAL;
163 }
164
165 return 0;
166}
167
168static struct xt_match xt_ipvs_mt_reg __read_mostly = {
169 .name = "ipvs",
170 .revision = 0,
171 .family = NFPROTO_UNSPEC,
172 .match = ipvs_mt,
173 .checkentry = ipvs_mt_check,
174 .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)),
175 .me = THIS_MODULE,
176};
177
178static int __init ipvs_mt_init(void)
179{
180 return xt_register_match(&xt_ipvs_mt_reg);
181}
182
183static void __exit ipvs_mt_exit(void)
184{
185 xt_unregister_match(&xt_ipvs_mt_reg);
186}
187
188module_init(ipvs_mt_init);
189module_exit(ipvs_mt_exit);
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index b4f7dfea5980..70eb2b4984dd 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,7 +11,8 @@
11#include <linux/netfilter/xt_quota.h> 11#include <linux/netfilter/xt_quota.h>
12 12
13struct xt_quota_priv { 13struct xt_quota_priv {
14 uint64_t quota; 14 spinlock_t lock;
15 uint64_t quota;
15}; 16};
16 17
17MODULE_LICENSE("GPL"); 18MODULE_LICENSE("GPL");
@@ -20,8 +21,6 @@ MODULE_DESCRIPTION("Xtables: countdown quota match");
20MODULE_ALIAS("ipt_quota"); 21MODULE_ALIAS("ipt_quota");
21MODULE_ALIAS("ip6t_quota"); 22MODULE_ALIAS("ip6t_quota");
22 23
23static DEFINE_SPINLOCK(quota_lock);
24
25static bool 24static bool
26quota_mt(const struct sk_buff *skb, struct xt_action_param *par) 25quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
27{ 26{
@@ -29,7 +28,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
29 struct xt_quota_priv *priv = q->master; 28 struct xt_quota_priv *priv = q->master;
30 bool ret = q->flags & XT_QUOTA_INVERT; 29 bool ret = q->flags & XT_QUOTA_INVERT;
31 30
32 spin_lock_bh(&quota_lock); 31 spin_lock_bh(&priv->lock);
33 if (priv->quota >= skb->len) { 32 if (priv->quota >= skb->len) {
34 priv->quota -= skb->len; 33 priv->quota -= skb->len;
35 ret = !ret; 34 ret = !ret;
@@ -37,9 +36,7 @@ quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
37 /* we do not allow even small packets from now on */ 36 /* we do not allow even small packets from now on */
38 priv->quota = 0; 37 priv->quota = 0;
39 } 38 }
40 /* Copy quota back to matchinfo so that iptables can display it */ 39 spin_unlock_bh(&priv->lock);
41 q->quota = priv->quota;
42 spin_unlock_bh(&quota_lock);
43 40
44 return ret; 41 return ret;
45} 42}
@@ -55,6 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par)
55 if (q->master == NULL) 52 if (q->master == NULL)
56 return -ENOMEM; 53 return -ENOMEM;
57 54
55 spin_lock_init(&q->master->lock);
58 q->master->quota = q->quota; 56 q->master->quota = q->quota;
59 return 0; 57 return 0;
60} 58}