about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2010-10-21 11:21:34 -0400
committer David S. Miller <davem@davemloft.net> 2010-10-21 11:21:34 -0400
commit 9941fb62762253774cc6177d0b9172ece5133fe1 (patch)
tree 641fc2b376e2f84c7023aa0cd8b9d76f954cc3a1 /net
parent a5190b4eea1f1c53ee26b3d1176441cafa8e7f79 (diff)
parent 3b1a1ce6f418cb7ab35eb55c8a6575987a524e30 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
-rw-r--r-- net/dccp/ipv4.c 10
-rw-r--r-- net/dccp/ipv6.c 10
-rw-r--r-- net/ipv4/inet_hashtables.c 28
-rw-r--r-- net/ipv4/netfilter/Kconfig 4
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 62
-rw-r--r-- net/ipv4/netfilter/arpt_mangle.c 2
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 84
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c 145
-rw-r--r-- net/ipv4/netfilter/nf_nat_amanda.c 9
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c 51
-rw-r--r-- net/ipv4/netfilter/nf_nat_ftp.c 9
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c 53
-rw-r--r-- net/ipv4/netfilter/nf_nat_helper.c 76
-rw-r--r-- net/ipv4/netfilter/nf_nat_irc.c 9
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c 17
-rw-r--r-- net/ipv4/netfilter/nf_nat_sip.c 27
-rw-r--r-- net/ipv4/tcp_ipv4.c 10
-rw-r--r-- net/ipv6/af_inet6.c 3
-rw-r--r-- net/ipv6/datagram.c 19
-rw-r--r-- net/ipv6/ipv6_sockglue.c 23
-rw-r--r-- net/ipv6/netfilter/Kconfig 4
-rw-r--r-- net/ipv6/netfilter/Makefile 5
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c 84
-rw-r--r-- net/ipv6/netfilter/ip6t_LOG.c 157
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 78
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 14
-rw-r--r-- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c 131
-rw-r--r-- net/ipv6/tcp_ipv6.c 12
-rw-r--r-- net/ipv6/udp.c 16
-rw-r--r-- net/netfilter/core.c 6
-rw-r--r-- net/netfilter/ipvs/Kconfig 20
-rw-r--r-- net/netfilter/ipvs/Makefile 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c 6
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c 286
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 802
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 385
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c 194
-rw-r--r-- net/netfilter/ipvs/ip_vs_nfct.c 292
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe.c 147
-rw-r--r-- net/netfilter/ipvs/ip_vs_pe_sip.c 169
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c 8
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c 99
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c 8
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c 52
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c 51
-rw-r--r-- net/netfilter/ipvs/ip_vs_sched.c 25
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 46
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 696
-rw-r--r-- net/netfilter/nf_conntrack_core.c 131
-rw-r--r-- net/netfilter/nf_conntrack_expect.c 68
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 77
-rw-r--r-- net/netfilter/nf_conntrack_sip.c 42
-rw-r--r-- net/netfilter/nf_tproxy_core.c 35
-rw-r--r-- net/netfilter/x_tables.c 12
-rw-r--r-- net/netfilter/xt_TPROXY.c 366
-rw-r--r-- net/netfilter/xt_ipvs.c 1
-rw-r--r-- net/netfilter/xt_socket.c 167
-rw-r--r-- net/sched/act_ipt.c 14
58 files changed, 3653 insertions, 1714 deletions
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d4a166f0f391..3f69ea114829 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
392 392
393 newsk = dccp_create_openreq_child(sk, req, skb); 393 newsk = dccp_create_openreq_child(sk, req, skb);
394 if (newsk == NULL) 394 if (newsk == NULL)
395 goto exit; 395 goto exit_nonewsk;
396 396
397 sk_setup_caps(newsk, dst); 397 sk_setup_caps(newsk, dst);
398 398
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
409 409
410 dccp_sync_mss(newsk, dst_mtu(dst)); 410 dccp_sync_mss(newsk, dst_mtu(dst));
411 411
412 if (__inet_inherit_port(sk, newsk) < 0) {
413 sock_put(newsk);
414 goto exit;
415 }
412 __inet_hash_nolisten(newsk, NULL); 416 __inet_hash_nolisten(newsk, NULL);
413 __inet_inherit_port(sk, newsk);
414 417
415 return newsk; 418 return newsk;
416 419
417exit_overflow: 420exit_overflow:
418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 421 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
422exit_nonewsk:
423 dst_release(dst);
419exit: 424exit:
420 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
421 dst_release(dst);
422 return NULL; 426 return NULL;
423} 427}
424 428
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e3f32575df7..dca711df9b60 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
564 564
565 newsk = dccp_create_openreq_child(sk, req, skb); 565 newsk = dccp_create_openreq_child(sk, req, skb);
566 if (newsk == NULL) 566 if (newsk == NULL)
567 goto out; 567 goto out_nonewsk;
568 568
569 /* 569 /*
570 * No need to charge this sock to the relevant IPv6 refcnt debug socks 570 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
634 634
635 if (__inet_inherit_port(sk, newsk) < 0) {
636 sock_put(newsk);
637 goto out;
638 }
635 __inet6_hash(newsk, NULL); 639 __inet6_hash(newsk, NULL);
636 __inet_inherit_port(sk, newsk);
637 640
638 return newsk; 641 return newsk;
639 642
640out_overflow: 643out_overflow:
641 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
645out_nonewsk:
646 dst_release(dst);
642out: 647out:
643 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 648 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
644 if (opt != NULL && opt != np->opt) 649 if (opt != NULL && opt != np->opt)
645 sock_kfree_s(sk, opt, opt->tot_len); 650 sock_kfree_s(sk, opt, opt->tot_len);
646 dst_release(dst);
647 return NULL; 651 return NULL;
648} 652}
649 653
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb7ad5a21ff3..1b344f30b463 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
101} 101}
102EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
103 103
104void __inet_inherit_port(struct sock *sk, struct sock *child) 104int __inet_inherit_port(struct sock *sk, struct sock *child)
105{ 105{
106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
107 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, 107 unsigned short port = inet_sk(child)->inet_num;
108 const int bhash = inet_bhashfn(sock_net(sk), port,
108 table->bhash_size); 109 table->bhash_size);
109 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
110 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
111 112
112 spin_lock(&head->lock); 113 spin_lock(&head->lock);
113 tb = inet_csk(sk)->icsk_bind_hash; 114 tb = inet_csk(sk)->icsk_bind_hash;
115 if (tb->port != port) {
116 /* NOTE: using tproxy and redirecting skbs to a proxy
117 * on a different listener port breaks the assumption
118 * that the listener socket's icsk_bind_hash is the same
119 * as that of the child socket. We have to look up or
120 * create a new bind bucket for the child here. */
121 struct hlist_node *node;
122 inet_bind_bucket_for_each(tb, node, &head->chain) {
123 if (net_eq(ib_net(tb), sock_net(sk)) &&
124 tb->port == port)
125 break;
126 }
127 if (!node) {
128 tb = inet_bind_bucket_create(table->bind_bucket_cachep,
129 sock_net(sk), head, port);
130 if (!tb) {
131 spin_unlock(&head->lock);
132 return -ENOMEM;
133 }
134 }
135 }
114 sk_add_bind_node(child, &tb->owners); 136 sk_add_bind_node(child, &tb->owners);
115 inet_csk(child)->icsk_bind_hash = tb; 137 inet_csk(child)->icsk_bind_hash = tb;
116 spin_unlock(&head->lock); 138 spin_unlock(&head->lock);
139
140 return 0;
117} 141}
118EXPORT_SYMBOL_GPL(__inet_inherit_port); 142EXPORT_SYMBOL_GPL(__inet_inherit_port);
119 143
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1833bdbf9805..8e3350643b63 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -324,10 +324,10 @@ config IP_NF_TARGET_ECN
324 324
325config IP_NF_TARGET_TTL 325config IP_NF_TARGET_TTL
326 tristate '"TTL" target support' 326 tristate '"TTL" target support'
327 depends on NETFILTER_ADVANCED 327 depends on NETFILTER_ADVANCED && IP_NF_MANGLE
328 select NETFILTER_XT_TARGET_HL 328 select NETFILTER_XT_TARGET_HL
329 ---help--- 329 ---help---
330 This is a backwards-compat option for the user's convenience 330 This is a backwards-compatible option for the user's convenience
331 (e.g. when running oldconfig). It selects 331 (e.g. when running oldconfig). It selects
332 CONFIG_NETFILTER_XT_TARGET_HL. 332 CONFIG_NETFILTER_XT_TARGET_HL.
333 333
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8b642f152468..3cad2591ace0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -228,7 +228,7 @@ arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
228 return NF_DROP; 228 return NF_DROP;
229} 229}
230 230
231static inline const struct arpt_entry_target * 231static inline const struct xt_entry_target *
232arpt_get_target_c(const struct arpt_entry *e) 232arpt_get_target_c(const struct arpt_entry *e)
233{ 233{
234 return arpt_get_target((struct arpt_entry *)e); 234 return arpt_get_target((struct arpt_entry *)e);
@@ -282,7 +282,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
282 282
283 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
284 do { 284 do {
285 const struct arpt_entry_target *t; 285 const struct xt_entry_target *t;
286 286
287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
288 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
@@ -297,10 +297,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
297 if (!t->u.kernel.target->target) { 297 if (!t->u.kernel.target->target) {
298 int v; 298 int v;
299 299
300 v = ((struct arpt_standard_target *)t)->verdict; 300 v = ((struct xt_standard_target *)t)->verdict;
301 if (v < 0) { 301 if (v < 0) {
302 /* Pop from stack? */ 302 /* Pop from stack? */
303 if (v != ARPT_RETURN) { 303 if (v != XT_RETURN) {
304 verdict = (unsigned)(-v) - 1; 304 verdict = (unsigned)(-v) - 1;
305 break; 305 break;
306 } 306 }
@@ -332,7 +332,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
332 /* Target might have changed stuff. */ 332 /* Target might have changed stuff. */
333 arp = arp_hdr(skb); 333 arp = arp_hdr(skb);
334 334
335 if (verdict == ARPT_CONTINUE) 335 if (verdict == XT_CONTINUE)
336 e = arpt_next_entry(e); 336 e = arpt_next_entry(e);
337 else 337 else
338 /* Verdict */ 338 /* Verdict */
@@ -377,7 +377,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
377 e->counters.pcnt = pos; 377 e->counters.pcnt = pos;
378 378
379 for (;;) { 379 for (;;) {
380 const struct arpt_standard_target *t 380 const struct xt_standard_target *t
381 = (void *)arpt_get_target_c(e); 381 = (void *)arpt_get_target_c(e);
382 int visited = e->comefrom & (1 << hook); 382 int visited = e->comefrom & (1 << hook);
383 383
@@ -392,13 +392,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
392 /* Unconditional return/END. */ 392 /* Unconditional return/END. */
393 if ((e->target_offset == sizeof(struct arpt_entry) && 393 if ((e->target_offset == sizeof(struct arpt_entry) &&
394 (strcmp(t->target.u.user.name, 394 (strcmp(t->target.u.user.name,
395 ARPT_STANDARD_TARGET) == 0) && 395 XT_STANDARD_TARGET) == 0) &&
396 t->verdict < 0 && unconditional(&e->arp)) || 396 t->verdict < 0 && unconditional(&e->arp)) ||
397 visited) { 397 visited) {
398 unsigned int oldpos, size; 398 unsigned int oldpos, size;
399 399
400 if ((strcmp(t->target.u.user.name, 400 if ((strcmp(t->target.u.user.name,
401 ARPT_STANDARD_TARGET) == 0) && 401 XT_STANDARD_TARGET) == 0) &&
402 t->verdict < -NF_MAX_VERDICT - 1) { 402 t->verdict < -NF_MAX_VERDICT - 1) {
403 duprintf("mark_source_chains: bad " 403 duprintf("mark_source_chains: bad "
404 "negative verdict (%i)\n", 404 "negative verdict (%i)\n",
@@ -433,7 +433,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
433 int newpos = t->verdict; 433 int newpos = t->verdict;
434 434
435 if (strcmp(t->target.u.user.name, 435 if (strcmp(t->target.u.user.name,
436 ARPT_STANDARD_TARGET) == 0 && 436 XT_STANDARD_TARGET) == 0 &&
437 newpos >= 0) { 437 newpos >= 0) {
438 if (newpos > newinfo->size - 438 if (newpos > newinfo->size -
439 sizeof(struct arpt_entry)) { 439 sizeof(struct arpt_entry)) {
@@ -464,14 +464,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
464 464
465static inline int check_entry(const struct arpt_entry *e, const char *name) 465static inline int check_entry(const struct arpt_entry *e, const char *name)
466{ 466{
467 const struct arpt_entry_target *t; 467 const struct xt_entry_target *t;
468 468
469 if (!arp_checkentry(&e->arp)) { 469 if (!arp_checkentry(&e->arp)) {
470 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 470 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
471 return -EINVAL; 471 return -EINVAL;
472 } 472 }
473 473
474 if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) 474 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
475 return -EINVAL; 475 return -EINVAL;
476 476
477 t = arpt_get_target_c(e); 477 t = arpt_get_target_c(e);
@@ -483,7 +483,7 @@ static inline int check_entry(const struct arpt_entry *e, const char *name)
483 483
484static inline int check_target(struct arpt_entry *e, const char *name) 484static inline int check_target(struct arpt_entry *e, const char *name)
485{ 485{
486 struct arpt_entry_target *t = arpt_get_target(e); 486 struct xt_entry_target *t = arpt_get_target(e);
487 int ret; 487 int ret;
488 struct xt_tgchk_param par = { 488 struct xt_tgchk_param par = {
489 .table = name, 489 .table = name,
@@ -506,7 +506,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
506static inline int 506static inline int
507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) 507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
508{ 508{
509 struct arpt_entry_target *t; 509 struct xt_entry_target *t;
510 struct xt_target *target; 510 struct xt_target *target;
511 int ret; 511 int ret;
512 512
@@ -536,7 +536,7 @@ out:
536 536
537static bool check_underflow(const struct arpt_entry *e) 537static bool check_underflow(const struct arpt_entry *e)
538{ 538{
539 const struct arpt_entry_target *t; 539 const struct xt_entry_target *t;
540 unsigned int verdict; 540 unsigned int verdict;
541 541
542 if (!unconditional(&e->arp)) 542 if (!unconditional(&e->arp))
@@ -544,7 +544,7 @@ static bool check_underflow(const struct arpt_entry *e)
544 t = arpt_get_target_c(e); 544 t = arpt_get_target_c(e);
545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
546 return false; 546 return false;
547 verdict = ((struct arpt_standard_target *)t)->verdict; 547 verdict = ((struct xt_standard_target *)t)->verdict;
548 verdict = -verdict - 1; 548 verdict = -verdict - 1;
549 return verdict == NF_DROP || verdict == NF_ACCEPT; 549 return verdict == NF_DROP || verdict == NF_ACCEPT;
550} 550}
@@ -566,7 +566,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
566 } 566 }
567 567
568 if (e->next_offset 568 if (e->next_offset
569 < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) { 569 < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
570 duprintf("checking: element %p size %u\n", 570 duprintf("checking: element %p size %u\n",
571 e, e->next_offset); 571 e, e->next_offset);
572 return -EINVAL; 572 return -EINVAL;
@@ -598,7 +598,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
598static inline void cleanup_entry(struct arpt_entry *e) 598static inline void cleanup_entry(struct arpt_entry *e)
599{ 599{
600 struct xt_tgdtor_param par; 600 struct xt_tgdtor_param par;
601 struct arpt_entry_target *t; 601 struct xt_entry_target *t;
602 602
603 t = arpt_get_target(e); 603 t = arpt_get_target(e);
604 par.target = t->u.kernel.target; 604 par.target = t->u.kernel.target;
@@ -794,7 +794,7 @@ static int copy_entries_to_user(unsigned int total_size,
794 /* FIXME: use iterator macros --RR */ 794 /* FIXME: use iterator macros --RR */
795 /* ... then go back and fix counters and names */ 795 /* ... then go back and fix counters and names */
796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
797 const struct arpt_entry_target *t; 797 const struct xt_entry_target *t;
798 798
799 e = (struct arpt_entry *)(loc_cpu_entry + off); 799 e = (struct arpt_entry *)(loc_cpu_entry + off);
800 if (copy_to_user(userptr + off 800 if (copy_to_user(userptr + off
@@ -807,7 +807,7 @@ static int copy_entries_to_user(unsigned int total_size,
807 807
808 t = arpt_get_target_c(e); 808 t = arpt_get_target_c(e);
809 if (copy_to_user(userptr + off + e->target_offset 809 if (copy_to_user(userptr + off + e->target_offset
810 + offsetof(struct arpt_entry_target, 810 + offsetof(struct xt_entry_target,
811 u.user.name), 811 u.user.name),
812 t->u.kernel.target->name, 812 t->u.kernel.target->name,
813 strlen(t->u.kernel.target->name)+1) != 0) { 813 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -844,7 +844,7 @@ static int compat_calc_entry(const struct arpt_entry *e,
844 const struct xt_table_info *info, 844 const struct xt_table_info *info,
845 const void *base, struct xt_table_info *newinfo) 845 const void *base, struct xt_table_info *newinfo)
846{ 846{
847 const struct arpt_entry_target *t; 847 const struct xt_entry_target *t;
848 unsigned int entry_offset; 848 unsigned int entry_offset;
849 int off, i, ret; 849 int off, i, ret;
850 850
@@ -895,7 +895,7 @@ static int compat_table_info(const struct xt_table_info *info,
895static int get_info(struct net *net, void __user *user, 895static int get_info(struct net *net, void __user *user,
896 const int *len, int compat) 896 const int *len, int compat)
897{ 897{
898 char name[ARPT_TABLE_MAXNAMELEN]; 898 char name[XT_TABLE_MAXNAMELEN];
899 struct xt_table *t; 899 struct xt_table *t;
900 int ret; 900 int ret;
901 901
@@ -908,7 +908,7 @@ static int get_info(struct net *net, void __user *user,
908 if (copy_from_user(name, user, sizeof(name)) != 0) 908 if (copy_from_user(name, user, sizeof(name)) != 0)
909 return -EFAULT; 909 return -EFAULT;
910 910
911 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 911 name[XT_TABLE_MAXNAMELEN-1] = '\0';
912#ifdef CONFIG_COMPAT 912#ifdef CONFIG_COMPAT
913 if (compat) 913 if (compat)
914 xt_compat_lock(NFPROTO_ARP); 914 xt_compat_lock(NFPROTO_ARP);
@@ -1204,7 +1204,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1204#ifdef CONFIG_COMPAT 1204#ifdef CONFIG_COMPAT
1205static inline void compat_release_entry(struct compat_arpt_entry *e) 1205static inline void compat_release_entry(struct compat_arpt_entry *e)
1206{ 1206{
1207 struct arpt_entry_target *t; 1207 struct xt_entry_target *t;
1208 1208
1209 t = compat_arpt_get_target(e); 1209 t = compat_arpt_get_target(e);
1210 module_put(t->u.kernel.target->me); 1210 module_put(t->u.kernel.target->me);
@@ -1220,7 +1220,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1220 const unsigned int *underflows, 1220 const unsigned int *underflows,
1221 const char *name) 1221 const char *name)
1222{ 1222{
1223 struct arpt_entry_target *t; 1223 struct xt_entry_target *t;
1224 struct xt_target *target; 1224 struct xt_target *target;
1225 unsigned int entry_offset; 1225 unsigned int entry_offset;
1226 int ret, off, h; 1226 int ret, off, h;
@@ -1288,7 +1288,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1288 unsigned int *size, const char *name, 1288 unsigned int *size, const char *name,
1289 struct xt_table_info *newinfo, unsigned char *base) 1289 struct xt_table_info *newinfo, unsigned char *base)
1290{ 1290{
1291 struct arpt_entry_target *t; 1291 struct xt_entry_target *t;
1292 struct xt_target *target; 1292 struct xt_target *target;
1293 struct arpt_entry *de; 1293 struct arpt_entry *de;
1294 unsigned int origsize; 1294 unsigned int origsize;
@@ -1474,7 +1474,7 @@ out_unlock:
1474} 1474}
1475 1475
1476struct compat_arpt_replace { 1476struct compat_arpt_replace {
1477 char name[ARPT_TABLE_MAXNAMELEN]; 1477 char name[XT_TABLE_MAXNAMELEN];
1478 u32 valid_hooks; 1478 u32 valid_hooks;
1479 u32 num_entries; 1479 u32 num_entries;
1480 u32 size; 1480 u32 size;
@@ -1567,7 +1567,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1567 struct xt_counters *counters, 1567 struct xt_counters *counters,
1568 unsigned int i) 1568 unsigned int i)
1569{ 1569{
1570 struct arpt_entry_target *t; 1570 struct xt_entry_target *t;
1571 struct compat_arpt_entry __user *ce; 1571 struct compat_arpt_entry __user *ce;
1572 u_int16_t target_offset, next_offset; 1572 u_int16_t target_offset, next_offset;
1573 compat_uint_t origsize; 1573 compat_uint_t origsize;
@@ -1628,7 +1628,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1628} 1628}
1629 1629
1630struct compat_arpt_get_entries { 1630struct compat_arpt_get_entries {
1631 char name[ARPT_TABLE_MAXNAMELEN]; 1631 char name[XT_TABLE_MAXNAMELEN];
1632 compat_uint_t size; 1632 compat_uint_t size;
1633 struct compat_arpt_entry entrytable[0]; 1633 struct compat_arpt_entry entrytable[0];
1634}; 1634};
@@ -1828,7 +1828,7 @@ void arpt_unregister_table(struct xt_table *table)
1828/* The built-in targets: standard (NULL) and error. */ 1828/* The built-in targets: standard (NULL) and error. */
1829static struct xt_target arpt_builtin_tg[] __read_mostly = { 1829static struct xt_target arpt_builtin_tg[] __read_mostly = {
1830 { 1830 {
1831 .name = ARPT_STANDARD_TARGET, 1831 .name = XT_STANDARD_TARGET,
1832 .targetsize = sizeof(int), 1832 .targetsize = sizeof(int),
1833 .family = NFPROTO_ARP, 1833 .family = NFPROTO_ARP,
1834#ifdef CONFIG_COMPAT 1834#ifdef CONFIG_COMPAT
@@ -1838,9 +1838,9 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
1838#endif 1838#endif
1839 }, 1839 },
1840 { 1840 {
1841 .name = ARPT_ERROR_TARGET, 1841 .name = XT_ERROR_TARGET,
1842 .target = arpt_error, 1842 .target = arpt_error,
1843 .targetsize = ARPT_FUNCTION_MAXNAMELEN, 1843 .targetsize = XT_FUNCTION_MAXNAMELEN,
1844 .family = NFPROTO_ARP, 1844 .family = NFPROTO_ARP,
1845 }, 1845 },
1846}; 1846};
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index e1be7dd1171b..b8ddcc480ed9 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -63,7 +63,7 @@ static int checkentry(const struct xt_tgchk_param *par)
63 return false; 63 return false;
64 64
65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && 65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
66 mangle->target != ARPT_CONTINUE) 66 mangle->target != XT_CONTINUE)
67 return false; 67 return false;
68 return true; 68 return true;
69} 69}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d163f2e3b2e9..d31b007a6d80 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -186,7 +186,7 @@ static inline bool unconditional(const struct ipt_ip *ip)
186} 186}
187 187
188/* for const-correctness */ 188/* for const-correctness */
189static inline const struct ipt_entry_target * 189static inline const struct xt_entry_target *
190ipt_get_target_c(const struct ipt_entry *e) 190ipt_get_target_c(const struct ipt_entry *e)
191{ 191{
192 return ipt_get_target((struct ipt_entry *)e); 192 return ipt_get_target((struct ipt_entry *)e);
@@ -230,9 +230,9 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
230 const char *hookname, const char **chainname, 230 const char *hookname, const char **chainname,
231 const char **comment, unsigned int *rulenum) 231 const char **comment, unsigned int *rulenum)
232{ 232{
233 const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); 233 const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
234 234
235 if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { 235 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
236 /* Head of user chain: ERROR target with chainname */ 236 /* Head of user chain: ERROR target with chainname */
237 *chainname = t->target.data; 237 *chainname = t->target.data;
238 (*rulenum) = 0; 238 (*rulenum) = 0;
@@ -241,7 +241,7 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
241 241
242 if (s->target_offset == sizeof(struct ipt_entry) && 242 if (s->target_offset == sizeof(struct ipt_entry) &&
243 strcmp(t->target.u.kernel.target->name, 243 strcmp(t->target.u.kernel.target->name,
244 IPT_STANDARD_TARGET) == 0 && 244 XT_STANDARD_TARGET) == 0 &&
245 t->verdict < 0 && 245 t->verdict < 0 &&
246 unconditional(&s->ip)) { 246 unconditional(&s->ip)) {
247 /* Tail of chains: STANDARD target (return/policy) */ 247 /* Tail of chains: STANDARD target (return/policy) */
@@ -346,7 +346,7 @@ ipt_do_table(struct sk_buff *skb,
346 get_entry(table_base, private->underflow[hook])); 346 get_entry(table_base, private->underflow[hook]));
347 347
348 do { 348 do {
349 const struct ipt_entry_target *t; 349 const struct xt_entry_target *t;
350 const struct xt_entry_match *ematch; 350 const struct xt_entry_match *ematch;
351 351
352 IP_NF_ASSERT(e); 352 IP_NF_ASSERT(e);
@@ -380,10 +380,10 @@ ipt_do_table(struct sk_buff *skb,
380 if (!t->u.kernel.target->target) { 380 if (!t->u.kernel.target->target) {
381 int v; 381 int v;
382 382
383 v = ((struct ipt_standard_target *)t)->verdict; 383 v = ((struct xt_standard_target *)t)->verdict;
384 if (v < 0) { 384 if (v < 0) {
385 /* Pop from stack? */ 385 /* Pop from stack? */
386 if (v != IPT_RETURN) { 386 if (v != XT_RETURN) {
387 verdict = (unsigned)(-v) - 1; 387 verdict = (unsigned)(-v) - 1;
388 break; 388 break;
389 } 389 }
@@ -421,7 +421,7 @@ ipt_do_table(struct sk_buff *skb,
421 verdict = t->u.kernel.target->target(skb, &acpar); 421 verdict = t->u.kernel.target->target(skb, &acpar);
422 /* Target might have changed stuff. */ 422 /* Target might have changed stuff. */
423 ip = ip_hdr(skb); 423 ip = ip_hdr(skb);
424 if (verdict == IPT_CONTINUE) 424 if (verdict == XT_CONTINUE)
425 e = ipt_next_entry(e); 425 e = ipt_next_entry(e);
426 else 426 else
427 /* Verdict */ 427 /* Verdict */
@@ -461,7 +461,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
461 e->counters.pcnt = pos; 461 e->counters.pcnt = pos;
462 462
463 for (;;) { 463 for (;;) {
464 const struct ipt_standard_target *t 464 const struct xt_standard_target *t
465 = (void *)ipt_get_target_c(e); 465 = (void *)ipt_get_target_c(e);
466 int visited = e->comefrom & (1 << hook); 466 int visited = e->comefrom & (1 << hook);
467 467
@@ -475,13 +475,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
475 /* Unconditional return/END. */ 475 /* Unconditional return/END. */
476 if ((e->target_offset == sizeof(struct ipt_entry) && 476 if ((e->target_offset == sizeof(struct ipt_entry) &&
477 (strcmp(t->target.u.user.name, 477 (strcmp(t->target.u.user.name,
478 IPT_STANDARD_TARGET) == 0) && 478 XT_STANDARD_TARGET) == 0) &&
479 t->verdict < 0 && unconditional(&e->ip)) || 479 t->verdict < 0 && unconditional(&e->ip)) ||
480 visited) { 480 visited) {
481 unsigned int oldpos, size; 481 unsigned int oldpos, size;
482 482
483 if ((strcmp(t->target.u.user.name, 483 if ((strcmp(t->target.u.user.name,
484 IPT_STANDARD_TARGET) == 0) && 484 XT_STANDARD_TARGET) == 0) &&
485 t->verdict < -NF_MAX_VERDICT - 1) { 485 t->verdict < -NF_MAX_VERDICT - 1) {
486 duprintf("mark_source_chains: bad " 486 duprintf("mark_source_chains: bad "
487 "negative verdict (%i)\n", 487 "negative verdict (%i)\n",
@@ -524,7 +524,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
524 int newpos = t->verdict; 524 int newpos = t->verdict;
525 525
526 if (strcmp(t->target.u.user.name, 526 if (strcmp(t->target.u.user.name,
527 IPT_STANDARD_TARGET) == 0 && 527 XT_STANDARD_TARGET) == 0 &&
528 newpos >= 0) { 528 newpos >= 0) {
529 if (newpos > newinfo->size - 529 if (newpos > newinfo->size -
530 sizeof(struct ipt_entry)) { 530 sizeof(struct ipt_entry)) {
@@ -552,7 +552,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
552 return 1; 552 return 1;
553} 553}
554 554
555static void cleanup_match(struct ipt_entry_match *m, struct net *net) 555static void cleanup_match(struct xt_entry_match *m, struct net *net)
556{ 556{
557 struct xt_mtdtor_param par; 557 struct xt_mtdtor_param par;
558 558
@@ -568,14 +568,14 @@ static void cleanup_match(struct ipt_entry_match *m, struct net *net)
568static int 568static int
569check_entry(const struct ipt_entry *e, const char *name) 569check_entry(const struct ipt_entry *e, const char *name)
570{ 570{
571 const struct ipt_entry_target *t; 571 const struct xt_entry_target *t;
572 572
573 if (!ip_checkentry(&e->ip)) { 573 if (!ip_checkentry(&e->ip)) {
574 duprintf("ip check failed %p %s.\n", e, par->match->name); 574 duprintf("ip check failed %p %s.\n", e, par->match->name);
575 return -EINVAL; 575 return -EINVAL;
576 } 576 }
577 577
578 if (e->target_offset + sizeof(struct ipt_entry_target) > 578 if (e->target_offset + sizeof(struct xt_entry_target) >
579 e->next_offset) 579 e->next_offset)
580 return -EINVAL; 580 return -EINVAL;
581 581
@@ -587,7 +587,7 @@ check_entry(const struct ipt_entry *e, const char *name)
587} 587}
588 588
589static int 589static int
590check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 590check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
591{ 591{
592 const struct ipt_ip *ip = par->entryinfo; 592 const struct ipt_ip *ip = par->entryinfo;
593 int ret; 593 int ret;
@@ -605,7 +605,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
605} 605}
606 606
607static int 607static int
608find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 608find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
609{ 609{
610 struct xt_match *match; 610 struct xt_match *match;
611 int ret; 611 int ret;
@@ -630,7 +630,7 @@ err:
630 630
631static int check_target(struct ipt_entry *e, struct net *net, const char *name) 631static int check_target(struct ipt_entry *e, struct net *net, const char *name)
632{ 632{
633 struct ipt_entry_target *t = ipt_get_target(e); 633 struct xt_entry_target *t = ipt_get_target(e);
634 struct xt_tgchk_param par = { 634 struct xt_tgchk_param par = {
635 .net = net, 635 .net = net,
636 .table = name, 636 .table = name,
@@ -656,7 +656,7 @@ static int
656find_check_entry(struct ipt_entry *e, struct net *net, const char *name, 656find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
657 unsigned int size) 657 unsigned int size)
658{ 658{
659 struct ipt_entry_target *t; 659 struct xt_entry_target *t;
660 struct xt_target *target; 660 struct xt_target *target;
661 int ret; 661 int ret;
662 unsigned int j; 662 unsigned int j;
@@ -707,7 +707,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
707 707
708static bool check_underflow(const struct ipt_entry *e) 708static bool check_underflow(const struct ipt_entry *e)
709{ 709{
710 const struct ipt_entry_target *t; 710 const struct xt_entry_target *t;
711 unsigned int verdict; 711 unsigned int verdict;
712 712
713 if (!unconditional(&e->ip)) 713 if (!unconditional(&e->ip))
@@ -715,7 +715,7 @@ static bool check_underflow(const struct ipt_entry *e)
715 t = ipt_get_target_c(e); 715 t = ipt_get_target_c(e);
716 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 716 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
717 return false; 717 return false;
718 verdict = ((struct ipt_standard_target *)t)->verdict; 718 verdict = ((struct xt_standard_target *)t)->verdict;
719 verdict = -verdict - 1; 719 verdict = -verdict - 1;
720 return verdict == NF_DROP || verdict == NF_ACCEPT; 720 return verdict == NF_DROP || verdict == NF_ACCEPT;
721} 721}
@@ -738,7 +738,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
738 } 738 }
739 739
740 if (e->next_offset 740 if (e->next_offset
741 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) { 741 < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
742 duprintf("checking: element %p size %u\n", 742 duprintf("checking: element %p size %u\n",
743 e, e->next_offset); 743 e, e->next_offset);
744 return -EINVAL; 744 return -EINVAL;
@@ -771,7 +771,7 @@ static void
771cleanup_entry(struct ipt_entry *e, struct net *net) 771cleanup_entry(struct ipt_entry *e, struct net *net)
772{ 772{
773 struct xt_tgdtor_param par; 773 struct xt_tgdtor_param par;
774 struct ipt_entry_target *t; 774 struct xt_entry_target *t;
775 struct xt_entry_match *ematch; 775 struct xt_entry_match *ematch;
776 776
777 /* Cleanup all matches */ 777 /* Cleanup all matches */
@@ -972,8 +972,8 @@ copy_entries_to_user(unsigned int total_size,
972 /* ... then go back and fix counters and names */ 972 /* ... then go back and fix counters and names */
973 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 973 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
974 unsigned int i; 974 unsigned int i;
975 const struct ipt_entry_match *m; 975 const struct xt_entry_match *m;
976 const struct ipt_entry_target *t; 976 const struct xt_entry_target *t;
977 977
978 e = (struct ipt_entry *)(loc_cpu_entry + off); 978 e = (struct ipt_entry *)(loc_cpu_entry + off);
979 if (copy_to_user(userptr + off 979 if (copy_to_user(userptr + off
@@ -990,7 +990,7 @@ copy_entries_to_user(unsigned int total_size,
990 m = (void *)e + i; 990 m = (void *)e + i;
991 991
992 if (copy_to_user(userptr + off + i 992 if (copy_to_user(userptr + off + i
993 + offsetof(struct ipt_entry_match, 993 + offsetof(struct xt_entry_match,
994 u.user.name), 994 u.user.name),
995 m->u.kernel.match->name, 995 m->u.kernel.match->name,
996 strlen(m->u.kernel.match->name)+1) 996 strlen(m->u.kernel.match->name)+1)
@@ -1002,7 +1002,7 @@ copy_entries_to_user(unsigned int total_size,
1002 1002
1003 t = ipt_get_target_c(e); 1003 t = ipt_get_target_c(e);
1004 if (copy_to_user(userptr + off + e->target_offset 1004 if (copy_to_user(userptr + off + e->target_offset
1005 + offsetof(struct ipt_entry_target, 1005 + offsetof(struct xt_entry_target,
1006 u.user.name), 1006 u.user.name),
1007 t->u.kernel.target->name, 1007 t->u.kernel.target->name,
1008 strlen(t->u.kernel.target->name)+1) != 0) { 1008 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1040,7 +1040,7 @@ static int compat_calc_entry(const struct ipt_entry *e,
1040 const void *base, struct xt_table_info *newinfo) 1040 const void *base, struct xt_table_info *newinfo)
1041{ 1041{
1042 const struct xt_entry_match *ematch; 1042 const struct xt_entry_match *ematch;
1043 const struct ipt_entry_target *t; 1043 const struct xt_entry_target *t;
1044 unsigned int entry_offset; 1044 unsigned int entry_offset;
1045 int off, i, ret; 1045 int off, i, ret;
1046 1046
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
1092static int get_info(struct net *net, void __user *user, 1092static int get_info(struct net *net, void __user *user,
1093 const int *len, int compat) 1093 const int *len, int compat)
1094{ 1094{
1095 char name[IPT_TABLE_MAXNAMELEN]; 1095 char name[XT_TABLE_MAXNAMELEN];
1096 struct xt_table *t; 1096 struct xt_table *t;
1097 int ret; 1097 int ret;
1098 1098
@@ -1105,7 +1105,7 @@ static int get_info(struct net *net, void __user *user,
1105 if (copy_from_user(name, user, sizeof(name)) != 0) 1105 if (copy_from_user(name, user, sizeof(name)) != 0)
1106 return -EFAULT; 1106 return -EFAULT;
1107 1107
1108 name[IPT_TABLE_MAXNAMELEN-1] = '\0'; 1108 name[XT_TABLE_MAXNAMELEN-1] = '\0';
1109#ifdef CONFIG_COMPAT 1109#ifdef CONFIG_COMPAT
1110 if (compat) 1110 if (compat)
1111 xt_compat_lock(AF_INET); 1111 xt_compat_lock(AF_INET);
@@ -1400,14 +1400,14 @@ do_add_counters(struct net *net, const void __user *user,
1400 1400
1401#ifdef CONFIG_COMPAT 1401#ifdef CONFIG_COMPAT
1402struct compat_ipt_replace { 1402struct compat_ipt_replace {
1403 char name[IPT_TABLE_MAXNAMELEN]; 1403 char name[XT_TABLE_MAXNAMELEN];
1404 u32 valid_hooks; 1404 u32 valid_hooks;
1405 u32 num_entries; 1405 u32 num_entries;
1406 u32 size; 1406 u32 size;
1407 u32 hook_entry[NF_INET_NUMHOOKS]; 1407 u32 hook_entry[NF_INET_NUMHOOKS];
1408 u32 underflow[NF_INET_NUMHOOKS]; 1408 u32 underflow[NF_INET_NUMHOOKS];
1409 u32 num_counters; 1409 u32 num_counters;
1410 compat_uptr_t counters; /* struct ipt_counters * */ 1410 compat_uptr_t counters; /* struct xt_counters * */
1411 struct compat_ipt_entry entries[0]; 1411 struct compat_ipt_entry entries[0];
1412}; 1412};
1413 1413
@@ -1416,7 +1416,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1416 unsigned int *size, struct xt_counters *counters, 1416 unsigned int *size, struct xt_counters *counters,
1417 unsigned int i) 1417 unsigned int i)
1418{ 1418{
1419 struct ipt_entry_target *t; 1419 struct xt_entry_target *t;
1420 struct compat_ipt_entry __user *ce; 1420 struct compat_ipt_entry __user *ce;
1421 u_int16_t target_offset, next_offset; 1421 u_int16_t target_offset, next_offset;
1422 compat_uint_t origsize; 1422 compat_uint_t origsize;
@@ -1451,7 +1451,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1451} 1451}
1452 1452
1453static int 1453static int
1454compat_find_calc_match(struct ipt_entry_match *m, 1454compat_find_calc_match(struct xt_entry_match *m,
1455 const char *name, 1455 const char *name,
1456 const struct ipt_ip *ip, 1456 const struct ipt_ip *ip,
1457 unsigned int hookmask, 1457 unsigned int hookmask,
@@ -1473,7 +1473,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
1473 1473
1474static void compat_release_entry(struct compat_ipt_entry *e) 1474static void compat_release_entry(struct compat_ipt_entry *e)
1475{ 1475{
1476 struct ipt_entry_target *t; 1476 struct xt_entry_target *t;
1477 struct xt_entry_match *ematch; 1477 struct xt_entry_match *ematch;
1478 1478
1479 /* Cleanup all matches */ 1479 /* Cleanup all matches */
@@ -1494,7 +1494,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1494 const char *name) 1494 const char *name)
1495{ 1495{
1496 struct xt_entry_match *ematch; 1496 struct xt_entry_match *ematch;
1497 struct ipt_entry_target *t; 1497 struct xt_entry_target *t;
1498 struct xt_target *target; 1498 struct xt_target *target;
1499 unsigned int entry_offset; 1499 unsigned int entry_offset;
1500 unsigned int j; 1500 unsigned int j;
@@ -1576,7 +1576,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1576 unsigned int *size, const char *name, 1576 unsigned int *size, const char *name,
1577 struct xt_table_info *newinfo, unsigned char *base) 1577 struct xt_table_info *newinfo, unsigned char *base)
1578{ 1578{
1579 struct ipt_entry_target *t; 1579 struct xt_entry_target *t;
1580 struct xt_target *target; 1580 struct xt_target *target;
1581 struct ipt_entry *de; 1581 struct ipt_entry *de;
1582 unsigned int origsize; 1582 unsigned int origsize;
@@ -1884,7 +1884,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
1884} 1884}
1885 1885
1886struct compat_ipt_get_entries { 1886struct compat_ipt_get_entries {
1887 char name[IPT_TABLE_MAXNAMELEN]; 1887 char name[XT_TABLE_MAXNAMELEN];
1888 compat_uint_t size; 1888 compat_uint_t size;
1889 struct compat_ipt_entry entrytable[0]; 1889 struct compat_ipt_entry entrytable[0];
1890}; 1890};
@@ -2039,7 +2039,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2039 2039
2040 case IPT_SO_GET_REVISION_MATCH: 2040 case IPT_SO_GET_REVISION_MATCH:
2041 case IPT_SO_GET_REVISION_TARGET: { 2041 case IPT_SO_GET_REVISION_TARGET: {
2042 struct ipt_get_revision rev; 2042 struct xt_get_revision rev;
2043 int target; 2043 int target;
2044 2044
2045 if (*len != sizeof(rev)) { 2045 if (*len != sizeof(rev)) {
@@ -2176,7 +2176,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)
2176 2176
2177static struct xt_target ipt_builtin_tg[] __read_mostly = { 2177static struct xt_target ipt_builtin_tg[] __read_mostly = {
2178 { 2178 {
2179 .name = IPT_STANDARD_TARGET, 2179 .name = XT_STANDARD_TARGET,
2180 .targetsize = sizeof(int), 2180 .targetsize = sizeof(int),
2181 .family = NFPROTO_IPV4, 2181 .family = NFPROTO_IPV4,
2182#ifdef CONFIG_COMPAT 2182#ifdef CONFIG_COMPAT
@@ -2186,9 +2186,9 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
2186#endif 2186#endif
2187 }, 2187 },
2188 { 2188 {
2189 .name = IPT_ERROR_TARGET, 2189 .name = XT_ERROR_TARGET,
2190 .target = ipt_error, 2190 .target = ipt_error,
2191 .targetsize = IPT_FUNCTION_MAXNAMELEN, 2191 .targetsize = XT_FUNCTION_MAXNAMELEN,
2192 .family = NFPROTO_IPV4, 2192 .family = NFPROTO_IPV4,
2193 }, 2193 },
2194}; 2194};
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 915fc17d7ce2..72ffc8fda2e9 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,16 +24,15 @@
24#include <linux/netfilter/x_tables.h> 24#include <linux/netfilter/x_tables.h>
25#include <linux/netfilter_ipv4/ipt_LOG.h> 25#include <linux/netfilter_ipv4/ipt_LOG.h>
26#include <net/netfilter/nf_log.h> 26#include <net/netfilter/nf_log.h>
27#include <net/netfilter/xt_log.h>
27 28
28MODULE_LICENSE("GPL"); 29MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); 30MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
30MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog"); 31MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
31 32
32/* Use lock to serialize, so printks don't overlap */
33static DEFINE_SPINLOCK(log_lock);
34
35/* One level of recursion won't kill us */ 33/* One level of recursion won't kill us */
36static void dump_packet(const struct nf_loginfo *info, 34static void dump_packet(struct sbuff *m,
35 const struct nf_loginfo *info,
37 const struct sk_buff *skb, 36 const struct sk_buff *skb,
38 unsigned int iphoff) 37 unsigned int iphoff)
39{ 38{
@@ -48,32 +47,32 @@ static void dump_packet(const struct nf_loginfo *info,
48 47
49 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); 48 ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
50 if (ih == NULL) { 49 if (ih == NULL) {
51 printk("TRUNCATED"); 50 sb_add(m, "TRUNCATED");
52 return; 51 return;
53 } 52 }
54 53
55 /* Important fields: 54 /* Important fields:
56 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ 55 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
57 /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ 56 /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
58 printk("SRC=%pI4 DST=%pI4 ", 57 sb_add(m, "SRC=%pI4 DST=%pI4 ",
59 &ih->saddr, &ih->daddr); 58 &ih->saddr, &ih->daddr);
60 59
61 /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ 60 /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
62 printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", 61 sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
63 ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, 62 ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
64 ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); 63 ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
65 64
66 /* Max length: 6 "CE DF MF " */ 65 /* Max length: 6 "CE DF MF " */
67 if (ntohs(ih->frag_off) & IP_CE) 66 if (ntohs(ih->frag_off) & IP_CE)
68 printk("CE "); 67 sb_add(m, "CE ");
69 if (ntohs(ih->frag_off) & IP_DF) 68 if (ntohs(ih->frag_off) & IP_DF)
70 printk("DF "); 69 sb_add(m, "DF ");
71 if (ntohs(ih->frag_off) & IP_MF) 70 if (ntohs(ih->frag_off) & IP_MF)
72 printk("MF "); 71 sb_add(m, "MF ");
73 72
74 /* Max length: 11 "FRAG:65535 " */ 73 /* Max length: 11 "FRAG:65535 " */
75 if (ntohs(ih->frag_off) & IP_OFFSET) 74 if (ntohs(ih->frag_off) & IP_OFFSET)
76 printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); 75 sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
77 76
78 if ((logflags & IPT_LOG_IPOPT) && 77 if ((logflags & IPT_LOG_IPOPT) &&
79 ih->ihl * 4 > sizeof(struct iphdr)) { 78 ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -85,15 +84,15 @@ static void dump_packet(const struct nf_loginfo *info,
85 op = skb_header_pointer(skb, iphoff+sizeof(_iph), 84 op = skb_header_pointer(skb, iphoff+sizeof(_iph),
86 optsize, _opt); 85 optsize, _opt);
87 if (op == NULL) { 86 if (op == NULL) {
88 printk("TRUNCATED"); 87 sb_add(m, "TRUNCATED");
89 return; 88 return;
90 } 89 }
91 90
92 /* Max length: 127 "OPT (" 15*4*2chars ") " */ 91 /* Max length: 127 "OPT (" 15*4*2chars ") " */
93 printk("OPT ("); 92 sb_add(m, "OPT (");
94 for (i = 0; i < optsize; i++) 93 for (i = 0; i < optsize; i++)
95 printk("%02X", op[i]); 94 sb_add(m, "%02X", op[i]);
96 printk(") "); 95 sb_add(m, ") ");
97 } 96 }
98 97
99 switch (ih->protocol) { 98 switch (ih->protocol) {
@@ -102,7 +101,7 @@ static void dump_packet(const struct nf_loginfo *info,
102 const struct tcphdr *th; 101 const struct tcphdr *th;
103 102
104 /* Max length: 10 "PROTO=TCP " */ 103 /* Max length: 10 "PROTO=TCP " */
105 printk("PROTO=TCP "); 104 sb_add(m, "PROTO=TCP ");
106 105
107 if (ntohs(ih->frag_off) & IP_OFFSET) 106 if (ntohs(ih->frag_off) & IP_OFFSET)
108 break; 107 break;
@@ -111,41 +110,41 @@ static void dump_packet(const struct nf_loginfo *info,
111 th = skb_header_pointer(skb, iphoff + ih->ihl * 4, 110 th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
112 sizeof(_tcph), &_tcph); 111 sizeof(_tcph), &_tcph);
113 if (th == NULL) { 112 if (th == NULL) {
114 printk("INCOMPLETE [%u bytes] ", 113 sb_add(m, "INCOMPLETE [%u bytes] ",
115 skb->len - iphoff - ih->ihl*4); 114 skb->len - iphoff - ih->ihl*4);
116 break; 115 break;
117 } 116 }
118 117
119 /* Max length: 20 "SPT=65535 DPT=65535 " */ 118 /* Max length: 20 "SPT=65535 DPT=65535 " */
120 printk("SPT=%u DPT=%u ", 119 sb_add(m, "SPT=%u DPT=%u ",
121 ntohs(th->source), ntohs(th->dest)); 120 ntohs(th->source), ntohs(th->dest));
122 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 121 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
123 if (logflags & IPT_LOG_TCPSEQ) 122 if (logflags & IPT_LOG_TCPSEQ)
124 printk("SEQ=%u ACK=%u ", 123 sb_add(m, "SEQ=%u ACK=%u ",
125 ntohl(th->seq), ntohl(th->ack_seq)); 124 ntohl(th->seq), ntohl(th->ack_seq));
126 /* Max length: 13 "WINDOW=65535 " */ 125 /* Max length: 13 "WINDOW=65535 " */
127 printk("WINDOW=%u ", ntohs(th->window)); 126 sb_add(m, "WINDOW=%u ", ntohs(th->window));
128 /* Max length: 9 "RES=0x3F " */ 127 /* Max length: 9 "RES=0x3F " */
129 printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); 128 sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
130 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ 129 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
131 if (th->cwr) 130 if (th->cwr)
132 printk("CWR "); 131 sb_add(m, "CWR ");
133 if (th->ece) 132 if (th->ece)
134 printk("ECE "); 133 sb_add(m, "ECE ");
135 if (th->urg) 134 if (th->urg)
136 printk("URG "); 135 sb_add(m, "URG ");
137 if (th->ack) 136 if (th->ack)
138 printk("ACK "); 137 sb_add(m, "ACK ");
139 if (th->psh) 138 if (th->psh)
140 printk("PSH "); 139 sb_add(m, "PSH ");
141 if (th->rst) 140 if (th->rst)
142 printk("RST "); 141 sb_add(m, "RST ");
143 if (th->syn) 142 if (th->syn)
144 printk("SYN "); 143 sb_add(m, "SYN ");
145 if (th->fin) 144 if (th->fin)
146 printk("FIN "); 145 sb_add(m, "FIN ");
147 /* Max length: 11 "URGP=65535 " */ 146 /* Max length: 11 "URGP=65535 " */
148 printk("URGP=%u ", ntohs(th->urg_ptr)); 147 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
149 148
150 if ((logflags & IPT_LOG_TCPOPT) && 149 if ((logflags & IPT_LOG_TCPOPT) &&
151 th->doff * 4 > sizeof(struct tcphdr)) { 150 th->doff * 4 > sizeof(struct tcphdr)) {
@@ -158,15 +157,15 @@ static void dump_packet(const struct nf_loginfo *info,
158 iphoff+ih->ihl*4+sizeof(_tcph), 157 iphoff+ih->ihl*4+sizeof(_tcph),
159 optsize, _opt); 158 optsize, _opt);
160 if (op == NULL) { 159 if (op == NULL) {
161 printk("TRUNCATED"); 160 sb_add(m, "TRUNCATED");
162 return; 161 return;
163 } 162 }
164 163
165 /* Max length: 127 "OPT (" 15*4*2chars ") " */ 164 /* Max length: 127 "OPT (" 15*4*2chars ") " */
166 printk("OPT ("); 165 sb_add(m, "OPT (");
167 for (i = 0; i < optsize; i++) 166 for (i = 0; i < optsize; i++)
168 printk("%02X", op[i]); 167 sb_add(m, "%02X", op[i]);
169 printk(") "); 168 sb_add(m, ") ");
170 } 169 }
171 break; 170 break;
172 } 171 }
@@ -177,9 +176,9 @@ static void dump_packet(const struct nf_loginfo *info,
177 176
178 if (ih->protocol == IPPROTO_UDP) 177 if (ih->protocol == IPPROTO_UDP)
179 /* Max length: 10 "PROTO=UDP " */ 178 /* Max length: 10 "PROTO=UDP " */
180 printk("PROTO=UDP " ); 179 sb_add(m, "PROTO=UDP " );
181 else /* Max length: 14 "PROTO=UDPLITE " */ 180 else /* Max length: 14 "PROTO=UDPLITE " */
182 printk("PROTO=UDPLITE "); 181 sb_add(m, "PROTO=UDPLITE ");
183 182
184 if (ntohs(ih->frag_off) & IP_OFFSET) 183 if (ntohs(ih->frag_off) & IP_OFFSET)
185 break; 184 break;
@@ -188,13 +187,13 @@ static void dump_packet(const struct nf_loginfo *info,
188 uh = skb_header_pointer(skb, iphoff+ih->ihl*4, 187 uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
189 sizeof(_udph), &_udph); 188 sizeof(_udph), &_udph);
190 if (uh == NULL) { 189 if (uh == NULL) {
191 printk("INCOMPLETE [%u bytes] ", 190 sb_add(m, "INCOMPLETE [%u bytes] ",
192 skb->len - iphoff - ih->ihl*4); 191 skb->len - iphoff - ih->ihl*4);
193 break; 192 break;
194 } 193 }
195 194
196 /* Max length: 20 "SPT=65535 DPT=65535 " */ 195 /* Max length: 20 "SPT=65535 DPT=65535 " */
197 printk("SPT=%u DPT=%u LEN=%u ", 196 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
198 ntohs(uh->source), ntohs(uh->dest), 197 ntohs(uh->source), ntohs(uh->dest),
199 ntohs(uh->len)); 198 ntohs(uh->len));
200 break; 199 break;
@@ -221,7 +220,7 @@ static void dump_packet(const struct nf_loginfo *info,
221 [ICMP_ADDRESSREPLY] = 12 }; 220 [ICMP_ADDRESSREPLY] = 12 };
222 221
223 /* Max length: 11 "PROTO=ICMP " */ 222 /* Max length: 11 "PROTO=ICMP " */
224 printk("PROTO=ICMP "); 223 sb_add(m, "PROTO=ICMP ");
225 224
226 if (ntohs(ih->frag_off) & IP_OFFSET) 225 if (ntohs(ih->frag_off) & IP_OFFSET)
227 break; 226 break;
@@ -230,19 +229,19 @@ static void dump_packet(const struct nf_loginfo *info,
230 ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, 229 ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
231 sizeof(_icmph), &_icmph); 230 sizeof(_icmph), &_icmph);
232 if (ich == NULL) { 231 if (ich == NULL) {
233 printk("INCOMPLETE [%u bytes] ", 232 sb_add(m, "INCOMPLETE [%u bytes] ",
234 skb->len - iphoff - ih->ihl*4); 233 skb->len - iphoff - ih->ihl*4);
235 break; 234 break;
236 } 235 }
237 236
238 /* Max length: 18 "TYPE=255 CODE=255 " */ 237 /* Max length: 18 "TYPE=255 CODE=255 " */
239 printk("TYPE=%u CODE=%u ", ich->type, ich->code); 238 sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
240 239
241 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 240 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
242 if (ich->type <= NR_ICMP_TYPES && 241 if (ich->type <= NR_ICMP_TYPES &&
243 required_len[ich->type] && 242 required_len[ich->type] &&
244 skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { 243 skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
245 printk("INCOMPLETE [%u bytes] ", 244 sb_add(m, "INCOMPLETE [%u bytes] ",
246 skb->len - iphoff - ih->ihl*4); 245 skb->len - iphoff - ih->ihl*4);
247 break; 246 break;
248 } 247 }
@@ -251,35 +250,35 @@ static void dump_packet(const struct nf_loginfo *info,
251 case ICMP_ECHOREPLY: 250 case ICMP_ECHOREPLY:
252 case ICMP_ECHO: 251 case ICMP_ECHO:
253 /* Max length: 19 "ID=65535 SEQ=65535 " */ 252 /* Max length: 19 "ID=65535 SEQ=65535 " */
254 printk("ID=%u SEQ=%u ", 253 sb_add(m, "ID=%u SEQ=%u ",
255 ntohs(ich->un.echo.id), 254 ntohs(ich->un.echo.id),
256 ntohs(ich->un.echo.sequence)); 255 ntohs(ich->un.echo.sequence));
257 break; 256 break;
258 257
259 case ICMP_PARAMETERPROB: 258 case ICMP_PARAMETERPROB:
260 /* Max length: 14 "PARAMETER=255 " */ 259 /* Max length: 14 "PARAMETER=255 " */
261 printk("PARAMETER=%u ", 260 sb_add(m, "PARAMETER=%u ",
262 ntohl(ich->un.gateway) >> 24); 261 ntohl(ich->un.gateway) >> 24);
263 break; 262 break;
264 case ICMP_REDIRECT: 263 case ICMP_REDIRECT:
265 /* Max length: 24 "GATEWAY=255.255.255.255 " */ 264 /* Max length: 24 "GATEWAY=255.255.255.255 " */
266 printk("GATEWAY=%pI4 ", &ich->un.gateway); 265 sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
267 /* Fall through */ 266 /* Fall through */
268 case ICMP_DEST_UNREACH: 267 case ICMP_DEST_UNREACH:
269 case ICMP_SOURCE_QUENCH: 268 case ICMP_SOURCE_QUENCH:
270 case ICMP_TIME_EXCEEDED: 269 case ICMP_TIME_EXCEEDED:
271 /* Max length: 3+maxlen */ 270 /* Max length: 3+maxlen */
272 if (!iphoff) { /* Only recurse once. */ 271 if (!iphoff) { /* Only recurse once. */
273 printk("["); 272 sb_add(m, "[");
274 dump_packet(info, skb, 273 dump_packet(m, info, skb,
275 iphoff + ih->ihl*4+sizeof(_icmph)); 274 iphoff + ih->ihl*4+sizeof(_icmph));
276 printk("] "); 275 sb_add(m, "] ");
277 } 276 }
278 277
279 /* Max length: 10 "MTU=65535 " */ 278 /* Max length: 10 "MTU=65535 " */
280 if (ich->type == ICMP_DEST_UNREACH && 279 if (ich->type == ICMP_DEST_UNREACH &&
281 ich->code == ICMP_FRAG_NEEDED) 280 ich->code == ICMP_FRAG_NEEDED)
282 printk("MTU=%u ", ntohs(ich->un.frag.mtu)); 281 sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
283 } 282 }
284 break; 283 break;
285 } 284 }
@@ -292,19 +291,19 @@ static void dump_packet(const struct nf_loginfo *info,
292 break; 291 break;
293 292
294 /* Max length: 9 "PROTO=AH " */ 293 /* Max length: 9 "PROTO=AH " */
295 printk("PROTO=AH "); 294 sb_add(m, "PROTO=AH ");
296 295
297 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 296 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
298 ah = skb_header_pointer(skb, iphoff+ih->ihl*4, 297 ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
299 sizeof(_ahdr), &_ahdr); 298 sizeof(_ahdr), &_ahdr);
300 if (ah == NULL) { 299 if (ah == NULL) {
301 printk("INCOMPLETE [%u bytes] ", 300 sb_add(m, "INCOMPLETE [%u bytes] ",
302 skb->len - iphoff - ih->ihl*4); 301 skb->len - iphoff - ih->ihl*4);
303 break; 302 break;
304 } 303 }
305 304
306 /* Length: 15 "SPI=0xF1234567 " */ 305 /* Length: 15 "SPI=0xF1234567 " */
307 printk("SPI=0x%x ", ntohl(ah->spi)); 306 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
308 break; 307 break;
309 } 308 }
310 case IPPROTO_ESP: { 309 case IPPROTO_ESP: {
@@ -312,7 +311,7 @@ static void dump_packet(const struct nf_loginfo *info,
312 const struct ip_esp_hdr *eh; 311 const struct ip_esp_hdr *eh;
313 312
314 /* Max length: 10 "PROTO=ESP " */ 313 /* Max length: 10 "PROTO=ESP " */
315 printk("PROTO=ESP "); 314 sb_add(m, "PROTO=ESP ");
316 315
317 if (ntohs(ih->frag_off) & IP_OFFSET) 316 if (ntohs(ih->frag_off) & IP_OFFSET)
318 break; 317 break;
@@ -321,25 +320,25 @@ static void dump_packet(const struct nf_loginfo *info,
321 eh = skb_header_pointer(skb, iphoff+ih->ihl*4, 320 eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
322 sizeof(_esph), &_esph); 321 sizeof(_esph), &_esph);
323 if (eh == NULL) { 322 if (eh == NULL) {
324 printk("INCOMPLETE [%u bytes] ", 323 sb_add(m, "INCOMPLETE [%u bytes] ",
325 skb->len - iphoff - ih->ihl*4); 324 skb->len - iphoff - ih->ihl*4);
326 break; 325 break;
327 } 326 }
328 327
329 /* Length: 15 "SPI=0xF1234567 " */ 328 /* Length: 15 "SPI=0xF1234567 " */
330 printk("SPI=0x%x ", ntohl(eh->spi)); 329 sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
331 break; 330 break;
332 } 331 }
333 /* Max length: 10 "PROTO 255 " */ 332 /* Max length: 10 "PROTO 255 " */
334 default: 333 default:
335 printk("PROTO=%u ", ih->protocol); 334 sb_add(m, "PROTO=%u ", ih->protocol);
336 } 335 }
337 336
338 /* Max length: 15 "UID=4294967295 " */ 337 /* Max length: 15 "UID=4294967295 " */
339 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { 338 if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
340 read_lock_bh(&skb->sk->sk_callback_lock); 339 read_lock_bh(&skb->sk->sk_callback_lock);
341 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 340 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
342 printk("UID=%u GID=%u ", 341 sb_add(m, "UID=%u GID=%u ",
343 skb->sk->sk_socket->file->f_cred->fsuid, 342 skb->sk->sk_socket->file->f_cred->fsuid,
344 skb->sk->sk_socket->file->f_cred->fsgid); 343 skb->sk->sk_socket->file->f_cred->fsgid);
345 read_unlock_bh(&skb->sk->sk_callback_lock); 344 read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -347,7 +346,7 @@ static void dump_packet(const struct nf_loginfo *info,
347 346
348 /* Max length: 16 "MARK=0xFFFFFFFF " */ 347 /* Max length: 16 "MARK=0xFFFFFFFF " */
349 if (!iphoff && skb->mark) 348 if (!iphoff && skb->mark)
350 printk("MARK=0x%x ", skb->mark); 349 sb_add(m, "MARK=0x%x ", skb->mark);
351 350
352 /* Proto Max log string length */ 351 /* Proto Max log string length */
353 /* IP: 40+46+6+11+127 = 230 */ 352 /* IP: 40+46+6+11+127 = 230 */
@@ -364,7 +363,8 @@ static void dump_packet(const struct nf_loginfo *info,
364 /* maxlen = 230+ 91 + 230 + 252 = 803 */ 363 /* maxlen = 230+ 91 + 230 + 252 = 803 */
365} 364}
366 365
367static void dump_mac_header(const struct nf_loginfo *info, 366static void dump_mac_header(struct sbuff *m,
367 const struct nf_loginfo *info,
368 const struct sk_buff *skb) 368 const struct sk_buff *skb)
369{ 369{
370 struct net_device *dev = skb->dev; 370 struct net_device *dev = skb->dev;
@@ -378,7 +378,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
378 378
379 switch (dev->type) { 379 switch (dev->type) {
380 case ARPHRD_ETHER: 380 case ARPHRD_ETHER:
381 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", 381 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto)); 383 ntohs(eth_hdr(skb)->h_proto));
384 return; 384 return;
@@ -387,17 +387,17 @@ static void dump_mac_header(const struct nf_loginfo *info,
387 } 387 }
388 388
389fallback: 389fallback:
390 printk("MAC="); 390 sb_add(m, "MAC=");
391 if (dev->hard_header_len && 391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) { 392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb); 393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i; 394 unsigned int i;
395 395
396 printk("%02x", *p++); 396 sb_add(m, "%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++) 397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 printk(":%02x", *p); 398 sb_add(m, ":%02x", *p);
399 } 399 }
400 printk(" "); 400 sb_add(m, " ");
401} 401}
402 402
403static struct nf_loginfo default_loginfo = { 403static struct nf_loginfo default_loginfo = {
@@ -419,11 +419,12 @@ ipt_log_packet(u_int8_t pf,
419 const struct nf_loginfo *loginfo, 419 const struct nf_loginfo *loginfo,
420 const char *prefix) 420 const char *prefix)
421{ 421{
422 struct sbuff *m = sb_open();
423
422 if (!loginfo) 424 if (!loginfo)
423 loginfo = &default_loginfo; 425 loginfo = &default_loginfo;
424 426
425 spin_lock_bh(&log_lock); 427 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
426 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
427 prefix, 428 prefix,
428 in ? in->name : "", 429 in ? in->name : "",
429 out ? out->name : ""); 430 out ? out->name : "");
@@ -434,20 +435,20 @@ ipt_log_packet(u_int8_t pf,
434 435
435 physindev = skb->nf_bridge->physindev; 436 physindev = skb->nf_bridge->physindev;
436 if (physindev && in != physindev) 437 if (physindev && in != physindev)
437 printk("PHYSIN=%s ", physindev->name); 438 sb_add(m, "PHYSIN=%s ", physindev->name);
438 physoutdev = skb->nf_bridge->physoutdev; 439 physoutdev = skb->nf_bridge->physoutdev;
439 if (physoutdev && out != physoutdev) 440 if (physoutdev && out != physoutdev)
440 printk("PHYSOUT=%s ", physoutdev->name); 441 sb_add(m, "PHYSOUT=%s ", physoutdev->name);
441 } 442 }
442#endif 443#endif
443 444
444 /* MAC logging for input path only. */ 445 /* MAC logging for input path only. */
445 if (in && !out) 446 if (in && !out)
446 dump_mac_header(loginfo, skb); 447 dump_mac_header(m, loginfo, skb);
448
449 dump_packet(m, loginfo, skb, 0);
447 450
448 dump_packet(loginfo, skb, 0); 451 sb_close(m);
449 printk("\n");
450 spin_unlock_bh(&log_lock);
451} 452}
452 453
453static unsigned int 454static unsigned int
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index c31b87668250..0f23b3f06df0 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,9 +44,16 @@ static unsigned int help(struct sk_buff *skb,
44 44
45 /* Try to get same port: if not, try to change it. */ 45 /* Try to get same port: if not, try to change it. */
46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
47 int ret;
48
47 exp->tuple.dst.u.tcp.port = htons(port); 49 exp->tuple.dst.u.tcp.port = htons(port);
48 if (nf_ct_expect_related(exp) == 0) 50 ret = nf_ct_expect_related(exp);
51 if (ret == 0)
52 break;
53 else if (ret != -EBUSY) {
54 port = 0;
49 break; 55 break;
56 }
50 } 57 }
51 58
52 if (port == 0) 59 if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 8c8632d9b93c..0047923c1f22 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,7 +47,7 @@ __nf_nat_proto_find(u_int8_t protonum)
47 return rcu_dereference(nf_nat_protos[protonum]); 47 return rcu_dereference(nf_nat_protos[protonum]);
48} 48}
49 49
50const struct nf_nat_protocol * 50static const struct nf_nat_protocol *
51nf_nat_proto_find_get(u_int8_t protonum) 51nf_nat_proto_find_get(u_int8_t protonum)
52{ 52{
53 const struct nf_nat_protocol *p; 53 const struct nf_nat_protocol *p;
@@ -60,14 +60,12 @@ nf_nat_proto_find_get(u_int8_t protonum)
60 60
61 return p; 61 return p;
62} 62}
63EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
64 63
65void 64static void
66nf_nat_proto_put(const struct nf_nat_protocol *p) 65nf_nat_proto_put(const struct nf_nat_protocol *p)
67{ 66{
68 module_put(p->me); 67 module_put(p->me);
69} 68}
70EXPORT_SYMBOL_GPL(nf_nat_proto_put);
71 69
72/* We keep an extra hash for each conntrack, for fast searching. */ 70/* We keep an extra hash for each conntrack, for fast searching. */
73static inline unsigned int 71static inline unsigned int
@@ -262,11 +260,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 260 proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
263 261
264 /* Only bother mapping if it's not already in range and unique */ 262 /* Only bother mapping if it's not already in range and unique */
265 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && 263 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
266 (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 264 if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
267 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 265 if (proto->in_range(tuple, maniptype, &range->min,
268 !nf_nat_used_tuple(tuple, ct)) 266 &range->max) &&
269 goto out; 267 (range->min.all == range->max.all ||
268 !nf_nat_used_tuple(tuple, ct)))
269 goto out;
270 } else if (!nf_nat_used_tuple(tuple, ct)) {
271 goto out;
272 }
273 }
270 274
271 /* Last change: get protocol to try to obtain unique tuple. */ 275 /* Last change: get protocol to try to obtain unique tuple. */
272 proto->unique_tuple(tuple, range, maniptype, ct); 276 proto->unique_tuple(tuple, range, maniptype, ct);
@@ -458,6 +462,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
458 return 0; 462 return 0;
459 } 463 }
460 464
465 if (manip == IP_NAT_MANIP_SRC)
466 statusbit = IPS_SRC_NAT;
467 else
468 statusbit = IPS_DST_NAT;
469
470 /* Invert if this is reply dir. */
471 if (dir == IP_CT_DIR_REPLY)
472 statusbit ^= IPS_NAT_MASK;
473
474 if (!(ct->status & statusbit))
475 return 1;
476
461 pr_debug("icmp_reply_translation: translating error %p manip %u " 477 pr_debug("icmp_reply_translation: translating error %p manip %u "
462 "dir %s\n", skb, manip, 478 "dir %s\n", skb, manip,
463 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); 479 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
@@ -492,20 +508,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
492 508
493 /* Change outer to look the reply to an incoming packet 509 /* Change outer to look the reply to an incoming packet
494 * (proto 0 means don't invert per-proto part). */ 510 * (proto 0 means don't invert per-proto part). */
495 if (manip == IP_NAT_MANIP_SRC) 511 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
496 statusbit = IPS_SRC_NAT; 512 if (!manip_pkt(0, skb, 0, &target, manip))
497 else 513 return 0;
498 statusbit = IPS_DST_NAT;
499
500 /* Invert if this is reply dir. */
501 if (dir == IP_CT_DIR_REPLY)
502 statusbit ^= IPS_NAT_MASK;
503
504 if (ct->status & statusbit) {
505 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
506 if (!manip_pkt(0, skb, 0, &target, manip))
507 return 0;
508 }
509 514
510 return 1; 515 return 1;
511} 516}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 86e0e84ff0a0..dc73abb3fe27 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -79,9 +79,16 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
79 79
80 /* Try to get same port: if not, try to change it. */ 80 /* Try to get same port: if not, try to change it. */
81 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 81 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
82 int ret;
83
82 exp->tuple.dst.u.tcp.port = htons(port); 84 exp->tuple.dst.u.tcp.port = htons(port);
83 if (nf_ct_expect_related(exp) == 0) 85 ret = nf_ct_expect_related(exp);
86 if (ret == 0)
87 break;
88 else if (ret != -EBUSY) {
89 port = 0;
84 break; 90 break;
91 }
85 } 92 }
86 93
87 if (port == 0) 94 if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 5045196d853c..790f3160e012 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -222,13 +222,24 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
222 /* Try to get a pair of ports. */ 222 /* Try to get a pair of ports. */
223 for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); 223 for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
224 nated_port != 0; nated_port += 2) { 224 nated_port != 0; nated_port += 2) {
225 int ret;
226
225 rtp_exp->tuple.dst.u.udp.port = htons(nated_port); 227 rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
226 if (nf_ct_expect_related(rtp_exp) == 0) { 228 ret = nf_ct_expect_related(rtp_exp);
229 if (ret == 0) {
227 rtcp_exp->tuple.dst.u.udp.port = 230 rtcp_exp->tuple.dst.u.udp.port =
228 htons(nated_port + 1); 231 htons(nated_port + 1);
229 if (nf_ct_expect_related(rtcp_exp) == 0) 232 ret = nf_ct_expect_related(rtcp_exp);
233 if (ret == 0)
234 break;
235 else if (ret != -EBUSY) {
236 nf_ct_unexpect_related(rtp_exp);
237 nated_port = 0;
230 break; 238 break;
231 nf_ct_unexpect_related(rtp_exp); 239 }
240 } else if (ret != -EBUSY) {
241 nated_port = 0;
242 break;
232 } 243 }
233 } 244 }
234 245
@@ -284,9 +295,16 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
284 295
285 /* Try to get same port: if not, try to change it. */ 296 /* Try to get same port: if not, try to change it. */
286 for (; nated_port != 0; nated_port++) { 297 for (; nated_port != 0; nated_port++) {
298 int ret;
299
287 exp->tuple.dst.u.tcp.port = htons(nated_port); 300 exp->tuple.dst.u.tcp.port = htons(nated_port);
288 if (nf_ct_expect_related(exp) == 0) 301 ret = nf_ct_expect_related(exp);
302 if (ret == 0)
303 break;
304 else if (ret != -EBUSY) {
305 nated_port = 0;
289 break; 306 break;
307 }
290 } 308 }
291 309
292 if (nated_port == 0) { /* No port available */ 310 if (nated_port == 0) { /* No port available */
@@ -334,9 +352,16 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
334 352
335 /* Try to get same port: if not, try to change it. */ 353 /* Try to get same port: if not, try to change it. */
336 for (; nated_port != 0; nated_port++) { 354 for (; nated_port != 0; nated_port++) {
355 int ret;
356
337 exp->tuple.dst.u.tcp.port = htons(nated_port); 357 exp->tuple.dst.u.tcp.port = htons(nated_port);
338 if (nf_ct_expect_related(exp) == 0) 358 ret = nf_ct_expect_related(exp);
359 if (ret == 0)
339 break; 360 break;
361 else if (ret != -EBUSY) {
362 nated_port = 0;
363 break;
364 }
340 } 365 }
341 366
342 if (nated_port == 0) { /* No port available */ 367 if (nated_port == 0) { /* No port available */
@@ -418,9 +443,16 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
418 443
419 /* Try to get same port: if not, try to change it. */ 444 /* Try to get same port: if not, try to change it. */
420 for (; nated_port != 0; nated_port++) { 445 for (; nated_port != 0; nated_port++) {
446 int ret;
447
421 exp->tuple.dst.u.tcp.port = htons(nated_port); 448 exp->tuple.dst.u.tcp.port = htons(nated_port);
422 if (nf_ct_expect_related(exp) == 0) 449 ret = nf_ct_expect_related(exp);
450 if (ret == 0)
451 break;
452 else if (ret != -EBUSY) {
453 nated_port = 0;
423 break; 454 break;
455 }
424 } 456 }
425 457
426 if (nated_port == 0) { /* No port available */ 458 if (nated_port == 0) { /* No port available */
@@ -500,9 +532,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
500 532
501 /* Try to get same port: if not, try to change it. */ 533 /* Try to get same port: if not, try to change it. */
502 for (nated_port = ntohs(port); nated_port != 0; nated_port++) { 534 for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
535 int ret;
536
503 exp->tuple.dst.u.tcp.port = htons(nated_port); 537 exp->tuple.dst.u.tcp.port = htons(nated_port);
504 if (nf_ct_expect_related(exp) == 0) 538 ret = nf_ct_expect_related(exp);
539 if (ret == 0)
505 break; 540 break;
541 else if (ret != -EBUSY) {
542 nated_port = 0;
543 break;
544 }
506 } 545 }
507 546
508 if (nated_port == 0) { /* No port available */ 547 if (nated_port == 0) { /* No port available */
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4a0c6b548eee..31427fb57aa8 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,6 +153,35 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
153} 153}
154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); 154EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
155 155
156static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data,
157 int datalen, __sum16 *check, int oldlen)
158{
159 struct rtable *rt = skb_rtable(skb);
160
161 if (skb->ip_summed != CHECKSUM_PARTIAL) {
162 if (!(rt->rt_flags & RTCF_LOCAL) &&
163 skb->dev->features & NETIF_F_V4_CSUM) {
164 skb->ip_summed = CHECKSUM_PARTIAL;
165 skb->csum_start = skb_headroom(skb) +
166 skb_network_offset(skb) +
167 iph->ihl * 4;
168 skb->csum_offset = (void *)check - data;
169 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
170 datalen, iph->protocol, 0);
171 } else {
172 *check = 0;
173 *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
174 datalen, iph->protocol,
175 csum_partial(data, datalen,
176 0));
177 if (iph->protocol == IPPROTO_UDP && !*check)
178 *check = CSUM_MANGLED_0;
179 }
180 } else
181 inet_proto_csum_replace2(check, skb,
182 htons(oldlen), htons(datalen), 1);
183}
184
156/* Generic function for mangling variable-length address changes inside 185/* Generic function for mangling variable-length address changes inside
157 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX 186 * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
158 * command in FTP). 187 * command in FTP).
@@ -169,7 +198,6 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
169 const char *rep_buffer, 198 const char *rep_buffer,
170 unsigned int rep_len, bool adjust) 199 unsigned int rep_len, bool adjust)
171{ 200{
172 struct rtable *rt = skb_rtable(skb);
173 struct iphdr *iph; 201 struct iphdr *iph;
174 struct tcphdr *tcph; 202 struct tcphdr *tcph;
175 int oldlen, datalen; 203 int oldlen, datalen;
@@ -192,26 +220,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
192 match_offset, match_len, rep_buffer, rep_len); 220 match_offset, match_len, rep_buffer, rep_len);
193 221
194 datalen = skb->len - iph->ihl*4; 222 datalen = skb->len - iph->ihl*4;
195 if (skb->ip_summed != CHECKSUM_PARTIAL) { 223 nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
196 if (!(rt->rt_flags & RTCF_LOCAL) &&
197 skb->dev->features & NETIF_F_V4_CSUM) {
198 skb->ip_summed = CHECKSUM_PARTIAL;
199 skb->csum_start = skb_headroom(skb) +
200 skb_network_offset(skb) +
201 iph->ihl * 4;
202 skb->csum_offset = offsetof(struct tcphdr, check);
203 tcph->check = ~tcp_v4_check(datalen,
204 iph->saddr, iph->daddr, 0);
205 } else {
206 tcph->check = 0;
207 tcph->check = tcp_v4_check(datalen,
208 iph->saddr, iph->daddr,
209 csum_partial(tcph,
210 datalen, 0));
211 }
212 } else
213 inet_proto_csum_replace2(&tcph->check, skb,
214 htons(oldlen), htons(datalen), 1);
215 224
216 if (adjust && rep_len != match_len) 225 if (adjust && rep_len != match_len)
217 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, 226 nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
@@ -240,7 +249,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
240 const char *rep_buffer, 249 const char *rep_buffer,
241 unsigned int rep_len) 250 unsigned int rep_len)
242{ 251{
243 struct rtable *rt = skb_rtable(skb);
244 struct iphdr *iph; 252 struct iphdr *iph;
245 struct udphdr *udph; 253 struct udphdr *udph;
246 int datalen, oldlen; 254 int datalen, oldlen;
@@ -274,29 +282,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
274 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) 282 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
275 return 1; 283 return 1;
276 284
277 if (skb->ip_summed != CHECKSUM_PARTIAL) { 285 nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
278 if (!(rt->rt_flags & RTCF_LOCAL) &&
279 skb->dev->features & NETIF_F_V4_CSUM) {
280 skb->ip_summed = CHECKSUM_PARTIAL;
281 skb->csum_start = skb_headroom(skb) +
282 skb_network_offset(skb) +
283 iph->ihl * 4;
284 skb->csum_offset = offsetof(struct udphdr, check);
285 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
286 datalen, IPPROTO_UDP,
287 0);
288 } else {
289 udph->check = 0;
290 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
291 datalen, IPPROTO_UDP,
292 csum_partial(udph,
293 datalen, 0));
294 if (!udph->check)
295 udph->check = CSUM_MANGLED_0;
296 }
297 } else
298 inet_proto_csum_replace2(&udph->check, skb,
299 htons(oldlen), htons(datalen), 1);
300 286
301 return 1; 287 return 1;
302} 288}
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index ea83a886b03e..535e1a802356 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -45,9 +45,16 @@ static unsigned int help(struct sk_buff *skb,
45 45
46 /* Try to get same port: if not, try to change it. */ 46 /* Try to get same port: if not, try to change it. */
47 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 47 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
48 int ret;
49
48 exp->tuple.dst.u.tcp.port = htons(port); 50 exp->tuple.dst.u.tcp.port = htons(port);
49 if (nf_ct_expect_related(exp) == 0) 51 ret = nf_ct_expect_related(exp);
52 if (ret == 0)
53 break;
54 else if (ret != -EBUSY) {
55 port = 0;
50 break; 56 break;
57 }
51 } 58 }
52 59
53 if (port == 0) 60 if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ebbd319f62f5..21c30426480b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -106,16 +106,15 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
106{ 106{
107 /* Force range to this IP; let proto decide mapping for 107 /* Force range to this IP; let proto decide mapping for
108 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). 108 per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
109 Use reply in case it's already been mangled (eg local packet).
110 */ 109 */
111 __be32 ip 110 struct nf_nat_range range;
112 = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC 111
113 ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip 112 range.flags = 0;
114 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); 113 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
115 struct nf_nat_range range 114 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
116 = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; 115 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
117 116 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
118 pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip); 117
119 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); 118 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
120} 119}
121 120
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 11b538deaaec..e40cf7816fdb 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -307,9 +307,16 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
307 exp->expectfn = ip_nat_sip_expected; 307 exp->expectfn = ip_nat_sip_expected;
308 308
309 for (; port != 0; port++) { 309 for (; port != 0; port++) {
310 int ret;
311
310 exp->tuple.dst.u.udp.port = htons(port); 312 exp->tuple.dst.u.udp.port = htons(port);
311 if (nf_ct_expect_related(exp) == 0) 313 ret = nf_ct_expect_related(exp);
314 if (ret == 0)
315 break;
316 else if (ret != -EBUSY) {
317 port = 0;
312 break; 318 break;
319 }
313 } 320 }
314 321
315 if (port == 0) 322 if (port == 0)
@@ -480,13 +487,25 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
480 /* Try to get same pair of ports: if not, try to change them. */ 487 /* Try to get same pair of ports: if not, try to change them. */
481 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port); 488 for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
482 port != 0; port += 2) { 489 port != 0; port += 2) {
490 int ret;
491
483 rtp_exp->tuple.dst.u.udp.port = htons(port); 492 rtp_exp->tuple.dst.u.udp.port = htons(port);
484 if (nf_ct_expect_related(rtp_exp) != 0) 493 ret = nf_ct_expect_related(rtp_exp);
494 if (ret == -EBUSY)
485 continue; 495 continue;
496 else if (ret < 0) {
497 port = 0;
498 break;
499 }
486 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1); 500 rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
487 if (nf_ct_expect_related(rtcp_exp) == 0) 501 ret = nf_ct_expect_related(rtcp_exp);
502 if (ret == 0)
488 break; 503 break;
489 nf_ct_unexpect_related(rtp_exp); 504 else if (ret != -EBUSY) {
505 nf_ct_unexpect_related(rtp_exp);
506 port = 0;
507 break;
508 }
490 } 509 }
491 510
492 if (port == 0) 511 if (port == 0)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a0232f3a358b..8f8527d41682 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1422 1422
1423 newsk = tcp_create_openreq_child(sk, req, skb); 1423 newsk = tcp_create_openreq_child(sk, req, skb);
1424 if (!newsk) 1424 if (!newsk)
1425 goto exit; 1425 goto exit_nonewsk;
1426 1426
1427 newsk->sk_gso_type = SKB_GSO_TCPV4; 1427 newsk->sk_gso_type = SKB_GSO_TCPV4;
1428 sk_setup_caps(newsk, dst); 1428 sk_setup_caps(newsk, dst);
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1469 } 1469 }
1470#endif 1470#endif
1471 1471
1472 if (__inet_inherit_port(sk, newsk) < 0) {
1473 sock_put(newsk);
1474 goto exit;
1475 }
1472 __inet_hash_nolisten(newsk, NULL); 1476 __inet_hash_nolisten(newsk, NULL);
1473 __inet_inherit_port(sk, newsk);
1474 1477
1475 return newsk; 1478 return newsk;
1476 1479
1477exit_overflow: 1480exit_overflow:
1478 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1481 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1482exit_nonewsk:
1483 dst_release(dst);
1479exit: 1484exit:
1480 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 1485 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1481 dst_release(dst);
1482 return NULL; 1486 return NULL;
1483} 1487}
1484EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1488EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 60220985bb80..54e8e42f7a88 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -343,7 +343,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
343 */ 343 */
344 v4addr = LOOPBACK4_IPV6; 344 v4addr = LOOPBACK4_IPV6;
345 if (!(addr_type & IPV6_ADDR_MULTICAST)) { 345 if (!(addr_type & IPV6_ADDR_MULTICAST)) {
346 if (!ipv6_chk_addr(net, &addr->sin6_addr, 346 if (!inet->transparent &&
347 !ipv6_chk_addr(net, &addr->sin6_addr,
347 dev, 0)) { 348 dev, 0)) {
348 err = -EADDRNOTAVAIL; 349 err = -EADDRNOTAVAIL;
349 goto out_unlock; 350 goto out_unlock;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ef371aa01ac5..320bdb877eed 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -577,6 +577,25 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
577 u8 *ptr = nh + opt->dst1; 577 u8 *ptr = nh + opt->dst1;
578 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); 578 put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
579 } 579 }
580 if (np->rxopt.bits.rxorigdstaddr) {
581 struct sockaddr_in6 sin6;
582 u16 *ports = (u16 *) skb_transport_header(skb);
583
584 if (skb_transport_offset(skb) + 4 <= skb->len) {
585 /* All current transport protocols have the port numbers in the
586 * first four bytes of the transport header and this function is
587 * written with this assumption in mind.
588 */
589
590 sin6.sin6_family = AF_INET6;
591 ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
592 sin6.sin6_port = ports[1];
593 sin6.sin6_flowinfo = 0;
594 sin6.sin6_scope_id = 0;
595
596 put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
597 }
598 }
580 return 0; 599 return 0;
581} 600}
582 601
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a7f66bc8f0b0..0553867a317f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -342,6 +342,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
342 retv = 0; 342 retv = 0;
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT:
346 if (optlen < sizeof(int))
347 goto e_inval;
348 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
349 inet_sk(sk)->transparent = valbool;
350 retv = 0;
351 break;
352
353 case IPV6_RECVORIGDSTADDR:
354 if (optlen < sizeof(int))
355 goto e_inval;
356 np->rxopt.bits.rxorigdstaddr = valbool;
357 retv = 0;
358 break;
359
345 case IPV6_HOPOPTS: 360 case IPV6_HOPOPTS:
346 case IPV6_RTHDRDSTOPTS: 361 case IPV6_RTHDRDSTOPTS:
347 case IPV6_RTHDR: 362 case IPV6_RTHDR:
@@ -1104,6 +1119,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1104 break; 1119 break;
1105 } 1120 }
1106 1121
1122 case IPV6_TRANSPARENT:
1123 val = inet_sk(sk)->transparent;
1124 break;
1125
1126 case IPV6_RECVORIGDSTADDR:
1127 val = np->rxopt.bits.rxorigdstaddr;
1128 break;
1129
1107 case IPV6_UNICAST_HOPS: 1130 case IPV6_UNICAST_HOPS:
1108 case IPV6_MULTICAST_HOPS: 1131 case IPV6_MULTICAST_HOPS:
1109 { 1132 {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 29d643bcafa4..44d2eeac089b 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -132,10 +132,10 @@ config IP6_NF_MATCH_RT
132# The targets 132# The targets
133config IP6_NF_TARGET_HL 133config IP6_NF_TARGET_HL
134 tristate '"HL" hoplimit target support' 134 tristate '"HL" hoplimit target support'
135 depends on NETFILTER_ADVANCED 135 depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
136 select NETFILTER_XT_TARGET_HL 136 select NETFILTER_XT_TARGET_HL
137 ---help--- 137 ---help---
138 This is a backwards-compat option for the user's convenience 138 This is a backwards-compatible option for the user's convenience
139 (e.g. when running oldconfig). It selects 139 (e.g. when running oldconfig). It selects
140 CONFIG_NETFILTER_XT_TARGET_HL. 140 CONFIG_NETFILTER_XT_TARGET_HL.
141 141
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index aafbba30c899..3f8e4a3d83ce 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,10 +11,11 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o 14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
15 16
16# l3 independent conntrack 17# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 18obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
18 19
19# matches 20# matches
20obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 21obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6b331e9b5706..51df035897e7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -215,7 +215,7 @@ static inline bool unconditional(const struct ip6t_ip6 *ipv6)
215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; 215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
216} 216}
217 217
218static inline const struct ip6t_entry_target * 218static inline const struct xt_entry_target *
219ip6t_get_target_c(const struct ip6t_entry *e) 219ip6t_get_target_c(const struct ip6t_entry *e)
220{ 220{
221 return ip6t_get_target((struct ip6t_entry *)e); 221 return ip6t_get_target((struct ip6t_entry *)e);
@@ -260,9 +260,9 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
260 const char *hookname, const char **chainname, 260 const char *hookname, const char **chainname,
261 const char **comment, unsigned int *rulenum) 261 const char **comment, unsigned int *rulenum)
262{ 262{
263 const struct ip6t_standard_target *t = (void *)ip6t_get_target_c(s); 263 const struct xt_standard_target *t = (void *)ip6t_get_target_c(s);
264 264
265 if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { 265 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
266 /* Head of user chain: ERROR target with chainname */ 266 /* Head of user chain: ERROR target with chainname */
267 *chainname = t->target.data; 267 *chainname = t->target.data;
268 (*rulenum) = 0; 268 (*rulenum) = 0;
@@ -271,7 +271,7 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
271 271
272 if (s->target_offset == sizeof(struct ip6t_entry) && 272 if (s->target_offset == sizeof(struct ip6t_entry) &&
273 strcmp(t->target.u.kernel.target->name, 273 strcmp(t->target.u.kernel.target->name,
274 IP6T_STANDARD_TARGET) == 0 && 274 XT_STANDARD_TARGET) == 0 &&
275 t->verdict < 0 && 275 t->verdict < 0 &&
276 unconditional(&s->ipv6)) { 276 unconditional(&s->ipv6)) {
277 /* Tail of chains: STANDARD target (return/policy) */ 277 /* Tail of chains: STANDARD target (return/policy) */
@@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
369 e = get_entry(table_base, private->hook_entry[hook]); 369 e = get_entry(table_base, private->hook_entry[hook]);
370 370
371 do { 371 do {
372 const struct ip6t_entry_target *t; 372 const struct xt_entry_target *t;
373 const struct xt_entry_match *ematch; 373 const struct xt_entry_match *ematch;
374 374
375 IP_NF_ASSERT(e); 375 IP_NF_ASSERT(e);
@@ -403,10 +403,10 @@ ip6t_do_table(struct sk_buff *skb,
403 if (!t->u.kernel.target->target) { 403 if (!t->u.kernel.target->target) {
404 int v; 404 int v;
405 405
406 v = ((struct ip6t_standard_target *)t)->verdict; 406 v = ((struct xt_standard_target *)t)->verdict;
407 if (v < 0) { 407 if (v < 0) {
408 /* Pop from stack? */ 408 /* Pop from stack? */
409 if (v != IP6T_RETURN) { 409 if (v != XT_RETURN) {
410 verdict = (unsigned)(-v) - 1; 410 verdict = (unsigned)(-v) - 1;
411 break; 411 break;
412 } 412 }
@@ -434,7 +434,7 @@ ip6t_do_table(struct sk_buff *skb,
434 acpar.targinfo = t->data; 434 acpar.targinfo = t->data;
435 435
436 verdict = t->u.kernel.target->target(skb, &acpar); 436 verdict = t->u.kernel.target->target(skb, &acpar);
437 if (verdict == IP6T_CONTINUE) 437 if (verdict == XT_CONTINUE)
438 e = ip6t_next_entry(e); 438 e = ip6t_next_entry(e);
439 else 439 else
440 /* Verdict */ 440 /* Verdict */
@@ -474,7 +474,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
474 e->counters.pcnt = pos; 474 e->counters.pcnt = pos;
475 475
476 for (;;) { 476 for (;;) {
477 const struct ip6t_standard_target *t 477 const struct xt_standard_target *t
478 = (void *)ip6t_get_target_c(e); 478 = (void *)ip6t_get_target_c(e);
479 int visited = e->comefrom & (1 << hook); 479 int visited = e->comefrom & (1 << hook);
480 480
@@ -488,13 +488,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
488 /* Unconditional return/END. */ 488 /* Unconditional return/END. */
489 if ((e->target_offset == sizeof(struct ip6t_entry) && 489 if ((e->target_offset == sizeof(struct ip6t_entry) &&
490 (strcmp(t->target.u.user.name, 490 (strcmp(t->target.u.user.name,
491 IP6T_STANDARD_TARGET) == 0) && 491 XT_STANDARD_TARGET) == 0) &&
492 t->verdict < 0 && 492 t->verdict < 0 &&
493 unconditional(&e->ipv6)) || visited) { 493 unconditional(&e->ipv6)) || visited) {
494 unsigned int oldpos, size; 494 unsigned int oldpos, size;
495 495
496 if ((strcmp(t->target.u.user.name, 496 if ((strcmp(t->target.u.user.name,
497 IP6T_STANDARD_TARGET) == 0) && 497 XT_STANDARD_TARGET) == 0) &&
498 t->verdict < -NF_MAX_VERDICT - 1) { 498 t->verdict < -NF_MAX_VERDICT - 1) {
499 duprintf("mark_source_chains: bad " 499 duprintf("mark_source_chains: bad "
500 "negative verdict (%i)\n", 500 "negative verdict (%i)\n",
@@ -537,7 +537,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
537 int newpos = t->verdict; 537 int newpos = t->verdict;
538 538
539 if (strcmp(t->target.u.user.name, 539 if (strcmp(t->target.u.user.name,
540 IP6T_STANDARD_TARGET) == 0 && 540 XT_STANDARD_TARGET) == 0 &&
541 newpos >= 0) { 541 newpos >= 0) {
542 if (newpos > newinfo->size - 542 if (newpos > newinfo->size -
543 sizeof(struct ip6t_entry)) { 543 sizeof(struct ip6t_entry)) {
@@ -565,7 +565,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
565 return 1; 565 return 1;
566} 566}
567 567
568static void cleanup_match(struct ip6t_entry_match *m, struct net *net) 568static void cleanup_match(struct xt_entry_match *m, struct net *net)
569{ 569{
570 struct xt_mtdtor_param par; 570 struct xt_mtdtor_param par;
571 571
@@ -581,14 +581,14 @@ static void cleanup_match(struct ip6t_entry_match *m, struct net *net)
581static int 581static int
582check_entry(const struct ip6t_entry *e, const char *name) 582check_entry(const struct ip6t_entry *e, const char *name)
583{ 583{
584 const struct ip6t_entry_target *t; 584 const struct xt_entry_target *t;
585 585
586 if (!ip6_checkentry(&e->ipv6)) { 586 if (!ip6_checkentry(&e->ipv6)) {
587 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 587 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
588 return -EINVAL; 588 return -EINVAL;
589 } 589 }
590 590
591 if (e->target_offset + sizeof(struct ip6t_entry_target) > 591 if (e->target_offset + sizeof(struct xt_entry_target) >
592 e->next_offset) 592 e->next_offset)
593 return -EINVAL; 593 return -EINVAL;
594 594
@@ -599,7 +599,7 @@ check_entry(const struct ip6t_entry *e, const char *name)
599 return 0; 599 return 0;
600} 600}
601 601
602static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 602static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
603{ 603{
604 const struct ip6t_ip6 *ipv6 = par->entryinfo; 604 const struct ip6t_ip6 *ipv6 = par->entryinfo;
605 int ret; 605 int ret;
@@ -618,7 +618,7 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
618} 618}
619 619
620static int 620static int
621find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 621find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
622{ 622{
623 struct xt_match *match; 623 struct xt_match *match;
624 int ret; 624 int ret;
@@ -643,7 +643,7 @@ err:
643 643
644static int check_target(struct ip6t_entry *e, struct net *net, const char *name) 644static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
645{ 645{
646 struct ip6t_entry_target *t = ip6t_get_target(e); 646 struct xt_entry_target *t = ip6t_get_target(e);
647 struct xt_tgchk_param par = { 647 struct xt_tgchk_param par = {
648 .net = net, 648 .net = net,
649 .table = name, 649 .table = name,
@@ -670,7 +670,7 @@ static int
670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, 670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
671 unsigned int size) 671 unsigned int size)
672{ 672{
673 struct ip6t_entry_target *t; 673 struct xt_entry_target *t;
674 struct xt_target *target; 674 struct xt_target *target;
675 int ret; 675 int ret;
676 unsigned int j; 676 unsigned int j;
@@ -721,7 +721,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
721 721
722static bool check_underflow(const struct ip6t_entry *e) 722static bool check_underflow(const struct ip6t_entry *e)
723{ 723{
724 const struct ip6t_entry_target *t; 724 const struct xt_entry_target *t;
725 unsigned int verdict; 725 unsigned int verdict;
726 726
727 if (!unconditional(&e->ipv6)) 727 if (!unconditional(&e->ipv6))
@@ -729,7 +729,7 @@ static bool check_underflow(const struct ip6t_entry *e)
729 t = ip6t_get_target_c(e); 729 t = ip6t_get_target_c(e);
730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
731 return false; 731 return false;
732 verdict = ((struct ip6t_standard_target *)t)->verdict; 732 verdict = ((struct xt_standard_target *)t)->verdict;
733 verdict = -verdict - 1; 733 verdict = -verdict - 1;
734 return verdict == NF_DROP || verdict == NF_ACCEPT; 734 return verdict == NF_DROP || verdict == NF_ACCEPT;
735} 735}
@@ -752,7 +752,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
752 } 752 }
753 753
754 if (e->next_offset 754 if (e->next_offset
755 < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) { 755 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
756 duprintf("checking: element %p size %u\n", 756 duprintf("checking: element %p size %u\n",
757 e, e->next_offset); 757 e, e->next_offset);
758 return -EINVAL; 758 return -EINVAL;
@@ -784,7 +784,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
784static void cleanup_entry(struct ip6t_entry *e, struct net *net) 784static void cleanup_entry(struct ip6t_entry *e, struct net *net)
785{ 785{
786 struct xt_tgdtor_param par; 786 struct xt_tgdtor_param par;
787 struct ip6t_entry_target *t; 787 struct xt_entry_target *t;
788 struct xt_entry_match *ematch; 788 struct xt_entry_match *ematch;
789 789
790 /* Cleanup all matches */ 790 /* Cleanup all matches */
@@ -985,8 +985,8 @@ copy_entries_to_user(unsigned int total_size,
985 /* ... then go back and fix counters and names */ 985 /* ... then go back and fix counters and names */
986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
987 unsigned int i; 987 unsigned int i;
988 const struct ip6t_entry_match *m; 988 const struct xt_entry_match *m;
989 const struct ip6t_entry_target *t; 989 const struct xt_entry_target *t;
990 990
991 e = (struct ip6t_entry *)(loc_cpu_entry + off); 991 e = (struct ip6t_entry *)(loc_cpu_entry + off);
992 if (copy_to_user(userptr + off 992 if (copy_to_user(userptr + off
@@ -1003,7 +1003,7 @@ copy_entries_to_user(unsigned int total_size,
1003 m = (void *)e + i; 1003 m = (void *)e + i;
1004 1004
1005 if (copy_to_user(userptr + off + i 1005 if (copy_to_user(userptr + off + i
1006 + offsetof(struct ip6t_entry_match, 1006 + offsetof(struct xt_entry_match,
1007 u.user.name), 1007 u.user.name),
1008 m->u.kernel.match->name, 1008 m->u.kernel.match->name,
1009 strlen(m->u.kernel.match->name)+1) 1009 strlen(m->u.kernel.match->name)+1)
@@ -1015,7 +1015,7 @@ copy_entries_to_user(unsigned int total_size,
1015 1015
1016 t = ip6t_get_target_c(e); 1016 t = ip6t_get_target_c(e);
1017 if (copy_to_user(userptr + off + e->target_offset 1017 if (copy_to_user(userptr + off + e->target_offset
1018 + offsetof(struct ip6t_entry_target, 1018 + offsetof(struct xt_entry_target,
1019 u.user.name), 1019 u.user.name),
1020 t->u.kernel.target->name, 1020 t->u.kernel.target->name,
1021 strlen(t->u.kernel.target->name)+1) != 0) { 1021 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1053,7 +1053,7 @@ static int compat_calc_entry(const struct ip6t_entry *e,
1053 const void *base, struct xt_table_info *newinfo) 1053 const void *base, struct xt_table_info *newinfo)
1054{ 1054{
1055 const struct xt_entry_match *ematch; 1055 const struct xt_entry_match *ematch;
1056 const struct ip6t_entry_target *t; 1056 const struct xt_entry_target *t;
1057 unsigned int entry_offset; 1057 unsigned int entry_offset;
1058 int off, i, ret; 1058 int off, i, ret;
1059 1059
@@ -1105,7 +1105,7 @@ static int compat_table_info(const struct xt_table_info *info,
1105static int get_info(struct net *net, void __user *user, 1105static int get_info(struct net *net, void __user *user,
1106 const int *len, int compat) 1106 const int *len, int compat)
1107{ 1107{
1108 char name[IP6T_TABLE_MAXNAMELEN]; 1108 char name[XT_TABLE_MAXNAMELEN];
1109 struct xt_table *t; 1109 struct xt_table *t;
1110 int ret; 1110 int ret;
1111 1111
@@ -1118,7 +1118,7 @@ static int get_info(struct net *net, void __user *user,
1118 if (copy_from_user(name, user, sizeof(name)) != 0) 1118 if (copy_from_user(name, user, sizeof(name)) != 0)
1119 return -EFAULT; 1119 return -EFAULT;
1120 1120
1121 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1121 name[XT_TABLE_MAXNAMELEN-1] = '\0';
1122#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1123 if (compat) 1123 if (compat)
1124 xt_compat_lock(AF_INET6); 1124 xt_compat_lock(AF_INET6);
@@ -1415,14 +1415,14 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1415 1415
1416#ifdef CONFIG_COMPAT 1416#ifdef CONFIG_COMPAT
1417struct compat_ip6t_replace { 1417struct compat_ip6t_replace {
1418 char name[IP6T_TABLE_MAXNAMELEN]; 1418 char name[XT_TABLE_MAXNAMELEN];
1419 u32 valid_hooks; 1419 u32 valid_hooks;
1420 u32 num_entries; 1420 u32 num_entries;
1421 u32 size; 1421 u32 size;
1422 u32 hook_entry[NF_INET_NUMHOOKS]; 1422 u32 hook_entry[NF_INET_NUMHOOKS];
1423 u32 underflow[NF_INET_NUMHOOKS]; 1423 u32 underflow[NF_INET_NUMHOOKS];
1424 u32 num_counters; 1424 u32 num_counters;
1425 compat_uptr_t counters; /* struct ip6t_counters * */ 1425 compat_uptr_t counters; /* struct xt_counters * */
1426 struct compat_ip6t_entry entries[0]; 1426 struct compat_ip6t_entry entries[0];
1427}; 1427};
1428 1428
@@ -1431,7 +1431,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1431 unsigned int *size, struct xt_counters *counters, 1431 unsigned int *size, struct xt_counters *counters,
1432 unsigned int i) 1432 unsigned int i)
1433{ 1433{
1434 struct ip6t_entry_target *t; 1434 struct xt_entry_target *t;
1435 struct compat_ip6t_entry __user *ce; 1435 struct compat_ip6t_entry __user *ce;
1436 u_int16_t target_offset, next_offset; 1436 u_int16_t target_offset, next_offset;
1437 compat_uint_t origsize; 1437 compat_uint_t origsize;
@@ -1466,7 +1466,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1466} 1466}
1467 1467
1468static int 1468static int
1469compat_find_calc_match(struct ip6t_entry_match *m, 1469compat_find_calc_match(struct xt_entry_match *m,
1470 const char *name, 1470 const char *name,
1471 const struct ip6t_ip6 *ipv6, 1471 const struct ip6t_ip6 *ipv6,
1472 unsigned int hookmask, 1472 unsigned int hookmask,
@@ -1488,7 +1488,7 @@ compat_find_calc_match(struct ip6t_entry_match *m,
1488 1488
1489static void compat_release_entry(struct compat_ip6t_entry *e) 1489static void compat_release_entry(struct compat_ip6t_entry *e)
1490{ 1490{
1491 struct ip6t_entry_target *t; 1491 struct xt_entry_target *t;
1492 struct xt_entry_match *ematch; 1492 struct xt_entry_match *ematch;
1493 1493
1494 /* Cleanup all matches */ 1494 /* Cleanup all matches */
@@ -1509,7 +1509,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1509 const char *name) 1509 const char *name)
1510{ 1510{
1511 struct xt_entry_match *ematch; 1511 struct xt_entry_match *ematch;
1512 struct ip6t_entry_target *t; 1512 struct xt_entry_target *t;
1513 struct xt_target *target; 1513 struct xt_target *target;
1514 unsigned int entry_offset; 1514 unsigned int entry_offset;
1515 unsigned int j; 1515 unsigned int j;
@@ -1591,7 +1591,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1591 unsigned int *size, const char *name, 1591 unsigned int *size, const char *name,
1592 struct xt_table_info *newinfo, unsigned char *base) 1592 struct xt_table_info *newinfo, unsigned char *base)
1593{ 1593{
1594 struct ip6t_entry_target *t; 1594 struct xt_entry_target *t;
1595 struct xt_target *target; 1595 struct xt_target *target;
1596 struct ip6t_entry *de; 1596 struct ip6t_entry *de;
1597 unsigned int origsize; 1597 unsigned int origsize;
@@ -1899,7 +1899,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1899} 1899}
1900 1900
1901struct compat_ip6t_get_entries { 1901struct compat_ip6t_get_entries {
1902 char name[IP6T_TABLE_MAXNAMELEN]; 1902 char name[XT_TABLE_MAXNAMELEN];
1903 compat_uint_t size; 1903 compat_uint_t size;
1904 struct compat_ip6t_entry entrytable[0]; 1904 struct compat_ip6t_entry entrytable[0];
1905}; 1905};
@@ -2054,7 +2054,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2054 2054
2055 case IP6T_SO_GET_REVISION_MATCH: 2055 case IP6T_SO_GET_REVISION_MATCH:
2056 case IP6T_SO_GET_REVISION_TARGET: { 2056 case IP6T_SO_GET_REVISION_TARGET: {
2057 struct ip6t_get_revision rev; 2057 struct xt_get_revision rev;
2058 int target; 2058 int target;
2059 2059
2060 if (*len != sizeof(rev)) { 2060 if (*len != sizeof(rev)) {
@@ -2191,7 +2191,7 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par)
2191/* The built-in targets: standard (NULL) and error. */ 2191/* The built-in targets: standard (NULL) and error. */
2192static struct xt_target ip6t_builtin_tg[] __read_mostly = { 2192static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2193 { 2193 {
2194 .name = IP6T_STANDARD_TARGET, 2194 .name = XT_STANDARD_TARGET,
2195 .targetsize = sizeof(int), 2195 .targetsize = sizeof(int),
2196 .family = NFPROTO_IPV6, 2196 .family = NFPROTO_IPV6,
2197#ifdef CONFIG_COMPAT 2197#ifdef CONFIG_COMPAT
@@ -2201,9 +2201,9 @@ static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2201#endif 2201#endif
2202 }, 2202 },
2203 { 2203 {
2204 .name = IP6T_ERROR_TARGET, 2204 .name = XT_ERROR_TARGET,
2205 .target = ip6t_error, 2205 .target = ip6t_error,
2206 .targetsize = IP6T_FUNCTION_MAXNAMELEN, 2206 .targetsize = XT_FUNCTION_MAXNAMELEN,
2207 .family = NFPROTO_IPV6, 2207 .family = NFPROTO_IPV6,
2208 }, 2208 },
2209}; 2209};
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0a07ae7b933f..09c88891a753 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,6 +23,7 @@
23#include <linux/netfilter/x_tables.h> 23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv6/ip6_tables.h> 24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_log.h> 25#include <net/netfilter/nf_log.h>
26#include <net/netfilter/xt_log.h>
26 27
27MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); 28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
28MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); 29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
@@ -32,11 +33,9 @@ struct in_device;
32#include <net/route.h> 33#include <net/route.h>
33#include <linux/netfilter_ipv6/ip6t_LOG.h> 34#include <linux/netfilter_ipv6/ip6t_LOG.h>
34 35
35/* Use lock to serialize, so printks don't overlap */
36static DEFINE_SPINLOCK(log_lock);
37
38/* One level of recursion won't kill us */ 36/* One level of recursion won't kill us */
39static void dump_packet(const struct nf_loginfo *info, 37static void dump_packet(struct sbuff *m,
38 const struct nf_loginfo *info,
40 const struct sk_buff *skb, unsigned int ip6hoff, 39 const struct sk_buff *skb, unsigned int ip6hoff,
41 int recurse) 40 int recurse)
42{ 41{
@@ -55,15 +54,15 @@ static void dump_packet(const struct nf_loginfo *info,
55 54
56 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); 55 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
57 if (ih == NULL) { 56 if (ih == NULL) {
58 printk("TRUNCATED"); 57 sb_add(m, "TRUNCATED");
59 return; 58 return;
60 } 59 }
61 60
62 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ 61 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
63 printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); 62 sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
64 63
65 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ 64 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
66 printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", 65 sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
67 ntohs(ih->payload_len) + sizeof(struct ipv6hdr), 66 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
68 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, 67 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
69 ih->hop_limit, 68 ih->hop_limit,
@@ -78,35 +77,35 @@ static void dump_packet(const struct nf_loginfo *info,
78 77
79 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); 78 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
80 if (hp == NULL) { 79 if (hp == NULL) {
81 printk("TRUNCATED"); 80 sb_add(m, "TRUNCATED");
82 return; 81 return;
83 } 82 }
84 83
85 /* Max length: 48 "OPT (...) " */ 84 /* Max length: 48 "OPT (...) " */
86 if (logflags & IP6T_LOG_IPOPT) 85 if (logflags & IP6T_LOG_IPOPT)
87 printk("OPT ( "); 86 sb_add(m, "OPT ( ");
88 87
89 switch (currenthdr) { 88 switch (currenthdr) {
90 case IPPROTO_FRAGMENT: { 89 case IPPROTO_FRAGMENT: {
91 struct frag_hdr _fhdr; 90 struct frag_hdr _fhdr;
92 const struct frag_hdr *fh; 91 const struct frag_hdr *fh;
93 92
94 printk("FRAG:"); 93 sb_add(m, "FRAG:");
95 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), 94 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
96 &_fhdr); 95 &_fhdr);
97 if (fh == NULL) { 96 if (fh == NULL) {
98 printk("TRUNCATED "); 97 sb_add(m, "TRUNCATED ");
99 return; 98 return;
100 } 99 }
101 100
102 /* Max length: 6 "65535 " */ 101 /* Max length: 6 "65535 " */
103 printk("%u ", ntohs(fh->frag_off) & 0xFFF8); 102 sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
104 103
105 /* Max length: 11 "INCOMPLETE " */ 104 /* Max length: 11 "INCOMPLETE " */
106 if (fh->frag_off & htons(0x0001)) 105 if (fh->frag_off & htons(0x0001))
107 printk("INCOMPLETE "); 106 sb_add(m, "INCOMPLETE ");
108 107
109 printk("ID:%08x ", ntohl(fh->identification)); 108 sb_add(m, "ID:%08x ", ntohl(fh->identification));
110 109
111 if (ntohs(fh->frag_off) & 0xFFF8) 110 if (ntohs(fh->frag_off) & 0xFFF8)
112 fragment = 1; 111 fragment = 1;
@@ -120,7 +119,7 @@ static void dump_packet(const struct nf_loginfo *info,
120 case IPPROTO_HOPOPTS: 119 case IPPROTO_HOPOPTS:
121 if (fragment) { 120 if (fragment) {
122 if (logflags & IP6T_LOG_IPOPT) 121 if (logflags & IP6T_LOG_IPOPT)
123 printk(")"); 122 sb_add(m, ")");
124 return; 123 return;
125 } 124 }
126 hdrlen = ipv6_optlen(hp); 125 hdrlen = ipv6_optlen(hp);
@@ -132,10 +131,10 @@ static void dump_packet(const struct nf_loginfo *info,
132 const struct ip_auth_hdr *ah; 131 const struct ip_auth_hdr *ah;
133 132
134 /* Max length: 3 "AH " */ 133 /* Max length: 3 "AH " */
135 printk("AH "); 134 sb_add(m, "AH ");
136 135
137 if (fragment) { 136 if (fragment) {
138 printk(")"); 137 sb_add(m, ")");
139 return; 138 return;
140 } 139 }
141 140
@@ -146,13 +145,13 @@ static void dump_packet(const struct nf_loginfo *info,
146 * Max length: 26 "INCOMPLETE [65535 145 * Max length: 26 "INCOMPLETE [65535
147 * bytes] )" 146 * bytes] )"
148 */ 147 */
149 printk("INCOMPLETE [%u bytes] )", 148 sb_add(m, "INCOMPLETE [%u bytes] )",
150 skb->len - ptr); 149 skb->len - ptr);
151 return; 150 return;
152 } 151 }
153 152
154 /* Length: 15 "SPI=0xF1234567 */ 153 /* Length: 15 "SPI=0xF1234567 */
155 printk("SPI=0x%x ", ntohl(ah->spi)); 154 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
156 155
157 } 156 }
158 157
@@ -164,10 +163,10 @@ static void dump_packet(const struct nf_loginfo *info,
164 const struct ip_esp_hdr *eh; 163 const struct ip_esp_hdr *eh;
165 164
166 /* Max length: 4 "ESP " */ 165 /* Max length: 4 "ESP " */
167 printk("ESP "); 166 sb_add(m, "ESP ");
168 167
169 if (fragment) { 168 if (fragment) {
170 printk(")"); 169 sb_add(m, ")");
171 return; 170 return;
172 } 171 }
173 172
@@ -177,23 +176,23 @@ static void dump_packet(const struct nf_loginfo *info,
177 eh = skb_header_pointer(skb, ptr, sizeof(_esph), 176 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
178 &_esph); 177 &_esph);
179 if (eh == NULL) { 178 if (eh == NULL) {
180 printk("INCOMPLETE [%u bytes] )", 179 sb_add(m, "INCOMPLETE [%u bytes] )",
181 skb->len - ptr); 180 skb->len - ptr);
182 return; 181 return;
183 } 182 }
184 183
185 /* Length: 16 "SPI=0xF1234567 )" */ 184 /* Length: 16 "SPI=0xF1234567 )" */
186 printk("SPI=0x%x )", ntohl(eh->spi) ); 185 sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
187 186
188 } 187 }
189 return; 188 return;
190 default: 189 default:
191 /* Max length: 20 "Unknown Ext Hdr 255" */ 190 /* Max length: 20 "Unknown Ext Hdr 255" */
192 printk("Unknown Ext Hdr %u", currenthdr); 191 sb_add(m, "Unknown Ext Hdr %u", currenthdr);
193 return; 192 return;
194 } 193 }
195 if (logflags & IP6T_LOG_IPOPT) 194 if (logflags & IP6T_LOG_IPOPT)
196 printk(") "); 195 sb_add(m, ") ");
197 196
198 currenthdr = hp->nexthdr; 197 currenthdr = hp->nexthdr;
199 ptr += hdrlen; 198 ptr += hdrlen;
@@ -205,7 +204,7 @@ static void dump_packet(const struct nf_loginfo *info,
205 const struct tcphdr *th; 204 const struct tcphdr *th;
206 205
207 /* Max length: 10 "PROTO=TCP " */ 206 /* Max length: 10 "PROTO=TCP " */
208 printk("PROTO=TCP "); 207 sb_add(m, "PROTO=TCP ");
209 208
210 if (fragment) 209 if (fragment)
211 break; 210 break;
@@ -213,40 +212,40 @@ static void dump_packet(const struct nf_loginfo *info,
213 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 212 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
214 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph); 213 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
215 if (th == NULL) { 214 if (th == NULL) {
216 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 215 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
217 return; 216 return;
218 } 217 }
219 218
220 /* Max length: 20 "SPT=65535 DPT=65535 " */ 219 /* Max length: 20 "SPT=65535 DPT=65535 " */
221 printk("SPT=%u DPT=%u ", 220 sb_add(m, "SPT=%u DPT=%u ",
222 ntohs(th->source), ntohs(th->dest)); 221 ntohs(th->source), ntohs(th->dest));
223 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
224 if (logflags & IP6T_LOG_TCPSEQ) 223 if (logflags & IP6T_LOG_TCPSEQ)
225 printk("SEQ=%u ACK=%u ", 224 sb_add(m, "SEQ=%u ACK=%u ",
226 ntohl(th->seq), ntohl(th->ack_seq)); 225 ntohl(th->seq), ntohl(th->ack_seq));
227 /* Max length: 13 "WINDOW=65535 " */ 226 /* Max length: 13 "WINDOW=65535 " */
228 printk("WINDOW=%u ", ntohs(th->window)); 227 sb_add(m, "WINDOW=%u ", ntohs(th->window));
229 /* Max length: 9 "RES=0x3C " */ 228 /* Max length: 9 "RES=0x3C " */
230 printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); 229 sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
231 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ 230 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
232 if (th->cwr) 231 if (th->cwr)
233 printk("CWR "); 232 sb_add(m, "CWR ");
234 if (th->ece) 233 if (th->ece)
235 printk("ECE "); 234 sb_add(m, "ECE ");
236 if (th->urg) 235 if (th->urg)
237 printk("URG "); 236 sb_add(m, "URG ");
238 if (th->ack) 237 if (th->ack)
239 printk("ACK "); 238 sb_add(m, "ACK ");
240 if (th->psh) 239 if (th->psh)
241 printk("PSH "); 240 sb_add(m, "PSH ");
242 if (th->rst) 241 if (th->rst)
243 printk("RST "); 242 sb_add(m, "RST ");
244 if (th->syn) 243 if (th->syn)
245 printk("SYN "); 244 sb_add(m, "SYN ");
246 if (th->fin) 245 if (th->fin)
247 printk("FIN "); 246 sb_add(m, "FIN ");
248 /* Max length: 11 "URGP=65535 " */ 247 /* Max length: 11 "URGP=65535 " */
249 printk("URGP=%u ", ntohs(th->urg_ptr)); 248 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
250 249
251 if ((logflags & IP6T_LOG_TCPOPT) && 250 if ((logflags & IP6T_LOG_TCPOPT) &&
252 th->doff * 4 > sizeof(struct tcphdr)) { 251 th->doff * 4 > sizeof(struct tcphdr)) {
@@ -260,15 +259,15 @@ static void dump_packet(const struct nf_loginfo *info,
260 ptr + sizeof(struct tcphdr), 259 ptr + sizeof(struct tcphdr),
261 optsize, _opt); 260 optsize, _opt);
262 if (op == NULL) { 261 if (op == NULL) {
263 printk("OPT (TRUNCATED)"); 262 sb_add(m, "OPT (TRUNCATED)");
264 return; 263 return;
265 } 264 }
266 265
267 /* Max length: 127 "OPT (" 15*4*2chars ") " */ 266 /* Max length: 127 "OPT (" 15*4*2chars ") " */
268 printk("OPT ("); 267 sb_add(m, "OPT (");
269 for (i =0; i < optsize; i++) 268 for (i =0; i < optsize; i++)
270 printk("%02X", op[i]); 269 sb_add(m, "%02X", op[i]);
271 printk(") "); 270 sb_add(m, ") ");
272 } 271 }
273 break; 272 break;
274 } 273 }
@@ -279,9 +278,9 @@ static void dump_packet(const struct nf_loginfo *info,
279 278
280 if (currenthdr == IPPROTO_UDP) 279 if (currenthdr == IPPROTO_UDP)
281 /* Max length: 10 "PROTO=UDP " */ 280 /* Max length: 10 "PROTO=UDP " */
282 printk("PROTO=UDP " ); 281 sb_add(m, "PROTO=UDP " );
283 else /* Max length: 14 "PROTO=UDPLITE " */ 282 else /* Max length: 14 "PROTO=UDPLITE " */
284 printk("PROTO=UDPLITE "); 283 sb_add(m, "PROTO=UDPLITE ");
285 284
286 if (fragment) 285 if (fragment)
287 break; 286 break;
@@ -289,12 +288,12 @@ static void dump_packet(const struct nf_loginfo *info,
289 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 288 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
290 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph); 289 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
291 if (uh == NULL) { 290 if (uh == NULL) {
292 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 291 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
293 return; 292 return;
294 } 293 }
295 294
296 /* Max length: 20 "SPT=65535 DPT=65535 " */ 295 /* Max length: 20 "SPT=65535 DPT=65535 " */
297 printk("SPT=%u DPT=%u LEN=%u ", 296 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
298 ntohs(uh->source), ntohs(uh->dest), 297 ntohs(uh->source), ntohs(uh->dest),
299 ntohs(uh->len)); 298 ntohs(uh->len));
300 break; 299 break;
@@ -304,7 +303,7 @@ static void dump_packet(const struct nf_loginfo *info,
304 const struct icmp6hdr *ic; 303 const struct icmp6hdr *ic;
305 304
306 /* Max length: 13 "PROTO=ICMPv6 " */ 305 /* Max length: 13 "PROTO=ICMPv6 " */
307 printk("PROTO=ICMPv6 "); 306 sb_add(m, "PROTO=ICMPv6 ");
308 307
309 if (fragment) 308 if (fragment)
310 break; 309 break;
@@ -312,18 +311,18 @@ static void dump_packet(const struct nf_loginfo *info,
312 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 311 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
313 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); 312 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
314 if (ic == NULL) { 313 if (ic == NULL) {
315 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 314 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
316 return; 315 return;
317 } 316 }
318 317
319 /* Max length: 18 "TYPE=255 CODE=255 " */ 318 /* Max length: 18 "TYPE=255 CODE=255 " */
320 printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); 319 sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
321 320
322 switch (ic->icmp6_type) { 321 switch (ic->icmp6_type) {
323 case ICMPV6_ECHO_REQUEST: 322 case ICMPV6_ECHO_REQUEST:
324 case ICMPV6_ECHO_REPLY: 323 case ICMPV6_ECHO_REPLY:
325 /* Max length: 19 "ID=65535 SEQ=65535 " */ 324 /* Max length: 19 "ID=65535 SEQ=65535 " */
326 printk("ID=%u SEQ=%u ", 325 sb_add(m, "ID=%u SEQ=%u ",
327 ntohs(ic->icmp6_identifier), 326 ntohs(ic->icmp6_identifier),
328 ntohs(ic->icmp6_sequence)); 327 ntohs(ic->icmp6_sequence));
329 break; 328 break;
@@ -334,35 +333,35 @@ static void dump_packet(const struct nf_loginfo *info,
334 333
335 case ICMPV6_PARAMPROB: 334 case ICMPV6_PARAMPROB:
336 /* Max length: 17 "POINTER=ffffffff " */ 335 /* Max length: 17 "POINTER=ffffffff " */
337 printk("POINTER=%08x ", ntohl(ic->icmp6_pointer)); 336 sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
338 /* Fall through */ 337 /* Fall through */
339 case ICMPV6_DEST_UNREACH: 338 case ICMPV6_DEST_UNREACH:
340 case ICMPV6_PKT_TOOBIG: 339 case ICMPV6_PKT_TOOBIG:
341 case ICMPV6_TIME_EXCEED: 340 case ICMPV6_TIME_EXCEED:
342 /* Max length: 3+maxlen */ 341 /* Max length: 3+maxlen */
343 if (recurse) { 342 if (recurse) {
344 printk("["); 343 sb_add(m, "[");
345 dump_packet(info, skb, ptr + sizeof(_icmp6h), 344 dump_packet(m, info, skb,
346 0); 345 ptr + sizeof(_icmp6h), 0);
347 printk("] "); 346 sb_add(m, "] ");
348 } 347 }
349 348
350 /* Max length: 10 "MTU=65535 " */ 349 /* Max length: 10 "MTU=65535 " */
351 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) 350 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
352 printk("MTU=%u ", ntohl(ic->icmp6_mtu)); 351 sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
353 } 352 }
354 break; 353 break;
355 } 354 }
356 /* Max length: 10 "PROTO=255 " */ 355 /* Max length: 10 "PROTO=255 " */
357 default: 356 default:
358 printk("PROTO=%u ", currenthdr); 357 sb_add(m, "PROTO=%u ", currenthdr);
359 } 358 }
360 359
361 /* Max length: 15 "UID=4294967295 " */ 360 /* Max length: 15 "UID=4294967295 " */
362 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { 361 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
363 read_lock_bh(&skb->sk->sk_callback_lock); 362 read_lock_bh(&skb->sk->sk_callback_lock);
364 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 363 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
365 printk("UID=%u GID=%u ", 364 sb_add(m, "UID=%u GID=%u ",
366 skb->sk->sk_socket->file->f_cred->fsuid, 365 skb->sk->sk_socket->file->f_cred->fsuid,
367 skb->sk->sk_socket->file->f_cred->fsgid); 366 skb->sk->sk_socket->file->f_cred->fsgid);
368 read_unlock_bh(&skb->sk->sk_callback_lock); 367 read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -370,10 +369,11 @@ static void dump_packet(const struct nf_loginfo *info,
370 369
371 /* Max length: 16 "MARK=0xFFFFFFFF " */ 370 /* Max length: 16 "MARK=0xFFFFFFFF " */
372 if (!recurse && skb->mark) 371 if (!recurse && skb->mark)
373 printk("MARK=0x%x ", skb->mark); 372 sb_add(m, "MARK=0x%x ", skb->mark);
374} 373}
375 374
376static void dump_mac_header(const struct nf_loginfo *info, 375static void dump_mac_header(struct sbuff *m,
376 const struct nf_loginfo *info,
377 const struct sk_buff *skb) 377 const struct sk_buff *skb)
378{ 378{
379 struct net_device *dev = skb->dev; 379 struct net_device *dev = skb->dev;
@@ -387,7 +387,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
387 387
388 switch (dev->type) { 388 switch (dev->type) {
389 case ARPHRD_ETHER: 389 case ARPHRD_ETHER:
390 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", 390 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto)); 392 ntohs(eth_hdr(skb)->h_proto));
393 return; 393 return;
@@ -396,7 +396,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
396 } 396 }
397 397
398fallback: 398fallback:
399 printk("MAC="); 399 sb_add(m, "MAC=");
400 if (dev->hard_header_len && 400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) { 401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb); 402 const unsigned char *p = skb_mac_header(skb);
@@ -408,19 +408,19 @@ fallback:
408 p = NULL; 408 p = NULL;
409 409
410 if (p != NULL) { 410 if (p != NULL) {
411 printk("%02x", *p++); 411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++) 412 for (i = 1; i < len; i++)
413 printk(":%02x", p[i]); 413 sb_add(m, ":%02x", p[i]);
414 } 414 }
415 printk(" "); 415 sb_add(m, " ");
416 416
417 if (dev->type == ARPHRD_SIT) { 417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph = 418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb); 419 (struct iphdr *)skb_mac_header(skb);
420 printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); 420 sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 } 421 }
422 } else 422 } else
423 printk(" "); 423 sb_add(m, " ");
424} 424}
425 425
426static struct nf_loginfo default_loginfo = { 426static struct nf_loginfo default_loginfo = {
@@ -442,22 +442,23 @@ ip6t_log_packet(u_int8_t pf,
442 const struct nf_loginfo *loginfo, 442 const struct nf_loginfo *loginfo,
443 const char *prefix) 443 const char *prefix)
444{ 444{
445 struct sbuff *m = sb_open();
446
445 if (!loginfo) 447 if (!loginfo)
446 loginfo = &default_loginfo; 448 loginfo = &default_loginfo;
447 449
448 spin_lock_bh(&log_lock); 450 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
449 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 451 prefix,
450 prefix, 452 in ? in->name : "",
451 in ? in->name : "", 453 out ? out->name : "");
452 out ? out->name : "");
453 454
454 /* MAC logging for input path only. */ 455 /* MAC logging for input path only. */
455 if (in && !out) 456 if (in && !out)
456 dump_mac_header(loginfo, skb); 457 dump_mac_header(m, loginfo, skb);
458
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
457 460
458 dump_packet(loginfo, skb, skb_network_offset(skb), 1); 461 sb_close(m);
459 printk("\n");
460 spin_unlock_bh(&log_lock);
461} 462}
462 463
463static unsigned int 464static unsigned int
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ff43461704be..c8af58b22562 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/icmp.h> 18#include <linux/icmp.h>
19#include <linux/sysctl.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
21#include <net/inet_frag.h> 20#include <net/inet_frag.h>
22 21
@@ -29,6 +28,7 @@
29#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
33 33
34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
@@ -189,53 +189,6 @@ out:
189 return nf_conntrack_confirm(skb); 189 return nf_conntrack_confirm(skb);
190} 190}
191 191
192static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
193 struct sk_buff *skb)
194{
195 u16 zone = NF_CT_DEFAULT_ZONE;
196
197 if (skb->nfct)
198 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
199
200#ifdef CONFIG_BRIDGE_NETFILTER
201 if (skb->nf_bridge &&
202 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
203 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
204#endif
205 if (hooknum == NF_INET_PRE_ROUTING)
206 return IP6_DEFRAG_CONNTRACK_IN + zone;
207 else
208 return IP6_DEFRAG_CONNTRACK_OUT + zone;
209
210}
211
212static unsigned int ipv6_defrag(unsigned int hooknum,
213 struct sk_buff *skb,
214 const struct net_device *in,
215 const struct net_device *out,
216 int (*okfn)(struct sk_buff *))
217{
218 struct sk_buff *reasm;
219
220 /* Previously seen (loopback)? */
221 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
222 return NF_ACCEPT;
223
224 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
225 /* queued */
226 if (reasm == NULL)
227 return NF_STOLEN;
228
229 /* error occured or not fragmented */
230 if (reasm == skb)
231 return NF_ACCEPT;
232
233 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
234 (struct net_device *)out, okfn);
235
236 return NF_STOLEN;
237}
238
239static unsigned int __ipv6_conntrack_in(struct net *net, 192static unsigned int __ipv6_conntrack_in(struct net *net,
240 unsigned int hooknum, 193 unsigned int hooknum,
241 struct sk_buff *skb, 194 struct sk_buff *skb,
@@ -288,13 +241,6 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
288 241
289static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 242static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
290 { 243 {
291 .hook = ipv6_defrag,
292 .owner = THIS_MODULE,
293 .pf = NFPROTO_IPV6,
294 .hooknum = NF_INET_PRE_ROUTING,
295 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
296 },
297 {
298 .hook = ipv6_conntrack_in, 244 .hook = ipv6_conntrack_in,
299 .owner = THIS_MODULE, 245 .owner = THIS_MODULE,
300 .pf = NFPROTO_IPV6, 246 .pf = NFPROTO_IPV6,
@@ -309,13 +255,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
309 .priority = NF_IP6_PRI_CONNTRACK, 255 .priority = NF_IP6_PRI_CONNTRACK,
310 }, 256 },
311 { 257 {
312 .hook = ipv6_defrag,
313 .owner = THIS_MODULE,
314 .pf = NFPROTO_IPV6,
315 .hooknum = NF_INET_LOCAL_OUT,
316 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
317 },
318 {
319 .hook = ipv6_confirm, 258 .hook = ipv6_confirm,
320 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
321 .pf = NFPROTO_IPV6, 260 .pf = NFPROTO_IPV6,
@@ -387,10 +326,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
387 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 326 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
388 .nla_policy = ipv6_nla_policy, 327 .nla_policy = ipv6_nla_policy,
389#endif 328#endif
390#ifdef CONFIG_SYSCTL
391 .ctl_table_path = nf_net_netfilter_sysctl_path,
392 .ctl_table = nf_ct_ipv6_sysctl_table,
393#endif
394 .me = THIS_MODULE, 329 .me = THIS_MODULE,
395}; 330};
396 331
@@ -403,16 +338,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
403 int ret = 0; 338 int ret = 0;
404 339
405 need_conntrack(); 340 need_conntrack();
341 nf_defrag_ipv6_enable();
406 342
407 ret = nf_ct_frag6_init();
408 if (ret < 0) {
409 pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
410 return ret;
411 }
412 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); 343 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
413 if (ret < 0) { 344 if (ret < 0) {
414 pr_err("nf_conntrack_ipv6: can't register tcp.\n"); 345 pr_err("nf_conntrack_ipv6: can't register tcp.\n");
415 goto cleanup_frag6; 346 return ret;
416 } 347 }
417 348
418 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); 349 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
@@ -450,8 +381,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
450 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 381 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
451 cleanup_tcp: 382 cleanup_tcp:
452 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 383 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
453 cleanup_frag6:
454 nf_ct_frag6_cleanup();
455 return ret; 384 return ret;
456} 385}
457 386
@@ -463,7 +392,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
463 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); 392 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
464 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 393 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
465 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 394 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
466 nf_ct_frag6_cleanup();
467} 395}
468 396
469module_init(nf_conntrack_l3proto_ipv6_init); 397module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 138a8b362706..489d71b844ac 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 73static struct netns_frags nf_init_frags;
74 74
75#ifdef CONFIG_SYSCTL 75#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_ipv6_sysctl_table[] = { 76struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 77 {
78 .procname = "nf_conntrack_frag6_timeout", 78 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 79 .data = &nf_init_frags.timeout,
@@ -97,6 +97,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
97 }, 97 },
98 { } 98 { }
99}; 99};
100
101static struct ctl_table_header *nf_ct_frag6_sysctl_header;
100#endif 102#endif
101 103
102static unsigned int nf_hashfn(struct inet_frag_queue *q) 104static unsigned int nf_hashfn(struct inet_frag_queue *q)
@@ -623,11 +625,21 @@ int nf_ct_frag6_init(void)
623 inet_frags_init_net(&nf_init_frags); 625 inet_frags_init_net(&nf_init_frags);
624 inet_frags_init(&nf_frags); 626 inet_frags_init(&nf_frags);
625 627
628 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
629 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags);
632 return -ENOMEM;
633 }
634
626 return 0; 635 return 0;
627} 636}
628 637
629void nf_ct_frag6_cleanup(void) 638void nf_ct_frag6_cleanup(void)
630{ 639{
640 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
641 nf_ct_frag6_sysctl_header = NULL;
642
631 inet_frags_fini(&nf_frags); 643 inet_frags_fini(&nf_frags);
632 644
633 nf_init_frags.low_thresh = 0; 645 nf_init_frags.low_thresh = 0;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644
index 000000000000..99abfb53bab9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -0,0 +1,131 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/ipv6.h>
11#include <linux/in6.h>
12#include <linux/netfilter.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/icmp.h>
16#include <linux/sysctl.h>
17#include <net/ipv6.h>
18#include <net/inet_frag.h>
19
20#include <linux/netfilter_ipv6.h>
21#include <linux/netfilter_bridge.h>
22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#include <net/netfilter/nf_conntrack_l4proto.h>
25#include <net/netfilter/nf_conntrack_l3proto.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_conntrack_zones.h>
28#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
29#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
30
31static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
32 struct sk_buff *skb)
33{
34 u16 zone = NF_CT_DEFAULT_ZONE;
35
36 if (skb->nfct)
37 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
38
39#ifdef CONFIG_BRIDGE_NETFILTER
40 if (skb->nf_bridge &&
41 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
42 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
43#endif
44 if (hooknum == NF_INET_PRE_ROUTING)
45 return IP6_DEFRAG_CONNTRACK_IN + zone;
46 else
47 return IP6_DEFRAG_CONNTRACK_OUT + zone;
48
49}
50
51static unsigned int ipv6_defrag(unsigned int hooknum,
52 struct sk_buff *skb,
53 const struct net_device *in,
54 const struct net_device *out,
55 int (*okfn)(struct sk_buff *))
56{
57 struct sk_buff *reasm;
58
59 /* Previously seen (loopback)? */
60 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
61 return NF_ACCEPT;
62
63 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
64 /* queued */
65 if (reasm == NULL)
66 return NF_STOLEN;
67
68 /* error occured or not fragmented */
69 if (reasm == skb)
70 return NF_ACCEPT;
71
72 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
73 (struct net_device *)out, okfn);
74
75 return NF_STOLEN;
76}
77
78static struct nf_hook_ops ipv6_defrag_ops[] = {
79 {
80 .hook = ipv6_defrag,
81 .owner = THIS_MODULE,
82 .pf = NFPROTO_IPV6,
83 .hooknum = NF_INET_PRE_ROUTING,
84 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
85 },
86 {
87 .hook = ipv6_defrag,
88 .owner = THIS_MODULE,
89 .pf = NFPROTO_IPV6,
90 .hooknum = NF_INET_LOCAL_OUT,
91 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
92 },
93};
94
95static int __init nf_defrag_init(void)
96{
97 int ret = 0;
98
99 ret = nf_ct_frag6_init();
100 if (ret < 0) {
101 pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
102 return ret;
103 }
104 ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
105 if (ret < 0) {
106 pr_err("nf_defrag_ipv6: can't register hooks\n");
107 goto cleanup_frag6;
108 }
109 return ret;
110
111cleanup_frag6:
112 nf_ct_frag6_cleanup();
113 return ret;
114
115}
116
117static void __exit nf_defrag_fini(void)
118{
119 nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
120 nf_ct_frag6_cleanup();
121}
122
123void nf_defrag_ipv6_enable(void)
124{
125}
126EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
127
128module_init(nf_defrag_init);
129module_exit(nf_defrag_fini);
130
131MODULE_LICENSE("GPL");
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8d93f6d81979..7e41e2cbb85e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1409 1409
1410 newsk = tcp_create_openreq_child(sk, req, skb); 1410 newsk = tcp_create_openreq_child(sk, req, skb);
1411 if (newsk == NULL) 1411 if (newsk == NULL)
1412 goto out; 1412 goto out_nonewsk;
1413 1413
1414 /* 1414 /*
1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1497 } 1497 }
1498#endif 1498#endif
1499 1499
1500 if (__inet_inherit_port(sk, newsk) < 0) {
1501 sock_put(newsk);
1502 goto out;
1503 }
1500 __inet6_hash(newsk, NULL); 1504 __inet6_hash(newsk, NULL);
1501 __inet_inherit_port(sk, newsk);
1502 1505
1503 return newsk; 1506 return newsk;
1504 1507
1505out_overflow: 1508out_overflow:
1506 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1509 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507out: 1510out_nonewsk:
1508 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1509 if (opt && opt != np->opt) 1511 if (opt && opt != np->opt)
1510 sock_kfree_s(sk, opt, opt->tot_len); 1512 sock_kfree_s(sk, opt, opt->tot_len);
1511 dst_release(dst); 1513 dst_release(dst);
1514out:
1515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1512 return NULL; 1516 return NULL;
1513} 1517}
1514 1518
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5acb3560ff15..c84dad432114 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -122,8 +122,8 @@ static void udp_v6_rehash(struct sock *sk)
122 122
123static inline int compute_score(struct sock *sk, struct net *net, 123static inline int compute_score(struct sock *sk, struct net *net,
124 unsigned short hnum, 124 unsigned short hnum,
125 struct in6_addr *saddr, __be16 sport, 125 const struct in6_addr *saddr, __be16 sport,
126 struct in6_addr *daddr, __be16 dport, 126 const struct in6_addr *daddr, __be16 dport,
127 int dif) 127 int dif)
128{ 128{
129 int score = -1; 129 int score = -1;
@@ -239,8 +239,8 @@ exact_match:
239} 239}
240 240
241static struct sock *__udp6_lib_lookup(struct net *net, 241static struct sock *__udp6_lib_lookup(struct net *net,
242 struct in6_addr *saddr, __be16 sport, 242 const struct in6_addr *saddr, __be16 sport,
243 struct in6_addr *daddr, __be16 dport, 243 const struct in6_addr *daddr, __be16 dport,
244 int dif, struct udp_table *udptable) 244 int dif, struct udp_table *udptable)
245{ 245{
246 struct sock *sk, *result; 246 struct sock *sk, *result;
@@ -320,6 +320,14 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
320 udptable); 320 udptable);
321} 321}
322 322
323struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
324 const struct in6_addr *daddr, __be16 dport, int dif)
325{
326 return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
327}
328EXPORT_SYMBOL_GPL(udp6_lib_lookup);
329
330
323/* 331/*
324 * This should be easy, if there is something there we 332 * This should be easy, if there is something there we
325 * return it, otherwise we block. 333 * return it, otherwise we block.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 78b505d33bfb..8f014f22d132 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -105,10 +105,8 @@ EXPORT_SYMBOL(nf_register_hooks);
105 105
106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) 106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
107{ 107{
108 unsigned int i; 108 while (n-- > 0)
109 109 nf_unregister_hook(&reg[n]);
110 for (i = 0; i < n; i++)
111 nf_unregister_hook(&reg[i]);
112} 110}
113EXPORT_SYMBOL(nf_unregister_hooks); 111EXPORT_SYMBOL(nf_unregister_hooks);
114 112
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 46a77d5c3887..a22dac227055 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER && NF_CONNTRACK 6 depends on NET && INET && NETFILTER
7 ---help--- 7 ---help---
8 IP Virtual Server support will let you build a high-performance 8 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 9 virtual server based on cluster of two or more real servers. This
@@ -235,7 +235,8 @@ comment 'IPVS application helper'
235 235
236config IP_VS_FTP 236config IP_VS_FTP
237 tristate "FTP protocol helper" 237 tristate "FTP protocol helper"
238 depends on IP_VS_PROTO_TCP && NF_NAT 238 depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
239 select IP_VS_NFCT
239 ---help--- 240 ---help---
240 FTP is a protocol that transfers IP address and/or port number in 241 FTP is a protocol that transfers IP address and/or port number in
241 the payload. In the virtual server via Network Address Translation, 242 the payload. In the virtual server via Network Address Translation,
@@ -247,4 +248,19 @@ config IP_VS_FTP
247 If you want to compile it in kernel, say Y. To compile it as a 248 If you want to compile it in kernel, say Y. To compile it as a
248 module, choose M here. If unsure, say N. 249 module, choose M here. If unsure, say N.
249 250
251config IP_VS_NFCT
252 bool "Netfilter connection tracking"
253 depends on NF_CONNTRACK
254 ---help---
255 The Netfilter connection tracking support allows the IPVS
256 connection state to be exported to the Netfilter framework
257 for filtering purposes.
258
259config IP_VS_PE_SIP
260 tristate "SIP persistence engine"
261 depends on IP_VS_PROTO_UDP
262 depends on NF_CONNTRACK_SIP
263 ---help---
264 Allow persistence based on the SIP Call-ID
265
250endif # IP_VS 266endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index e3baefd7066e..34ee602ddb66 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o 9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o 10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
11 11
12ip_vs-extra_objs-y :=
13ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
14
12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ 15ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ 16 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
14 ip_vs_est.o ip_vs_proto.o \ 17 ip_vs_est.o ip_vs_proto.o ip_vs_pe.o \
15 $(ip_vs_proto-objs-y) 18 $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
16 19
17 20
18# IPVS core 21# IPVS core
@@ -32,3 +35,6 @@ obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
32 35
33# IPVS application helpers 36# IPVS application helpers
34obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o 37obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
38
39# IPVS connection template retrievers
40obj-$(CONFIG_IP_VS_PE_SIP) += ip_vs_pe_sip.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index e76f87f4aca8..a475edee0912 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -103,8 +103,8 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
103 goto out; 103 goto out;
104 104
105 list_add(&inc->a_list, &app->incs_list); 105 list_add(&inc->a_list, &app->incs_list);
106 IP_VS_DBG(9, "%s application %s:%u registered\n", 106 IP_VS_DBG(9, "%s App %s:%u registered\n",
107 pp->name, inc->name, inc->port); 107 pp->name, inc->name, ntohs(inc->port));
108 108
109 return 0; 109 return 0;
110 110
@@ -130,7 +130,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
130 pp->unregister_app(inc); 130 pp->unregister_app(inc);
131 131
132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 132 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
133 pp->name, inc->name, inc->port); 133 pp->name, inc->name, ntohs(inc->port));
134 134
135 list_del(&inc->a_list); 135 list_del(&inc->a_list);
136 136
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b71c69a2db13..e9adecdc8ca4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -148,6 +148,42 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
148 & ip_vs_conn_tab_mask; 148 & ip_vs_conn_tab_mask;
149} 149}
150 150
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
152 bool inverse)
153{
154 const union nf_inet_addr *addr;
155 __be16 port;
156
157 if (p->pe_data && p->pe->hashkey_raw)
158 return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) &
159 ip_vs_conn_tab_mask;
160
161 if (likely(!inverse)) {
162 addr = p->caddr;
163 port = p->cport;
164 } else {
165 addr = p->vaddr;
166 port = p->vport;
167 }
168
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
170}
171
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{
174 struct ip_vs_conn_param p;
175
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
177 NULL, 0, &p);
178
179 if (cp->dest && cp->dest->svc->pe) {
180 p.pe = cp->dest->svc->pe;
181 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len;
183 }
184
185 return ip_vs_conn_hashkey_param(&p, false);
186}
151 187
152/* 188/*
153 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
@@ -162,7 +198,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
162 return 0; 198 return 0;
163 199
164 /* Hash by protocol, client address and port */ 200 /* Hash by protocol, client address and port */
165 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 201 hash = ip_vs_conn_hashkey_conn(cp);
166 202
167 ct_write_lock(hash); 203 ct_write_lock(hash);
168 spin_lock(&cp->lock); 204 spin_lock(&cp->lock);
@@ -195,7 +231,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
195 int ret; 231 int ret;
196 232
197 /* unhash it and decrease its reference counter */ 233 /* unhash it and decrease its reference counter */
198 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 234 hash = ip_vs_conn_hashkey_conn(cp);
199 235
200 ct_write_lock(hash); 236 ct_write_lock(hash);
201 spin_lock(&cp->lock); 237 spin_lock(&cp->lock);
@@ -218,27 +254,26 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
218/* 254/*
219 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 255 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
220 * Called for pkts coming from OUTside-to-INside. 256 * Called for pkts coming from OUTside-to-INside.
221 * s_addr, s_port: pkt source address (foreign host) 257 * p->caddr, p->cport: pkt source address (foreign host)
222 * d_addr, d_port: pkt dest address (load balancer) 258 * p->vaddr, p->vport: pkt dest address (load balancer)
223 */ 259 */
224static inline struct ip_vs_conn *__ip_vs_conn_in_get 260static inline struct ip_vs_conn *
225(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 261__ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
226 const union nf_inet_addr *d_addr, __be16 d_port)
227{ 262{
228 unsigned hash; 263 unsigned hash;
229 struct ip_vs_conn *cp; 264 struct ip_vs_conn *cp;
230 265
231 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 266 hash = ip_vs_conn_hashkey_param(p, false);
232 267
233 ct_read_lock(hash); 268 ct_read_lock(hash);
234 269
235 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
236 if (cp->af == af && 271 if (cp->af == p->af &&
237 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
238 ip_vs_addr_equal(af, d_addr, &cp->vaddr) && 273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
239 s_port == cp->cport && d_port == cp->vport && 274 p->cport == cp->cport && p->vport == cp->vport &&
240 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
241 protocol == cp->protocol) { 276 p->protocol == cp->protocol) {
242 /* HIT */ 277 /* HIT */
243 atomic_inc(&cp->refcnt); 278 atomic_inc(&cp->refcnt);
244 ct_read_unlock(hash); 279 ct_read_unlock(hash);
@@ -251,99 +286,111 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
251 return NULL; 286 return NULL;
252} 287}
253 288
254struct ip_vs_conn *ip_vs_conn_in_get 289struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
255(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
256 const union nf_inet_addr *d_addr, __be16 d_port)
257{ 290{
258 struct ip_vs_conn *cp; 291 struct ip_vs_conn *cp;
259 292
260 cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); 293 cp = __ip_vs_conn_in_get(p);
261 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) 294 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
262 cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, 295 struct ip_vs_conn_param cport_zero_p = *p;
263 d_port); 296 cport_zero_p.cport = 0;
297 cp = __ip_vs_conn_in_get(&cport_zero_p);
298 }
264 299
265 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", 300 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
266 ip_vs_proto_name(protocol), 301 ip_vs_proto_name(p->protocol),
267 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 302 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
268 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 303 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
269 cp ? "hit" : "not hit"); 304 cp ? "hit" : "not hit");
270 305
271 return cp; 306 return cp;
272} 307}
273 308
309static int
310ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
311 const struct ip_vs_iphdr *iph,
312 unsigned int proto_off, int inverse,
313 struct ip_vs_conn_param *p)
314{
315 __be16 _ports[2], *pptr;
316
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL)
319 return 1;
320
321 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
323 &iph->daddr, pptr[1], p);
324 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
326 &iph->saddr, pptr[0], p);
327 return 0;
328}
329
274struct ip_vs_conn * 330struct ip_vs_conn *
275ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
276 struct ip_vs_protocol *pp, 332 struct ip_vs_protocol *pp,
277 const struct ip_vs_iphdr *iph, 333 const struct ip_vs_iphdr *iph,
278 unsigned int proto_off, int inverse) 334 unsigned int proto_off, int inverse)
279{ 335{
280 __be16 _ports[2], *pptr; 336 struct ip_vs_conn_param p;
281 337
282 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 338 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
283 if (pptr == NULL)
284 return NULL; 339 return NULL;
285 340
286 if (likely(!inverse)) 341 return ip_vs_conn_in_get(&p);
287 return ip_vs_conn_in_get(af, iph->protocol,
288 &iph->saddr, pptr[0],
289 &iph->daddr, pptr[1]);
290 else
291 return ip_vs_conn_in_get(af, iph->protocol,
292 &iph->daddr, pptr[1],
293 &iph->saddr, pptr[0]);
294} 342}
295EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); 343EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
296 344
297/* Get reference to connection template */ 345/* Get reference to connection template */
298struct ip_vs_conn *ip_vs_ct_in_get 346struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
299(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
300 const union nf_inet_addr *d_addr, __be16 d_port)
301{ 347{
302 unsigned hash; 348 unsigned hash;
303 struct ip_vs_conn *cp; 349 struct ip_vs_conn *cp;
304 350
305 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 351 hash = ip_vs_conn_hashkey_param(p, false);
306 352
307 ct_read_lock(hash); 353 ct_read_lock(hash);
308 354
309 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
310 if (cp->af == af && 356 if (p->pe_data && p->pe->ct_match) {
311 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 357 if (p->pe->ct_match(p, cp))
358 goto out;
359 continue;
360 }
361
362 if (cp->af == p->af &&
363 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
312 /* protocol should only be IPPROTO_IP if 364 /* protocol should only be IPPROTO_IP if
313 * d_addr is a fwmark */ 365 * p->vaddr is a fwmark */
314 ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, 366 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
315 d_addr, &cp->vaddr) && 367 p->af, p->vaddr, &cp->vaddr) &&
316 s_port == cp->cport && d_port == cp->vport && 368 p->cport == cp->cport && p->vport == cp->vport &&
317 cp->flags & IP_VS_CONN_F_TEMPLATE && 369 cp->flags & IP_VS_CONN_F_TEMPLATE &&
318 protocol == cp->protocol) { 370 p->protocol == cp->protocol)
319 /* HIT */
320 atomic_inc(&cp->refcnt);
321 goto out; 371 goto out;
322 }
323 } 372 }
324 cp = NULL; 373 cp = NULL;
325 374
326 out: 375 out:
376 if (cp)
377 atomic_inc(&cp->refcnt);
327 ct_read_unlock(hash); 378 ct_read_unlock(hash);
328 379
329 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", 380 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
330 ip_vs_proto_name(protocol), 381 ip_vs_proto_name(p->protocol),
331 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 382 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
332 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 383 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
333 cp ? "hit" : "not hit"); 384 cp ? "hit" : "not hit");
334 385
335 return cp; 386 return cp;
336} 387}
337 388
338/* 389/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
339 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 390 * Called for pkts coming from inside-to-OUTside.
340 * Called for pkts coming from inside-to-OUTside. 391 * p->caddr, p->cport: pkt source address (inside host)
341 * s_addr, s_port: pkt source address (inside host) 392 * p->vaddr, p->vport: pkt dest address (foreign host) */
342 * d_addr, d_port: pkt dest address (foreign host) 393struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
343 */
344struct ip_vs_conn *ip_vs_conn_out_get
345(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
346 const union nf_inet_addr *d_addr, __be16 d_port)
347{ 394{
348 unsigned hash; 395 unsigned hash;
349 struct ip_vs_conn *cp, *ret=NULL; 396 struct ip_vs_conn *cp, *ret=NULL;
@@ -351,16 +398,16 @@ struct ip_vs_conn *ip_vs_conn_out_get
351 /* 398 /*
352 * Check for "full" addressed entries 399 * Check for "full" addressed entries
353 */ 400 */
354 hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); 401 hash = ip_vs_conn_hashkey_param(p, true);
355 402
356 ct_read_lock(hash); 403 ct_read_lock(hash);
357 404
358 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
359 if (cp->af == af && 406 if (cp->af == p->af &&
360 ip_vs_addr_equal(af, d_addr, &cp->caddr) && 407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
361 ip_vs_addr_equal(af, s_addr, &cp->daddr) && 408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
362 d_port == cp->cport && s_port == cp->dport && 409 p->vport == cp->cport && p->cport == cp->dport &&
363 protocol == cp->protocol) { 410 p->protocol == cp->protocol) {
364 /* HIT */ 411 /* HIT */
365 atomic_inc(&cp->refcnt); 412 atomic_inc(&cp->refcnt);
366 ret = cp; 413 ret = cp;
@@ -371,9 +418,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
371 ct_read_unlock(hash); 418 ct_read_unlock(hash);
372 419
373 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", 420 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
374 ip_vs_proto_name(protocol), 421 ip_vs_proto_name(p->protocol),
375 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 422 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
376 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 423 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
377 ret ? "hit" : "not hit"); 424 ret ? "hit" : "not hit");
378 425
379 return ret; 426 return ret;
@@ -385,20 +432,12 @@ ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
385 const struct ip_vs_iphdr *iph, 432 const struct ip_vs_iphdr *iph,
386 unsigned int proto_off, int inverse) 433 unsigned int proto_off, int inverse)
387{ 434{
388 __be16 _ports[2], *pptr; 435 struct ip_vs_conn_param p;
389 436
390 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
391 if (pptr == NULL)
392 return NULL; 438 return NULL;
393 439
394 if (likely(!inverse)) 440 return ip_vs_conn_out_get(&p);
395 return ip_vs_conn_out_get(af, iph->protocol,
396 &iph->saddr, pptr[0],
397 &iph->daddr, pptr[1]);
398 else
399 return ip_vs_conn_out_get(af, iph->protocol,
400 &iph->daddr, pptr[1],
401 &iph->saddr, pptr[0]);
402} 441}
403EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 442EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
404 443
@@ -505,6 +544,8 @@ static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
505static inline void 544static inline void
506ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) 545ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
507{ 546{
547 unsigned int conn_flags;
548
508 /* if dest is NULL, then return directly */ 549 /* if dest is NULL, then return directly */
509 if (!dest) 550 if (!dest)
510 return; 551 return;
@@ -512,16 +553,20 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
512 /* Increase the refcnt counter of the dest */ 553 /* Increase the refcnt counter of the dest */
513 atomic_inc(&dest->refcnt); 554 atomic_inc(&dest->refcnt);
514 555
556 conn_flags = atomic_read(&dest->conn_flags);
557 if (cp->protocol != IPPROTO_UDP)
558 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
515 /* Bind with the destination and its corresponding transmitter */ 559 /* Bind with the destination and its corresponding transmitter */
516 if ((cp->flags & IP_VS_CONN_F_SYNC) && 560 if (cp->flags & IP_VS_CONN_F_SYNC) {
517 (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
518 /* if the connection is not template and is created 561 /* if the connection is not template and is created
519 * by sync, preserve the activity flag. 562 * by sync, preserve the activity flag.
520 */ 563 */
521 cp->flags |= atomic_read(&dest->conn_flags) & 564 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
522 (~IP_VS_CONN_F_INACTIVE); 565 conn_flags &= ~IP_VS_CONN_F_INACTIVE;
523 else 566 /* connections inherit forwarding method from dest */
524 cp->flags |= atomic_read(&dest->conn_flags); 567 cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
568 }
569 cp->flags |= conn_flags;
525 cp->dest = dest; 570 cp->dest = dest;
526 571
527 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " 572 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
@@ -717,6 +762,10 @@ static void ip_vs_conn_expire(unsigned long data)
717 if (cp->control) 762 if (cp->control)
718 ip_vs_control_del(cp); 763 ip_vs_control_del(cp);
719 764
765 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp);
767
768 kfree(cp->pe_data);
720 if (unlikely(cp->app != NULL)) 769 if (unlikely(cp->app != NULL))
721 ip_vs_unbind_app(cp); 770 ip_vs_unbind_app(cp);
722 ip_vs_unbind_dest(cp); 771 ip_vs_unbind_dest(cp);
@@ -751,13 +800,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
751 * Create a new connection entry and hash it into the ip_vs_conn_tab 800 * Create a new connection entry and hash it into the ip_vs_conn_tab
752 */ 801 */
753struct ip_vs_conn * 802struct ip_vs_conn *
754ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, 803ip_vs_conn_new(const struct ip_vs_conn_param *p,
755 const union nf_inet_addr *vaddr, __be16 vport,
756 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
757 struct ip_vs_dest *dest) 805 struct ip_vs_dest *dest)
758{ 806{
759 struct ip_vs_conn *cp; 807 struct ip_vs_conn *cp;
760 struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 808 struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
761 809
762 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 810 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
763 if (cp == NULL) { 811 if (cp == NULL) {
@@ -767,17 +815,21 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
767 815
768 INIT_LIST_HEAD(&cp->c_list); 816 INIT_LIST_HEAD(&cp->c_list);
769 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
770 cp->af = af; 818 cp->af = p->af;
771 cp->protocol = proto; 819 cp->protocol = p->protocol;
772 ip_vs_addr_copy(af, &cp->caddr, caddr); 820 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
773 cp->cport = cport; 821 cp->cport = p->cport;
774 ip_vs_addr_copy(af, &cp->vaddr, vaddr); 822 ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
775 cp->vport = vport; 823 cp->vport = p->vport;
776 /* proto should only be IPPROTO_IP if d_addr is a fwmark */ 824 /* proto should only be IPPROTO_IP if d_addr is a fwmark */
777 ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, 825 ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
778 &cp->daddr, daddr); 826 &cp->daddr, daddr);
779 cp->dport = dport; 827 cp->dport = dport;
780 cp->flags = flags; 828 cp->flags = flags;
829 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
830 cp->pe_data = p->pe_data;
831 cp->pe_data_len = p->pe_data_len;
832 }
781 spin_lock_init(&cp->lock); 833 spin_lock_init(&cp->lock);
782 834
783 /* 835 /*
@@ -803,7 +855,7 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
803 855
804 /* Bind its packet transmitter */ 856 /* Bind its packet transmitter */
805#ifdef CONFIG_IP_VS_IPV6 857#ifdef CONFIG_IP_VS_IPV6
806 if (af == AF_INET6) 858 if (p->af == AF_INET6)
807 ip_vs_bind_xmit_v6(cp); 859 ip_vs_bind_xmit_v6(cp);
808 else 860 else
809#endif 861#endif
@@ -812,13 +864,22 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
812 if (unlikely(pp && atomic_read(&pp->appcnt))) 864 if (unlikely(pp && atomic_read(&pp->appcnt)))
813 ip_vs_bind_app(cp, pp); 865 ip_vs_bind_app(cp, pp);
814 866
867 /*
868 * Allow conntrack to be preserved. By default, conntrack
869 * is created and destroyed for every packet.
870 * Sometimes keeping conntrack can be useful for
871 * IP_VS_CONN_F_ONE_PACKET too.
872 */
873
874 if (ip_vs_conntrack_enabled())
875 cp->flags |= IP_VS_CONN_F_NFCT;
876
815 /* Hash it in the ip_vs_conn_tab finally */ 877 /* Hash it in the ip_vs_conn_tab finally */
816 ip_vs_conn_hash(cp); 878 ip_vs_conn_hash(cp);
817 879
818 return cp; 880 return cp;
819} 881}
820 882
821
822/* 883/*
823 * /proc/net/ip_vs_conn entries 884 * /proc/net/ip_vs_conn entries
824 */ 885 */
@@ -834,7 +895,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
834 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 895 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
835 if (pos-- == 0) { 896 if (pos-- == 0) {
836 seq->private = &ip_vs_conn_tab[idx]; 897 seq->private = &ip_vs_conn_tab[idx];
837 return cp; 898 return cp;
838 } 899 }
839 } 900 }
840 ct_read_unlock_bh(idx); 901 ct_read_unlock_bh(idx);
@@ -891,30 +952,45 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
891 952
892 if (v == SEQ_START_TOKEN) 953 if (v == SEQ_START_TOKEN)
893 seq_puts(seq, 954 seq_puts(seq,
894 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n"); 955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
895 else { 956 else {
896 const struct ip_vs_conn *cp = v; 957 const struct ip_vs_conn *cp = v;
958 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
959 size_t len = 0;
960
961 if (cp->dest && cp->pe_data &&
962 cp->dest->svc->pe->show_pe_data) {
963 pe_data[0] = ' ';
964 len = strlen(cp->dest->svc->pe->name);
965 memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
966 pe_data[len + 1] = ' ';
967 len += 2;
968 len += cp->dest->svc->pe->show_pe_data(cp,
969 pe_data + len);
970 }
971 pe_data[len] = '\0';
897 972
898#ifdef CONFIG_IP_VS_IPV6 973#ifdef CONFIG_IP_VS_IPV6
899 if (cp->af == AF_INET6) 974 if (cp->af == AF_INET6)
900 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", 975 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
976 "%pI6 %04X %-11s %7lu%s\n",
901 ip_vs_proto_name(cp->protocol), 977 ip_vs_proto_name(cp->protocol),
902 &cp->caddr.in6, ntohs(cp->cport), 978 &cp->caddr.in6, ntohs(cp->cport),
903 &cp->vaddr.in6, ntohs(cp->vport), 979 &cp->vaddr.in6, ntohs(cp->vport),
904 &cp->daddr.in6, ntohs(cp->dport), 980 &cp->daddr.in6, ntohs(cp->dport),
905 ip_vs_state_name(cp->protocol, cp->state), 981 ip_vs_state_name(cp->protocol, cp->state),
906 (cp->timer.expires-jiffies)/HZ); 982 (cp->timer.expires-jiffies)/HZ, pe_data);
907 else 983 else
908#endif 984#endif
909 seq_printf(seq, 985 seq_printf(seq,
910 "%-3s %08X %04X %08X %04X" 986 "%-3s %08X %04X %08X %04X"
911 " %08X %04X %-11s %7lu\n", 987 " %08X %04X %-11s %7lu%s\n",
912 ip_vs_proto_name(cp->protocol), 988 ip_vs_proto_name(cp->protocol),
913 ntohl(cp->caddr.ip), ntohs(cp->cport), 989 ntohl(cp->caddr.ip), ntohs(cp->cport),
914 ntohl(cp->vaddr.ip), ntohs(cp->vport), 990 ntohl(cp->vaddr.ip), ntohs(cp->vport),
915 ntohl(cp->daddr.ip), ntohs(cp->dport), 991 ntohl(cp->daddr.ip), ntohs(cp->dport),
916 ip_vs_state_name(cp->protocol, cp->state), 992 ip_vs_state_name(cp->protocol, cp->state),
917 (cp->timer.expires-jiffies)/HZ); 993 (cp->timer.expires-jiffies)/HZ, pe_data);
918 } 994 }
919 return 0; 995 return 0;
920} 996}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 0c043b6ce65e..b4e51e9c5a04 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -48,6 +48,7 @@
48#ifdef CONFIG_IP_VS_IPV6 48#ifdef CONFIG_IP_VS_IPV6
49#include <net/ipv6.h> 49#include <net/ipv6.h>
50#include <linux/netfilter_ipv6.h> 50#include <linux/netfilter_ipv6.h>
51#include <net/ip6_route.h>
51#endif 52#endif
52 53
53#include <net/ip_vs.h> 54#include <net/ip_vs.h>
@@ -176,6 +177,18 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
176 return pp->state_transition(cp, direction, skb, pp); 177 return pp->state_transition(cp, direction, skb, pp);
177} 178}
178 179
180static inline void
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p)
186{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
188 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb);
191}
179 192
180/* 193/*
181 * IPVS persistent scheduling function 194 * IPVS persistent scheduling function
@@ -186,15 +199,16 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
186 */ 199 */
187static struct ip_vs_conn * 200static struct ip_vs_conn *
188ip_vs_sched_persist(struct ip_vs_service *svc, 201ip_vs_sched_persist(struct ip_vs_service *svc,
189 const struct sk_buff *skb, 202 struct sk_buff *skb,
190 __be16 ports[2]) 203 __be16 ports[2])
191{ 204{
192 struct ip_vs_conn *cp = NULL; 205 struct ip_vs_conn *cp = NULL;
193 struct ip_vs_iphdr iph; 206 struct ip_vs_iphdr iph;
194 struct ip_vs_dest *dest; 207 struct ip_vs_dest *dest;
195 struct ip_vs_conn *ct; 208 struct ip_vs_conn *ct;
196 __be16 dport; /* destination port to forward */ 209 __be16 dport = 0; /* destination port to forward */
197 __be16 flags; 210 unsigned int flags;
211 struct ip_vs_conn_param param;
198 union nf_inet_addr snet; /* source network of the client, 212 union nf_inet_addr snet; /* source network of the client,
199 after masking */ 213 after masking */
200 214
@@ -227,120 +241,75 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
227 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> 241 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
228 * is created for other persistent services. 242 * is created for other persistent services.
229 */ 243 */
230 if (ports[1] == svc->port) { 244 {
231 /* Check if a template already exists */ 245 int protocol = iph.protocol;
232 if (svc->port != FTPPORT) 246 const union nf_inet_addr *vaddr = &iph.daddr;
233 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
234 &iph.daddr, ports[1]); 248 __be16 vport = 0;
235 else 249
236 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 250 if (ports[1] == svc->port) {
237 &iph.daddr, 0); 251 /* non-FTP template:
238 252 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
239 if (!ct || !ip_vs_check_template(ct)) { 253 * FTP template:
240 /* 254 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
241 * No template found or the dest of the connection
242 * template is not available.
243 */
244 dest = svc->scheduler->schedule(svc, skb);
245 if (dest == NULL) {
246 IP_VS_DBG(1, "p-schedule: no dest found.\n");
247 return NULL;
248 }
249
250 /*
251 * Create a template like <protocol,caddr,0,
252 * vaddr,vport,daddr,dport> for non-ftp service,
253 * and <protocol,caddr,0,vaddr,0,daddr,0>
254 * for ftp service.
255 */ 255 */
256 if (svc->port != FTPPORT) 256 if (svc->port != FTPPORT)
257 ct = ip_vs_conn_new(svc->af, iph.protocol, 257 vport = ports[1];
258 &snet, 0,
259 &iph.daddr,
260 ports[1],
261 &dest->addr, dest->port,
262 IP_VS_CONN_F_TEMPLATE,
263 dest);
264 else
265 ct = ip_vs_conn_new(svc->af, iph.protocol,
266 &snet, 0,
267 &iph.daddr, 0,
268 &dest->addr, 0,
269 IP_VS_CONN_F_TEMPLATE,
270 dest);
271 if (ct == NULL)
272 return NULL;
273
274 ct->timeout = svc->timeout;
275 } else { 258 } else {
276 /* set destination with the found template */ 259 /* Note: persistent fwmark-based services and
277 dest = ct->dest; 260 * persistent port zero service are handled here.
278 } 261 * fwmark template:
279 dport = dest->port; 262 * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
280 } else { 263 * port zero template:
281 /* 264 * <protocol,caddr,0,vaddr,0,daddr,0>
282 * Note: persistent fwmark-based services and persistent
283 * port zero service are handled here.
284 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
285 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
286 */
287 if (svc->fwmark) {
288 union nf_inet_addr fwmark = {
289 .ip = htonl(svc->fwmark)
290 };
291
292 ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
293 &fwmark, 0);
294 } else
295 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
296 &iph.daddr, 0);
297
298 if (!ct || !ip_vs_check_template(ct)) {
299 /*
300 * If it is not persistent port zero, return NULL,
301 * otherwise create a connection template.
302 */ 265 */
303 if (svc->port) 266 if (svc->fwmark) {
304 return NULL; 267 protocol = IPPROTO_IP;
305 268 vaddr = &fwmark;
306 dest = svc->scheduler->schedule(svc, skb);
307 if (dest == NULL) {
308 IP_VS_DBG(1, "p-schedule: no dest found.\n");
309 return NULL;
310 } 269 }
270 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
272 vaddr, vport, &param);
273 }
311 274
312 /* 275 /* Check if a template already exists */
313 * Create a template according to the service 276 ct = ip_vs_ct_in_get(&param);
314 */ 277 if (!ct || !ip_vs_check_template(ct)) {
315 if (svc->fwmark) { 278 /* No template found or the dest of the connection
316 union nf_inet_addr fwmark = { 279 * template is not available.
317 .ip = htonl(svc->fwmark) 280 */
318 }; 281 dest = svc->scheduler->schedule(svc, skb);
319 282 if (!dest) {
320 ct = ip_vs_conn_new(svc->af, IPPROTO_IP, 283 IP_VS_DBG(1, "p-schedule: no dest found.\n");
321 &snet, 0, 284 kfree(param.pe_data);
322 &fwmark, 0, 285 return NULL;
323 &dest->addr, 0,
324 IP_VS_CONN_F_TEMPLATE,
325 dest);
326 } else
327 ct = ip_vs_conn_new(svc->af, iph.protocol,
328 &snet, 0,
329 &iph.daddr, 0,
330 &dest->addr, 0,
331 IP_VS_CONN_F_TEMPLATE,
332 dest);
333 if (ct == NULL)
334 return NULL;
335
336 ct->timeout = svc->timeout;
337 } else {
338 /* set destination with the found template */
339 dest = ct->dest;
340 } 286 }
341 dport = ports[1]; 287
288 if (ports[1] == svc->port && svc->port != FTPPORT)
289 dport = dest->port;
290
291 /* Create a template
292 * This adds param.pe_data to the template,
293 * and thus param.pe_data will be destroyed
294 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest);
297 if (ct == NULL) {
298 kfree(param.pe_data);
299 return NULL;
300 }
301
302 ct->timeout = svc->timeout;
303 } else {
304 /* set destination with the found template */
305 dest = ct->dest;
306 kfree(param.pe_data);
342 } 307 }
343 308
309 dport = ports[1];
310 if (dport == svc->port && dest->port)
311 dport = dest->port;
312
344 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 313 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
345 && iph.protocol == IPPROTO_UDP)? 314 && iph.protocol == IPPROTO_UDP)?
346 IP_VS_CONN_F_ONE_PACKET : 0; 315 IP_VS_CONN_F_ONE_PACKET : 0;
@@ -348,12 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
348 /* 317 /*
349 * Create a new connection according to the template 318 * Create a new connection according to the template
350 */ 319 */
351 cp = ip_vs_conn_new(svc->af, iph.protocol, 320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
352 &iph.saddr, ports[0], 321 &iph.daddr, ports[1], &param);
353 &iph.daddr, ports[1], 322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
354 &dest->addr, dport,
355 flags,
356 dest);
357 if (cp == NULL) { 323 if (cp == NULL) {
358 ip_vs_conn_put(ct); 324 ip_vs_conn_put(ct);
359 return NULL; 325 return NULL;
@@ -377,23 +343,53 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
377 * Protocols supported: TCP, UDP 343 * Protocols supported: TCP, UDP
378 */ 344 */
379struct ip_vs_conn * 345struct ip_vs_conn *
380ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored)
381{ 348{
382 struct ip_vs_conn *cp = NULL; 349 struct ip_vs_conn *cp = NULL;
383 struct ip_vs_iphdr iph; 350 struct ip_vs_iphdr iph;
384 struct ip_vs_dest *dest; 351 struct ip_vs_dest *dest;
385 __be16 _ports[2], *pptr, flags; 352 __be16 _ports[2], *pptr;
353 unsigned int flags;
386 354
355 *ignored = 1;
387 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 356 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
388 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 357 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
389 if (pptr == NULL) 358 if (pptr == NULL)
390 return NULL; 359 return NULL;
391 360
392 /* 361 /*
362 * FTPDATA needs this check when using local real server.
363 * Never schedule Active FTPDATA connections from real server.
364 * For LVS-NAT they must be already created. For other methods
365 * with persistence the connection is created on SYN+ACK.
366 */
367 if (pptr[0] == FTPDATA) {
368 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
369 "Not scheduling FTPDATA");
370 return NULL;
371 }
372
373 /*
374 * Do not schedule replies from local real server. It is risky
375 * for fwmark services but mostly for persistent services.
376 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp);
383 return NULL;
384 }
385
386 /*
393 * Persistent service 387 * Persistent service
394 */ 388 */
395 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
390 *ignored = 0;
396 return ip_vs_sched_persist(svc, skb, pptr); 391 return ip_vs_sched_persist(svc, skb, pptr);
392 }
397 393
398 /* 394 /*
399 * Non-persistent service 395 * Non-persistent service
@@ -406,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
406 return NULL; 402 return NULL;
407 } 403 }
408 404
405 *ignored = 0;
406
409 dest = svc->scheduler->schedule(svc, skb); 407 dest = svc->scheduler->schedule(svc, skb);
410 if (dest == NULL) { 408 if (dest == NULL) {
411 IP_VS_DBG(1, "Schedule: no dest found.\n"); 409 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,14 +417,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
419 /* 417 /*
420 * Create a connection entry. 418 * Create a connection entry.
421 */ 419 */
422 cp = ip_vs_conn_new(svc->af, iph.protocol, 420 {
423 &iph.saddr, pptr[0], 421 struct ip_vs_conn_param p;
424 &iph.daddr, pptr[1], 422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
425 &dest->addr, dest->port ? dest->port : pptr[1], 423 pptr[0], &iph.daddr, pptr[1], &p);
426 flags, 424 cp = ip_vs_conn_new(&p, &dest->addr,
427 dest); 425 dest->port ? dest->port : pptr[1],
428 if (cp == NULL) 426 flags, dest);
429 return NULL; 427 if (!cp)
428 return NULL;
429 }
430 430
431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
432 "d:%s:%u conn->flags:%X conn->refcnt:%d\n", 432 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
@@ -473,23 +473,26 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
474 int ret, cs; 474 int ret, cs;
475 struct ip_vs_conn *cp; 475 struct ip_vs_conn *cp;
476 __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
477 iph.protocol == IPPROTO_UDP)? 477 iph.protocol == IPPROTO_UDP)?
478 IP_VS_CONN_F_ONE_PACKET : 0; 478 IP_VS_CONN_F_ONE_PACKET : 0;
479 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 479 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
480 480
481 ip_vs_service_put(svc); 481 ip_vs_service_put(svc);
482 482
483 /* create a new connection entry */ 483 /* create a new connection entry */
484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
485 cp = ip_vs_conn_new(svc->af, iph.protocol, 485 {
486 &iph.saddr, pptr[0], 486 struct ip_vs_conn_param p;
487 &iph.daddr, pptr[1], 487 ip_vs_conn_fill_param(svc->af, iph.protocol,
488 &daddr, 0, 488 &iph.saddr, pptr[0],
489 IP_VS_CONN_F_BYPASS | flags, 489 &iph.daddr, pptr[1], &p);
490 NULL); 490 cp = ip_vs_conn_new(&p, &daddr, 0,
491 if (cp == NULL) 491 IP_VS_CONN_F_BYPASS | flags,
492 return NF_DROP; 492 NULL);
493 if (!cp)
494 return NF_DROP;
495 }
493 496
494 /* statistics */ 497 /* statistics */
495 ip_vs_in_stats(cp, skb); 498 ip_vs_in_stats(cp, skb);
@@ -527,9 +530,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
527 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ 530 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
528 */ 531 */
529#ifdef CONFIG_IP_VS_IPV6 532#ifdef CONFIG_IP_VS_IPV6
530 if (svc->af == AF_INET6) 533 if (svc->af == AF_INET6) {
534 if (!skb->dev) {
535 struct net *net = dev_net(skb_dst(skb)->dev);
536
537 skb->dev = net->loopback_dev;
538 }
531 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 539 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
532 else 540 } else
533#endif 541#endif
534 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 542 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
535 543
@@ -541,6 +549,15 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
541 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 549 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
542} 550}
543 551
552static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
553{
554 if (NF_INET_LOCAL_IN == hooknum)
555 return IP_DEFRAG_VS_IN;
556 if (NF_INET_FORWARD == hooknum)
557 return IP_DEFRAG_VS_FWD;
558 return IP_DEFRAG_VS_OUT;
559}
560
544static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) 561static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
545{ 562{
546 int err = ip_defrag(skb, user); 563 int err = ip_defrag(skb, user);
@@ -601,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
601 skb->ip_summed = CHECKSUM_UNNECESSARY; 618 skb->ip_summed = CHECKSUM_UNNECESSARY;
602 619
603 if (inout) 620 if (inout)
604 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 621 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
605 "Forwarding altered outgoing ICMP"); 622 "Forwarding altered outgoing ICMP");
606 else 623 else
607 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 624 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
608 "Forwarding altered incoming ICMP"); 625 "Forwarding altered incoming ICMP");
609} 626}
610 627
@@ -646,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
646 skb->ip_summed = CHECKSUM_PARTIAL; 663 skb->ip_summed = CHECKSUM_PARTIAL;
647 664
648 if (inout) 665 if (inout)
649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 666 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
650 "Forwarding altered outgoing ICMPv6"); 667 (void *)ciph - (void *)iph,
668 "Forwarding altered outgoing ICMPv6");
651 else 669 else
652 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 670 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
653 "Forwarding altered incoming ICMPv6"); 671 (void *)ciph - (void *)iph,
672 "Forwarding altered incoming ICMPv6");
654} 673}
655#endif 674#endif
656 675
@@ -691,10 +710,25 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
691#endif 710#endif
692 ip_vs_nat_icmp(skb, pp, cp, 1); 711 ip_vs_nat_icmp(skb, pp, cp, 1);
693 712
713#ifdef CONFIG_IP_VS_IPV6
714 if (af == AF_INET6) {
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out;
717 } else
718#endif
719 if ((sysctl_ip_vs_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out;
723
694 /* do the statistics and put it back */ 724 /* do the statistics and put it back */
695 ip_vs_out_stats(cp, skb); 725 ip_vs_out_stats(cp, skb);
696 726
697 skb->ipvs_property = 1; 727 skb->ipvs_property = 1;
728 if (!(cp->flags & IP_VS_CONN_F_NFCT))
729 ip_vs_notrack(skb);
730 else
731 ip_vs_update_conntrack(skb, cp, 0);
698 verdict = NF_ACCEPT; 732 verdict = NF_ACCEPT;
699 733
700out: 734out:
@@ -708,7 +742,8 @@ out:
708 * Find any that might be relevant, check against existing connections. 742 * Find any that might be relevant, check against existing connections.
709 * Currently handles error types - unreachable, quench, ttl exceeded. 743 * Currently handles error types - unreachable, quench, ttl exceeded.
710 */ 744 */
711static int ip_vs_out_icmp(struct sk_buff *skb, int *related) 745static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
746 unsigned int hooknum)
712{ 747{
713 struct iphdr *iph; 748 struct iphdr *iph;
714 struct icmphdr _icmph, *ic; 749 struct icmphdr _icmph, *ic;
@@ -723,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
723 758
724 /* reassemble IP fragments */ 759 /* reassemble IP fragments */
725 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 760 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
726 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 761 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
727 return NF_STOLEN; 762 return NF_STOLEN;
728 } 763 }
729 764
@@ -766,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
766 pp->dont_defrag)) 801 pp->dont_defrag))
767 return NF_ACCEPT; 802 return NF_ACCEPT;
768 803
769 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); 804 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
805 "Checking outgoing ICMP for");
770 806
771 offset += cih->ihl * 4; 807 offset += cih->ihl * 4;
772 808
@@ -782,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
782} 818}
783 819
784#ifdef CONFIG_IP_VS_IPV6 820#ifdef CONFIG_IP_VS_IPV6
785static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) 821static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
822 unsigned int hooknum)
786{ 823{
787 struct ipv6hdr *iph; 824 struct ipv6hdr *iph;
788 struct icmp6hdr _icmph, *ic; 825 struct icmp6hdr _icmph, *ic;
@@ -798,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
798 835
799 /* reassemble IP fragments */ 836 /* reassemble IP fragments */
800 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 837 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
801 if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) 838 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
802 return NF_STOLEN; 839 return NF_STOLEN;
803 } 840 }
804 841
@@ -841,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
841 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 878 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
842 return NF_ACCEPT; 879 return NF_ACCEPT;
843 880
844 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); 881 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
882 "Checking outgoing ICMPv6 for");
845 883
846 offset += sizeof(struct ipv6hdr); 884 offset += sizeof(struct ipv6hdr);
847 885
@@ -889,7 +927,7 @@ static unsigned int
889handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
890 struct ip_vs_conn *cp, int ihl) 928 struct ip_vs_conn *cp, int ihl)
891{ 929{
892 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); 930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
893 931
894 if (!skb_make_writable(skb, ihl)) 932 if (!skb_make_writable(skb, ihl))
895 goto drop; 933 goto drop;
@@ -908,6 +946,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
908 ip_send_check(ip_hdr(skb)); 946 ip_send_check(ip_hdr(skb));
909 } 947 }
910 948
949 /*
950 * nf_iterate does not expect change in the skb->dst->dev.
951 * It looks like it is not fatal to enable this code for hooks
952 * where our handlers are at the end of the chain list and
953 * when all next handlers use skb->dst->dev and not outdev.
954 * It will definitely route properly the inout NAT traffic
955 * when multiple paths are used.
956 */
957
911 /* For policy routing, packets originating from this 958 /* For policy routing, packets originating from this
912 * machine itself may be routed differently to packets 959 * machine itself may be routed differently to packets
913 * passing through. We want this packet to be routed as 960 * passing through. We want this packet to be routed as
@@ -916,21 +963,25 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
916 */ 963 */
917#ifdef CONFIG_IP_VS_IPV6 964#ifdef CONFIG_IP_VS_IPV6
918 if (af == AF_INET6) { 965 if (af == AF_INET6) {
919 if (ip6_route_me_harder(skb) != 0) 966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
920 goto drop; 967 goto drop;
921 } else 968 } else
922#endif 969#endif
923 if (ip_route_me_harder(skb, RTN_LOCAL) != 0) 970 if ((sysctl_ip_vs_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0)
924 goto drop; 973 goto drop;
925 974
926 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); 975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
927 976
928 ip_vs_out_stats(cp, skb); 977 ip_vs_out_stats(cp, skb);
929 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
930 ip_vs_update_conntrack(skb, cp, 0);
931 ip_vs_conn_put(cp);
932
933 skb->ipvs_property = 1; 979 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb);
982 else
983 ip_vs_update_conntrack(skb, cp, 0);
984 ip_vs_conn_put(cp);
934 985
935 LeaveFunction(11); 986 LeaveFunction(11);
936 return NF_ACCEPT; 987 return NF_ACCEPT;
@@ -938,35 +989,46 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
938drop: 989drop:
939 ip_vs_conn_put(cp); 990 ip_vs_conn_put(cp);
940 kfree_skb(skb); 991 kfree_skb(skb);
992 LeaveFunction(11);
941 return NF_STOLEN; 993 return NF_STOLEN;
942} 994}
943 995
944/* 996/*
945 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
946 * Check if outgoing packet belongs to the established ip_vs_conn. 997 * Check if outgoing packet belongs to the established ip_vs_conn.
947 */ 998 */
948static unsigned int 999static unsigned int
949ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
950 const struct net_device *in, const struct net_device *out,
951 int (*okfn)(struct sk_buff *))
952{ 1001{
953 struct ip_vs_iphdr iph; 1002 struct ip_vs_iphdr iph;
954 struct ip_vs_protocol *pp; 1003 struct ip_vs_protocol *pp;
955 struct ip_vs_conn *cp; 1004 struct ip_vs_conn *cp;
956 int af;
957 1005
958 EnterFunction(11); 1006 EnterFunction(11);
959 1007
960 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1008 /* Already marked as IPVS request or reply? */
961
962 if (skb->ipvs_property) 1009 if (skb->ipvs_property)
963 return NF_ACCEPT; 1010 return NF_ACCEPT;
964 1011
1012 /* Bad... Do not break raw sockets */
1013 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1014 af == AF_INET)) {
1015 struct sock *sk = skb->sk;
1016 struct inet_sock *inet = inet_sk(skb->sk);
1017
1018 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1019 return NF_ACCEPT;
1020 }
1021
1022 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT;
1024
965 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
966#ifdef CONFIG_IP_VS_IPV6 1026#ifdef CONFIG_IP_VS_IPV6
967 if (af == AF_INET6) { 1027 if (af == AF_INET6) {
968 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1028 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
969 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1029 int related;
1030 int verdict = ip_vs_out_icmp_v6(skb, &related,
1031 hooknum);
970 1032
971 if (related) 1033 if (related)
972 return verdict; 1034 return verdict;
@@ -975,7 +1037,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
975 } else 1037 } else
976#endif 1038#endif
977 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1039 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
978 int related, verdict = ip_vs_out_icmp(skb, &related); 1040 int related;
1041 int verdict = ip_vs_out_icmp(skb, &related, hooknum);
979 1042
980 if (related) 1043 if (related)
981 return verdict; 1044 return verdict;
@@ -989,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
989 /* reassemble IP fragments */ 1052 /* reassemble IP fragments */
990#ifdef CONFIG_IP_VS_IPV6 1053#ifdef CONFIG_IP_VS_IPV6
991 if (af == AF_INET6) { 1054 if (af == AF_INET6) {
992 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1055 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
993 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1056 if (ip_vs_gather_frags_v6(skb,
994 1057 ip_vs_defrag_user(hooknum)))
995 if (related) 1058 return NF_STOLEN;
996 return verdict;
997
998 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
999 } 1059 }
1060
1061 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1000 } else 1062 } else
1001#endif 1063#endif
1002 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && 1064 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
1003 !pp->dont_defrag)) { 1065 !pp->dont_defrag)) {
1004 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 1066 if (ip_vs_gather_frags(skb,
1067 ip_vs_defrag_user(hooknum)))
1005 return NF_STOLEN; 1068 return NF_STOLEN;
1006 1069
1007 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1070 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1012,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
1012 */ 1075 */
1013 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1014 1077
1015 if (unlikely(!cp)) { 1078 if (likely(cp))
1016 if (sysctl_ip_vs_nat_icmp_send && 1079 return handle_response(af, skb, pp, cp, iph.len);
1017 (pp->protocol == IPPROTO_TCP || 1080 if (sysctl_ip_vs_nat_icmp_send &&
1018 pp->protocol == IPPROTO_UDP || 1081 (pp->protocol == IPPROTO_TCP ||
1019 pp->protocol == IPPROTO_SCTP)) { 1082 pp->protocol == IPPROTO_UDP ||
1020 __be16 _ports[2], *pptr; 1083 pp->protocol == IPPROTO_SCTP)) {
1021 1084 __be16 _ports[2], *pptr;
1022 pptr = skb_header_pointer(skb, iph.len, 1085
1023 sizeof(_ports), _ports); 1086 pptr = skb_header_pointer(skb, iph.len,
1024 if (pptr == NULL) 1087 sizeof(_ports), _ports);
1025 return NF_ACCEPT; /* Not for me */ 1088 if (pptr == NULL)
1026 if (ip_vs_lookup_real_service(af, iph.protocol, 1089 return NF_ACCEPT; /* Not for me */
1027 &iph.saddr, 1090 if (ip_vs_lookup_real_service(af, iph.protocol,
1028 pptr[0])) { 1091 &iph.saddr,
1029 /* 1092 pptr[0])) {
1030 * Notify the real server: there is no 1093 /*
1031 * existing entry if it is not RST 1094 * Notify the real server: there is no
1032 * packet or not TCP packet. 1095 * existing entry if it is not RST
1033 */ 1096 * packet or not TCP packet.
1034 if ((iph.protocol != IPPROTO_TCP && 1097 */
1035 iph.protocol != IPPROTO_SCTP) 1098 if ((iph.protocol != IPPROTO_TCP &&
1036 || ((iph.protocol == IPPROTO_TCP 1099 iph.protocol != IPPROTO_SCTP)
1037 && !is_tcp_reset(skb, iph.len)) 1100 || ((iph.protocol == IPPROTO_TCP
1038 || (iph.protocol == IPPROTO_SCTP 1101 && !is_tcp_reset(skb, iph.len))
1039 && !is_sctp_abort(skb, 1102 || (iph.protocol == IPPROTO_SCTP
1040 iph.len)))) { 1103 && !is_sctp_abort(skb,
1104 iph.len)))) {
1041#ifdef CONFIG_IP_VS_IPV6 1105#ifdef CONFIG_IP_VS_IPV6
1042 if (af == AF_INET6) 1106 if (af == AF_INET6) {
1043 icmpv6_send(skb, 1107 struct net *net =
1044 ICMPV6_DEST_UNREACH, 1108 dev_net(skb_dst(skb)->dev);
1045 ICMPV6_PORT_UNREACH, 1109
1046 0); 1110 if (!skb->dev)
1047 else 1111 skb->dev = net->loopback_dev;
1112 icmpv6_send(skb,
1113 ICMPV6_DEST_UNREACH,
1114 ICMPV6_PORT_UNREACH,
1115 0);
1116 } else
1048#endif 1117#endif
1049 icmp_send(skb, 1118 icmp_send(skb,
1050 ICMP_DEST_UNREACH, 1119 ICMP_DEST_UNREACH,
1051 ICMP_PORT_UNREACH, 0); 1120 ICMP_PORT_UNREACH, 0);
1052 return NF_DROP; 1121 return NF_DROP;
1053 }
1054 } 1122 }
1055 } 1123 }
1056 IP_VS_DBG_PKT(12, pp, skb, 0,
1057 "packet continues traversal as normal");
1058 return NF_ACCEPT;
1059 } 1124 }
1125 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1126 "ip_vs_out: packet continues traversal as normal");
1127 return NF_ACCEPT;
1128}
1060 1129
1061 return handle_response(af, skb, pp, cp, iph.len); 1130/*
1131 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
1132 * used only for VS/NAT.
1133 * Check if packet is reply for established ip_vs_conn.
1134 */
1135static unsigned int
1136ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1137 const struct net_device *in, const struct net_device *out,
1138 int (*okfn)(struct sk_buff *))
1139{
1140 return ip_vs_out(hooknum, skb, AF_INET);
1062} 1141}
1063 1142
1143/*
1144 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1145 * Check if packet is reply for established ip_vs_conn.
1146 */
1147static unsigned int
1148ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1149 const struct net_device *in, const struct net_device *out,
1150 int (*okfn)(struct sk_buff *))
1151{
1152 unsigned int verdict;
1153
1154 /* Disable BH in LOCAL_OUT until all places are fixed */
1155 local_bh_disable();
1156 verdict = ip_vs_out(hooknum, skb, AF_INET);
1157 local_bh_enable();
1158 return verdict;
1159}
1160
1161#ifdef CONFIG_IP_VS_IPV6
1162
1163/*
1164 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
1165 * used only for VS/NAT.
1166 * Check if packet is reply for established ip_vs_conn.
1167 */
1168static unsigned int
1169ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1170 const struct net_device *in, const struct net_device *out,
1171 int (*okfn)(struct sk_buff *))
1172{
1173 return ip_vs_out(hooknum, skb, AF_INET6);
1174}
1175
1176/*
1177 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1178 * Check if packet is reply for established ip_vs_conn.
1179 */
1180static unsigned int
1181ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1182 const struct net_device *in, const struct net_device *out,
1183 int (*okfn)(struct sk_buff *))
1184{
1185 unsigned int verdict;
1186
1187 /* Disable BH in LOCAL_OUT until all places are fixed */
1188 local_bh_disable();
1189 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1190 local_bh_enable();
1191 return verdict;
1192}
1193
1194#endif
1064 1195
1065/* 1196/*
1066 * Handle ICMP messages in the outside-to-inside direction (incoming). 1197 * Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1084,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1084 1215
1085 /* reassemble IP fragments */ 1216 /* reassemble IP fragments */
1086 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 1217 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
1087 if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? 1218 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
1088 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
1089 return NF_STOLEN; 1219 return NF_STOLEN;
1090 } 1220 }
1091 1221
@@ -1128,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1128 pp->dont_defrag)) 1258 pp->dont_defrag))
1129 return NF_ACCEPT; 1259 return NF_ACCEPT;
1130 1260
1131 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); 1261 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
1262 "Checking incoming ICMP for");
1132 1263
1133 offset += cih->ihl * 4; 1264 offset += cih->ihl * 4;
1134 1265
@@ -1162,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1162 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1293 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1163 offset += 2 * sizeof(__u16); 1294 offset += 2 * sizeof(__u16);
1164 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); 1295 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
1165 /* do not touch skb anymore */ 1296 /* LOCALNODE from FORWARD hook is not supported */
1297 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1298 skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
1299 IP_VS_DBG(1, "%s(): "
1300 "local delivery to %pI4 but in FORWARD\n",
1301 __func__, &skb_rtable(skb)->rt_dst);
1302 verdict = NF_DROP;
1303 }
1166 1304
1167 out: 1305 out:
1168 __ip_vs_conn_put(cp); 1306 __ip_vs_conn_put(cp);
@@ -1183,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1183 struct ip_vs_protocol *pp; 1321 struct ip_vs_protocol *pp;
1184 unsigned int offset, verdict; 1322 unsigned int offset, verdict;
1185 union nf_inet_addr snet; 1323 union nf_inet_addr snet;
1324 struct rt6_info *rt;
1186 1325
1187 *related = 1; 1326 *related = 1;
1188 1327
1189 /* reassemble IP fragments */ 1328 /* reassemble IP fragments */
1190 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 1329 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1191 if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? 1330 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1192 IP_DEFRAG_VS_IN :
1193 IP_DEFRAG_VS_FWD))
1194 return NF_STOLEN; 1331 return NF_STOLEN;
1195 } 1332 }
1196 1333
@@ -1233,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1233 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 1370 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1234 return NF_ACCEPT; 1371 return NF_ACCEPT;
1235 1372
1236 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); 1373 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
1374 "Checking incoming ICMPv6 for");
1237 1375
1238 offset += sizeof(struct ipv6hdr); 1376 offset += sizeof(struct ipv6hdr);
1239 1377
@@ -1261,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1261 IPPROTO_SCTP == cih->nexthdr) 1399 IPPROTO_SCTP == cih->nexthdr)
1262 offset += 2 * sizeof(__u16); 1400 offset += 2 * sizeof(__u16);
1263 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); 1401 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
1264 /* do not touch skb anymore */ 1402 /* LOCALNODE from FORWARD hook is not supported */
1403 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1404 (rt = (struct rt6_info *) skb_dst(skb)) &&
1405 rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
1406 IP_VS_DBG(1, "%s(): "
1407 "local delivery to %pI6 but in FORWARD\n",
1408 __func__, &rt->rt6i_dst);
1409 verdict = NF_DROP;
1410 }
1265 1411
1266 __ip_vs_conn_put(cp); 1412 __ip_vs_conn_put(cp);
1267 1413
@@ -1275,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1275 * and send it on its way... 1421 * and send it on its way...
1276 */ 1422 */
1277static unsigned int 1423static unsigned int
1278ip_vs_in(unsigned int hooknum, struct sk_buff *skb, 1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1279 const struct net_device *in, const struct net_device *out,
1280 int (*okfn)(struct sk_buff *))
1281{ 1425{
1282 struct ip_vs_iphdr iph; 1426 struct ip_vs_iphdr iph;
1283 struct ip_vs_protocol *pp; 1427 struct ip_vs_protocol *pp;
1284 struct ip_vs_conn *cp; 1428 struct ip_vs_conn *cp;
1285 int ret, restart, af, pkts; 1429 int ret, restart, pkts;
1286 1430
1287 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1431 /* Already marked as IPVS request or reply? */
1288 1432 if (skb->ipvs_property)
1289 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1433 return NF_ACCEPT;
1290 1434
1291 /* 1435 /*
1292 * Big tappo: only PACKET_HOST, including loopback for local client 1436 * Big tappo:
1293 * Don't handle local packets on IPv6 for now 1437 * - remote client: only PACKET_HOST
1438 * - route: used for struct net when skb->dev is unset
1294 */ 1439 */
1295 if (unlikely(skb->pkt_type != PACKET_HOST)) { 1440 if (unlikely((skb->pkt_type != PACKET_HOST &&
1296 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", 1441 hooknum != NF_INET_LOCAL_OUT) ||
1297 skb->pkt_type, 1442 !skb_dst(skb))) {
1298 iph.protocol, 1443 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1299 IP_VS_DBG_ADDR(af, &iph.daddr)); 1444 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1445 " ignored in hook %u\n",
1446 skb->pkt_type, iph.protocol,
1447 IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
1300 return NF_ACCEPT; 1448 return NF_ACCEPT;
1301 } 1449 }
1450 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1451
1452 /* Bad... Do not break raw sockets */
1453 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1454 af == AF_INET)) {
1455 struct sock *sk = skb->sk;
1456 struct inet_sock *inet = inet_sk(skb->sk);
1457
1458 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1459 return NF_ACCEPT;
1460 }
1302 1461
1303#ifdef CONFIG_IP_VS_IPV6 1462#ifdef CONFIG_IP_VS_IPV6
1304 if (af == AF_INET6) { 1463 if (af == AF_INET6) {
1305 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1464 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1306 int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1465 int related;
1466 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
1307 1467
1308 if (related) 1468 if (related)
1309 return verdict; 1469 return verdict;
@@ -1312,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1312 } else 1472 } else
1313#endif 1473#endif
1314 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1474 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
1315 int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); 1475 int related;
1476 int verdict = ip_vs_in_icmp(skb, &related, hooknum);
1316 1477
1317 if (related) 1478 if (related)
1318 return verdict; 1479 return verdict;
@@ -1332,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1332 if (unlikely(!cp)) { 1493 if (unlikely(!cp)) {
1333 int v; 1494 int v;
1334 1495
1335 /* For local client packets, it could be a response */
1336 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1337 if (cp)
1338 return handle_response(af, skb, pp, cp, iph.len);
1339
1340 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
1341 return v; 1497 return v;
1342 } 1498 }
1343 1499
1344 if (unlikely(!cp)) { 1500 if (unlikely(!cp)) {
1345 /* sorry, all this trouble for a no-hit :) */ 1501 /* sorry, all this trouble for a no-hit :) */
1346 IP_VS_DBG_PKT(12, pp, skb, 0, 1502 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1347 "packet continues traversal as normal"); 1503 "ip_vs_in: packet continues traversal as normal");
1348 return NF_ACCEPT; 1504 return NF_ACCEPT;
1349 } 1505 }
1350 1506
1351 IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); 1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1352 1508
1353 /* Check the server status */ 1509 /* Check the server status */
1354 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1415,6 +1571,72 @@ out:
1415 return ret; 1571 return ret;
1416} 1572}
1417 1573
1574/*
1575 * AF_INET handler in NF_INET_LOCAL_IN chain
1576 * Schedule and forward packets from remote clients
1577 */
1578static unsigned int
1579ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
1580 const struct net_device *in,
1581 const struct net_device *out,
1582 int (*okfn)(struct sk_buff *))
1583{
1584 return ip_vs_in(hooknum, skb, AF_INET);
1585}
1586
1587/*
1588 * AF_INET handler in NF_INET_LOCAL_OUT chain
1589 * Schedule and forward packets from local clients
1590 */
1591static unsigned int
1592ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1593 const struct net_device *in, const struct net_device *out,
1594 int (*okfn)(struct sk_buff *))
1595{
1596 unsigned int verdict;
1597
1598 /* Disable BH in LOCAL_OUT until all places are fixed */
1599 local_bh_disable();
1600 verdict = ip_vs_in(hooknum, skb, AF_INET);
1601 local_bh_enable();
1602 return verdict;
1603}
1604
1605#ifdef CONFIG_IP_VS_IPV6
1606
1607/*
1608 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1609 * Schedule and forward packets from remote clients
1610 */
1611static unsigned int
1612ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
1613 const struct net_device *in,
1614 const struct net_device *out,
1615 int (*okfn)(struct sk_buff *))
1616{
1617 return ip_vs_in(hooknum, skb, AF_INET6);
1618}
1619
1620/*
1621 * AF_INET6 handler in NF_INET_LOCAL_OUT chain
1622 * Schedule and forward packets from local clients
1623 */
1624static unsigned int
1625ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
1626 const struct net_device *in, const struct net_device *out,
1627 int (*okfn)(struct sk_buff *))
1628{
1629 unsigned int verdict;
1630
1631 /* Disable BH in LOCAL_OUT until all places are fixed */
1632 local_bh_disable();
1633 verdict = ip_vs_in(hooknum, skb, AF_INET6);
1634 local_bh_enable();
1635 return verdict;
1636}
1637
1638#endif
1639
1418 1640
1419/* 1641/*
1420 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP 1642 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
@@ -1455,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1455 1677
1456 1678
1457static struct nf_hook_ops ip_vs_ops[] __read_mostly = { 1679static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1680 /* After packet filtering, change source only for VS/NAT */
1681 {
1682 .hook = ip_vs_reply4,
1683 .owner = THIS_MODULE,
1684 .pf = PF_INET,
1685 .hooknum = NF_INET_LOCAL_IN,
1686 .priority = 99,
1687 },
1458 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1688 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1459 * or VS/NAT(change destination), so that filtering rules can be 1689 * or VS/NAT(change destination), so that filtering rules can be
1460 * applied to IPVS. */ 1690 * applied to IPVS. */
1461 { 1691 {
1462 .hook = ip_vs_in, 1692 .hook = ip_vs_remote_request4,
1463 .owner = THIS_MODULE, 1693 .owner = THIS_MODULE,
1464 .pf = PF_INET, 1694 .pf = PF_INET,
1465 .hooknum = NF_INET_LOCAL_IN, 1695 .hooknum = NF_INET_LOCAL_IN,
1466 .priority = 100, 1696 .priority = 101,
1467 }, 1697 },
1468 /* After packet filtering, change source only for VS/NAT */ 1698 /* Before ip_vs_in, change source only for VS/NAT */
1469 { 1699 {
1470 .hook = ip_vs_out, 1700 .hook = ip_vs_local_reply4,
1471 .owner = THIS_MODULE, 1701 .owner = THIS_MODULE,
1472 .pf = PF_INET, 1702 .pf = PF_INET,
1473 .hooknum = NF_INET_FORWARD, 1703 .hooknum = NF_INET_LOCAL_OUT,
1474 .priority = 100, 1704 .priority = -99,
1705 },
1706 /* After mangle, schedule and forward local requests */
1707 {
1708 .hook = ip_vs_local_request4,
1709 .owner = THIS_MODULE,
1710 .pf = PF_INET,
1711 .hooknum = NF_INET_LOCAL_OUT,
1712 .priority = -98,
1475 }, 1713 },
1476 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1714 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1477 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1715 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1479,27 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1479 .hook = ip_vs_forward_icmp, 1717 .hook = ip_vs_forward_icmp,
1480 .owner = THIS_MODULE, 1718 .owner = THIS_MODULE,
1481 .pf = PF_INET, 1719 .pf = PF_INET,
1482 .hooknum = NF_INET_FORWARD, 1720 .hooknum = NF_INET_FORWARD,
1483 .priority = 99, 1721 .priority = 99,
1722 },
1723 /* After packet filtering, change source only for VS/NAT */
1724 {
1725 .hook = ip_vs_reply4,
1726 .owner = THIS_MODULE,
1727 .pf = PF_INET,
1728 .hooknum = NF_INET_FORWARD,
1729 .priority = 100,
1484 }, 1730 },
1485#ifdef CONFIG_IP_VS_IPV6 1731#ifdef CONFIG_IP_VS_IPV6
1732 /* After packet filtering, change source only for VS/NAT */
1733 {
1734 .hook = ip_vs_reply6,
1735 .owner = THIS_MODULE,
1736 .pf = PF_INET6,
1737 .hooknum = NF_INET_LOCAL_IN,
1738 .priority = 99,
1739 },
1486 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1740 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1487 * or VS/NAT(change destination), so that filtering rules can be 1741 * or VS/NAT(change destination), so that filtering rules can be
1488 * applied to IPVS. */ 1742 * applied to IPVS. */
1489 { 1743 {
1490 .hook = ip_vs_in, 1744 .hook = ip_vs_remote_request6,
1491 .owner = THIS_MODULE, 1745 .owner = THIS_MODULE,
1492 .pf = PF_INET6, 1746 .pf = PF_INET6,
1493 .hooknum = NF_INET_LOCAL_IN, 1747 .hooknum = NF_INET_LOCAL_IN,
1494 .priority = 100, 1748 .priority = 101,
1495 }, 1749 },
1496 /* After packet filtering, change source only for VS/NAT */ 1750 /* Before ip_vs_in, change source only for VS/NAT */
1751 {
1752 .hook = ip_vs_local_reply6,
1753 .owner = THIS_MODULE,
1754 .pf = PF_INET,
1755 .hooknum = NF_INET_LOCAL_OUT,
1756 .priority = -99,
1757 },
1758 /* After mangle, schedule and forward local requests */
1497 { 1759 {
1498 .hook = ip_vs_out, 1760 .hook = ip_vs_local_request6,
1499 .owner = THIS_MODULE, 1761 .owner = THIS_MODULE,
1500 .pf = PF_INET6, 1762 .pf = PF_INET6,
1501 .hooknum = NF_INET_FORWARD, 1763 .hooknum = NF_INET_LOCAL_OUT,
1502 .priority = 100, 1764 .priority = -98,
1503 }, 1765 },
1504 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1766 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1505 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1767 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1507,8 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1507 .hook = ip_vs_forward_icmp_v6, 1769 .hook = ip_vs_forward_icmp_v6,
1508 .owner = THIS_MODULE, 1770 .owner = THIS_MODULE,
1509 .pf = PF_INET6, 1771 .pf = PF_INET6,
1510 .hooknum = NF_INET_FORWARD, 1772 .hooknum = NF_INET_FORWARD,
1511 .priority = 99, 1773 .priority = 99,
1774 },
1775 /* After packet filtering, change source only for VS/NAT */
1776 {
1777 .hook = ip_vs_reply6,
1778 .owner = THIS_MODULE,
1779 .pf = PF_INET6,
1780 .hooknum = NF_INET_FORWARD,
1781 .priority = 100,
1512 }, 1782 },
1513#endif 1783#endif
1514}; 1784};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ca8ec8c4f311..5f5daa30b0af 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0; 88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0; 90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
91 95
92 96
93#ifdef CONFIG_IP_VS_DEBUG 97#ifdef CONFIG_IP_VS_DEBUG
@@ -401,7 +405,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
401 * Get service by {proto,addr,port} in the service table. 405 * Get service by {proto,addr,port} in the service table.
402 */ 406 */
403static inline struct ip_vs_service * 407static inline struct ip_vs_service *
404__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, 408__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
405 __be16 vport) 409 __be16 vport)
406{ 410{
407 unsigned hash; 411 unsigned hash;
@@ -416,7 +420,6 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
416 && (svc->port == vport) 420 && (svc->port == vport)
417 && (svc->protocol == protocol)) { 421 && (svc->protocol == protocol)) {
418 /* HIT */ 422 /* HIT */
419 atomic_inc(&svc->usecnt);
420 return svc; 423 return svc;
421 } 424 }
422 } 425 }
@@ -429,7 +432,7 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
429 * Get service by {fwmark} in the service table. 432 * Get service by {fwmark} in the service table.
430 */ 433 */
431static inline struct ip_vs_service * 434static inline struct ip_vs_service *
432__ip_vs_svc_fwm_get(int af, __u32 fwmark) 435__ip_vs_svc_fwm_find(int af, __u32 fwmark)
433{ 436{
434 unsigned hash; 437 unsigned hash;
435 struct ip_vs_service *svc; 438 struct ip_vs_service *svc;
@@ -440,7 +443,6 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
440 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
441 if (svc->fwmark == fwmark && svc->af == af) { 444 if (svc->fwmark == fwmark && svc->af == af) {
442 /* HIT */ 445 /* HIT */
443 atomic_inc(&svc->usecnt);
444 return svc; 446 return svc;
445 } 447 }
446 } 448 }
@@ -459,14 +461,14 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
459 /* 461 /*
460 * Check the table hashed by fwmark first 462 * Check the table hashed by fwmark first
461 */ 463 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) 464 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
463 goto out; 465 goto out;
464 466
465 /* 467 /*
466 * Check the table hashed by <protocol,addr,port> 468 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 469 * for "full" addressed entries
468 */ 470 */
469 svc = __ip_vs_service_get(af, protocol, vaddr, vport); 471 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
470 472
471 if (svc == NULL 473 if (svc == NULL
472 && protocol == IPPROTO_TCP 474 && protocol == IPPROTO_TCP
@@ -476,7 +478,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
476 * Check if ftp service entry exists, the packet 478 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 479 * might belong to FTP data connections.
478 */ 480 */
479 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); 481 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
480 } 482 }
481 483
482 if (svc == NULL 484 if (svc == NULL
@@ -484,10 +486,12 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
484 /* 486 /*
485 * Check if the catch-all port (port zero) exists 487 * Check if the catch-all port (port zero) exists
486 */ 488 */
487 svc = __ip_vs_service_get(af, protocol, vaddr, 0); 489 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
488 } 490 }
489 491
490 out: 492 out:
493 if (svc)
494 atomic_inc(&svc->usecnt);
491 read_unlock(&__ip_vs_svc_lock); 495 read_unlock(&__ip_vs_svc_lock);
492 496
493 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
@@ -506,14 +510,19 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506 dest->svc = svc; 510 dest->svc = svc;
507} 511}
508 512
509static inline void 513static void
510__ip_vs_unbind_svc(struct ip_vs_dest *dest) 514__ip_vs_unbind_svc(struct ip_vs_dest *dest)
511{ 515{
512 struct ip_vs_service *svc = dest->svc; 516 struct ip_vs_service *svc = dest->svc;
513 517
514 dest->svc = NULL; 518 dest->svc = NULL;
515 if (atomic_dec_and_test(&svc->refcnt)) 519 if (atomic_dec_and_test(&svc->refcnt)) {
520 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521 svc->fwmark,
522 IP_VS_DBG_ADDR(svc->af, &svc->addr),
523 ntohs(svc->port), atomic_read(&svc->usecnt));
516 kfree(svc); 524 kfree(svc);
525 }
517} 526}
518 527
519 528
@@ -758,31 +767,18 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
758 * Update a destination in the given service 767 * Update a destination in the given service
759 */ 768 */
760static void 769static void
761__ip_vs_update_dest(struct ip_vs_service *svc, 770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
762 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) 771 struct ip_vs_dest_user_kern *udest, int add)
763{ 772{
764 int conn_flags; 773 int conn_flags;
765 774
766 /* set the weight and the flags */ 775 /* set the weight and the flags */
767 atomic_set(&dest->weight, udest->weight); 776 atomic_set(&dest->weight, udest->weight);
768 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
769 778 conn_flags |= IP_VS_CONN_F_INACTIVE;
770 /* check if local node and update the flags */
771#ifdef CONFIG_IP_VS_IPV6
772 if (svc->af == AF_INET6) {
773 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
774 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
775 | IP_VS_CONN_F_LOCALNODE;
776 }
777 } else
778#endif
779 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
780 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
781 | IP_VS_CONN_F_LOCALNODE;
782 }
783 779
784 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
785 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { 781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
786 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
787 } else { 783 } else {
788 /* 784 /*
@@ -813,6 +809,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
813 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
814 dest->u_threshold = udest->u_threshold; 810 dest->u_threshold = udest->u_threshold;
815 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
812
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
816
817 if (add)
818 ip_vs_new_estimator(&dest->stats);
819
820 write_lock_bh(&__ip_vs_svc_lock);
821
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824
825 if (add) {
826 list_add(&dest->n_list, &svc->destinations);
827 svc->num_dests++;
828 }
829
830 /* call the update_service, because server weight may be changed */
831 if (svc->scheduler->update_service)
832 svc->scheduler->update_service(svc);
833
834 write_unlock_bh(&__ip_vs_svc_lock);
816} 835}
817 836
818 837
@@ -860,13 +879,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
860 atomic_set(&dest->activeconns, 0); 879 atomic_set(&dest->activeconns, 0);
861 atomic_set(&dest->inactconns, 0); 880 atomic_set(&dest->inactconns, 0);
862 atomic_set(&dest->persistconns, 0); 881 atomic_set(&dest->persistconns, 0);
863 atomic_set(&dest->refcnt, 0); 882 atomic_set(&dest->refcnt, 1);
864 883
865 INIT_LIST_HEAD(&dest->d_list); 884 INIT_LIST_HEAD(&dest->d_list);
866 spin_lock_init(&dest->dst_lock); 885 spin_lock_init(&dest->dst_lock);
867 spin_lock_init(&dest->stats.lock); 886 spin_lock_init(&dest->stats.lock);
868 __ip_vs_update_dest(svc, dest, udest); 887 __ip_vs_update_dest(svc, dest, udest, 1);
869 ip_vs_new_estimator(&dest->stats);
870 888
871 *dest_p = dest; 889 *dest_p = dest;
872 890
@@ -926,65 +944,22 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
926 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 944 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
927 ntohs(dest->vport)); 945 ntohs(dest->vport));
928 946
929 __ip_vs_update_dest(svc, dest, udest);
930
931 /* 947 /*
932 * Get the destination from the trash 948 * Get the destination from the trash
933 */ 949 */
934 list_del(&dest->n_list); 950 list_del(&dest->n_list);
935 951
936 ip_vs_new_estimator(&dest->stats); 952 __ip_vs_update_dest(svc, dest, udest, 1);
937 953 ret = 0;
938 write_lock_bh(&__ip_vs_svc_lock); 954 } else {
939
940 /* 955 /*
941 * Wait until all other svc users go away. 956 * Allocate and initialize the dest structure
942 */ 957 */
943 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 958 ret = ip_vs_new_dest(svc, udest, &dest);
944
945 list_add(&dest->n_list, &svc->destinations);
946 svc->num_dests++;
947
948 /* call the update_service function of its scheduler */
949 if (svc->scheduler->update_service)
950 svc->scheduler->update_service(svc);
951
952 write_unlock_bh(&__ip_vs_svc_lock);
953 return 0;
954 }
955
956 /*
957 * Allocate and initialize the dest structure
958 */
959 ret = ip_vs_new_dest(svc, udest, &dest);
960 if (ret) {
961 return ret;
962 } 959 }
963
964 /*
965 * Add the dest entry into the list
966 */
967 atomic_inc(&dest->refcnt);
968
969 write_lock_bh(&__ip_vs_svc_lock);
970
971 /*
972 * Wait until all other svc users go away.
973 */
974 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
975
976 list_add(&dest->n_list, &svc->destinations);
977 svc->num_dests++;
978
979 /* call the update_service function of its scheduler */
980 if (svc->scheduler->update_service)
981 svc->scheduler->update_service(svc);
982
983 write_unlock_bh(&__ip_vs_svc_lock);
984
985 LeaveFunction(2); 960 LeaveFunction(2);
986 961
987 return 0; 962 return ret;
988} 963}
989 964
990 965
@@ -1023,19 +998,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1023 return -ENOENT; 998 return -ENOENT;
1024 } 999 }
1025 1000
1026 __ip_vs_update_dest(svc, dest, udest); 1001 __ip_vs_update_dest(svc, dest, udest, 0);
1027
1028 write_lock_bh(&__ip_vs_svc_lock);
1029
1030 /* Wait until all other svc users go away */
1031 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1032
1033 /* call the update_service, because server weight may be changed */
1034 if (svc->scheduler->update_service)
1035 svc->scheduler->update_service(svc);
1036
1037 write_unlock_bh(&__ip_vs_svc_lock);
1038
1039 LeaveFunction(2); 1002 LeaveFunction(2);
1040 1003
1041 return 0; 1004 return 0;
@@ -1062,6 +1025,10 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1062 * the destination into the trash. 1025 * the destination into the trash.
1063 */ 1026 */
1064 if (atomic_dec_and_test(&dest->refcnt)) { 1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029 dest->vfwmark,
1030 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031 ntohs(dest->port));
1065 ip_vs_dst_reset(dest); 1032 ip_vs_dst_reset(dest);
1066 /* simply decrease svc->refcnt here, let the caller check 1033 /* simply decrease svc->refcnt here, let the caller check
1067 and release the service if nobody refers to it. 1034 and release the service if nobody refers to it.
@@ -1128,7 +1095,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1128 /* 1095 /*
1129 * Wait until all other svc users go away. 1096 * Wait until all other svc users go away.
1130 */ 1097 */
1131 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1132 1099
1133 /* 1100 /*
1134 * Unlink dest from the service 1101 * Unlink dest from the service
@@ -1157,6 +1124,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1157{ 1124{
1158 int ret = 0; 1125 int ret = 0;
1159 struct ip_vs_scheduler *sched = NULL; 1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1160 struct ip_vs_service *svc = NULL; 1128 struct ip_vs_service *svc = NULL;
1161 1129
1162 /* increase the module use count */ 1130 /* increase the module use count */
@@ -1167,7 +1135,17 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1167 if (sched == NULL) { 1135 if (sched == NULL) {
1168 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1169 ret = -ENOENT; 1137 ret = -ENOENT;
1170 goto out_mod_dec; 1138 goto out_err;
1139 }
1140
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_get(u->pe_name);
1143 if (pe == NULL) {
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1146 ret = -ENOENT;
1147 goto out_err;
1148 }
1171 } 1149 }
1172 1150
1173#ifdef CONFIG_IP_VS_IPV6 1151#ifdef CONFIG_IP_VS_IPV6
@@ -1185,7 +1163,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1185 } 1163 }
1186 1164
1187 /* I'm the first user of the service */ 1165 /* I'm the first user of the service */
1188 atomic_set(&svc->usecnt, 1); 1166 atomic_set(&svc->usecnt, 0);
1189 atomic_set(&svc->refcnt, 0); 1167 atomic_set(&svc->refcnt, 0);
1190 1168
1191 svc->af = u->af; 1169 svc->af = u->af;
@@ -1207,6 +1185,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1207 goto out_err; 1185 goto out_err;
1208 sched = NULL; 1186 sched = NULL;
1209 1187
1188 /* Bind the ct retriever */
1189 ip_vs_bind_pe(svc, pe);
1190 pe = NULL;
1191
1210 /* Update the virtual service counters */ 1192 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT) 1193 if (svc->port == FTPPORT)
1212 atomic_inc(&ip_vs_ftpsvc_counter); 1194 atomic_inc(&ip_vs_ftpsvc_counter);
@@ -1227,10 +1209,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1227 *svc_p = svc; 1209 *svc_p = svc;
1228 return 0; 1210 return 0;
1229 1211
1230 out_err: 1212 out_err:
1231 if (svc != NULL) { 1213 if (svc != NULL) {
1232 if (svc->scheduler) 1214 ip_vs_unbind_scheduler(svc);
1233 ip_vs_unbind_scheduler(svc);
1234 if (svc->inc) { 1215 if (svc->inc) {
1235 local_bh_disable(); 1216 local_bh_disable();
1236 ip_vs_app_inc_put(svc->inc); 1217 ip_vs_app_inc_put(svc->inc);
@@ -1239,8 +1220,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1239 kfree(svc); 1220 kfree(svc);
1240 } 1221 }
1241 ip_vs_scheduler_put(sched); 1222 ip_vs_scheduler_put(sched);
1223 ip_vs_pe_put(pe);
1242 1224
1243 out_mod_dec:
1244 /* decrease the module use count */ 1225 /* decrease the module use count */
1245 ip_vs_use_count_dec(); 1226 ip_vs_use_count_dec();
1246 1227
@@ -1255,6 +1236,7 @@ static int
1255ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1236ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256{ 1237{
1257 struct ip_vs_scheduler *sched, *old_sched; 1238 struct ip_vs_scheduler *sched, *old_sched;
1239 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1258 int ret = 0; 1240 int ret = 0;
1259 1241
1260 /* 1242 /*
@@ -1267,6 +1249,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1267 } 1249 }
1268 old_sched = sched; 1250 old_sched = sched;
1269 1251
1252 if (u->pe_name && *u->pe_name) {
1253 pe = ip_vs_pe_get(u->pe_name);
1254 if (pe == NULL) {
1255 pr_info("persistence engine module ip_vs_pe_%s "
1256 "not found\n", u->pe_name);
1257 ret = -ENOENT;
1258 goto out;
1259 }
1260 old_pe = pe;
1261 }
1262
1270#ifdef CONFIG_IP_VS_IPV6 1263#ifdef CONFIG_IP_VS_IPV6
1271 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { 1264 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1272 ret = -EINVAL; 1265 ret = -EINVAL;
@@ -1279,7 +1272,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1279 /* 1272 /*
1280 * Wait until all other svc users go away. 1273 * Wait until all other svc users go away.
1281 */ 1274 */
1282 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1275 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1283 1276
1284 /* 1277 /*
1285 * Set the flags and timeout value 1278 * Set the flags and timeout value
@@ -1318,15 +1311,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1318 } 1311 }
1319 } 1312 }
1320 1313
1314 old_pe = svc->pe;
1315 if (pe != old_pe) {
1316 ip_vs_unbind_pe(svc);
1317 ip_vs_bind_pe(svc, pe);
1318 }
1319
1321 out_unlock: 1320 out_unlock:
1322 write_unlock_bh(&__ip_vs_svc_lock); 1321 write_unlock_bh(&__ip_vs_svc_lock);
1323#ifdef CONFIG_IP_VS_IPV6
1324 out: 1322 out:
1325#endif 1323 ip_vs_scheduler_put(old_sched);
1326 1324 ip_vs_pe_put(old_pe);
1327 if (old_sched)
1328 ip_vs_scheduler_put(old_sched);
1329
1330 return ret; 1325 return ret;
1331} 1326}
1332 1327
@@ -1340,6 +1335,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1340{ 1335{
1341 struct ip_vs_dest *dest, *nxt; 1336 struct ip_vs_dest *dest, *nxt;
1342 struct ip_vs_scheduler *old_sched; 1337 struct ip_vs_scheduler *old_sched;
1338 struct ip_vs_pe *old_pe;
1339
1340 pr_info("%s: enter\n", __func__);
1343 1341
1344 /* Count only IPv4 services for old get/setsockopt interface */ 1342 /* Count only IPv4 services for old get/setsockopt interface */
1345 if (svc->af == AF_INET) 1343 if (svc->af == AF_INET)
@@ -1350,8 +1348,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1350 /* Unbind scheduler */ 1348 /* Unbind scheduler */
1351 old_sched = svc->scheduler; 1349 old_sched = svc->scheduler;
1352 ip_vs_unbind_scheduler(svc); 1350 ip_vs_unbind_scheduler(svc);
1353 if (old_sched) 1351 ip_vs_scheduler_put(old_sched);
1354 ip_vs_scheduler_put(old_sched); 1352
1353 /* Unbind persistence engine */
1354 old_pe = svc->pe;
1355 ip_vs_unbind_pe(svc);
1356 ip_vs_pe_put(old_pe);
1355 1357
1356 /* Unbind app inc */ 1358 /* Unbind app inc */
1357 if (svc->inc) { 1359 if (svc->inc) {
@@ -1378,21 +1380,23 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1378 /* 1380 /*
1379 * Free the service if nobody refers to it 1381 * Free the service if nobody refers to it
1380 */ 1382 */
1381 if (atomic_read(&svc->refcnt) == 0) 1383 if (atomic_read(&svc->refcnt) == 0) {
1384 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1385 svc->fwmark,
1386 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387 ntohs(svc->port), atomic_read(&svc->usecnt));
1382 kfree(svc); 1388 kfree(svc);
1389 }
1383 1390
1384 /* decrease the module use count */ 1391 /* decrease the module use count */
1385 ip_vs_use_count_dec(); 1392 ip_vs_use_count_dec();
1386} 1393}
1387 1394
1388/* 1395/*
1389 * Delete a service from the service list 1396 * Unlink a service from list and try to delete it if its refcnt reached 0
1390 */ 1397 */
1391static int ip_vs_del_service(struct ip_vs_service *svc) 1398static void ip_vs_unlink_service(struct ip_vs_service *svc)
1392{ 1399{
1393 if (svc == NULL)
1394 return -EEXIST;
1395
1396 /* 1400 /*
1397 * Unhash it from the service table 1401 * Unhash it from the service table
1398 */ 1402 */
@@ -1403,11 +1407,21 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1403 /* 1407 /*
1404 * Wait until all the svc users go away. 1408 * Wait until all the svc users go away.
1405 */ 1409 */
1406 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1407 1411
1408 __ip_vs_del_service(svc); 1412 __ip_vs_del_service(svc);
1409 1413
1410 write_unlock_bh(&__ip_vs_svc_lock); 1414 write_unlock_bh(&__ip_vs_svc_lock);
1415}
1416
1417/*
1418 * Delete a service from the service list
1419 */
1420static int ip_vs_del_service(struct ip_vs_service *svc)
1421{
1422 if (svc == NULL)
1423 return -EEXIST;
1424 ip_vs_unlink_service(svc);
1411 1425
1412 return 0; 1426 return 0;
1413} 1427}
@@ -1426,14 +1440,7 @@ static int ip_vs_flush(void)
1426 */ 1440 */
1427 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1441 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1428 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1442 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1429 write_lock_bh(&__ip_vs_svc_lock); 1443 ip_vs_unlink_service(svc);
1430 ip_vs_svc_unhash(svc);
1431 /*
1432 * Wait until all the svc users go away.
1433 */
1434 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1435 __ip_vs_del_service(svc);
1436 write_unlock_bh(&__ip_vs_svc_lock);
1437 } 1444 }
1438 } 1445 }
1439 1446
@@ -1443,14 +1450,7 @@ static int ip_vs_flush(void)
1443 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1450 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1444 list_for_each_entry_safe(svc, nxt, 1451 list_for_each_entry_safe(svc, nxt,
1445 &ip_vs_svc_fwm_table[idx], f_list) { 1452 &ip_vs_svc_fwm_table[idx], f_list) {
1446 write_lock_bh(&__ip_vs_svc_lock); 1453 ip_vs_unlink_service(svc);
1447 ip_vs_svc_unhash(svc);
1448 /*
1449 * Wait until all the svc users go away.
1450 */
1451 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1452 __ip_vs_del_service(svc);
1453 write_unlock_bh(&__ip_vs_svc_lock);
1454 } 1454 }
1455 } 1455 }
1456 1456
@@ -1579,6 +1579,15 @@ static struct ctl_table vs_vars[] = {
1579 .mode = 0644, 1579 .mode = 0644,
1580 .proc_handler = proc_do_defense_mode, 1580 .proc_handler = proc_do_defense_mode,
1581 }, 1581 },
1582#ifdef CONFIG_IP_VS_NFCT
1583 {
1584 .procname = "conntrack",
1585 .data = &sysctl_ip_vs_conntrack,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_dointvec,
1589 },
1590#endif
1582 { 1591 {
1583 .procname = "secure_tcp", 1592 .procname = "secure_tcp",
1584 .data = &sysctl_ip_vs_secure_tcp, 1593 .data = &sysctl_ip_vs_secure_tcp,
@@ -1586,6 +1595,13 @@ static struct ctl_table vs_vars[] = {
1586 .mode = 0644, 1595 .mode = 0644,
1587 .proc_handler = proc_do_defense_mode, 1596 .proc_handler = proc_do_defense_mode,
1588 }, 1597 },
1598 {
1599 .procname = "snat_reroute",
1600 .data = &sysctl_ip_vs_snat_reroute,
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec,
1604 },
1589#if 0 1605#if 0
1590 { 1606 {
1591 .procname = "timeout_established", 1607 .procname = "timeout_established",
@@ -2041,6 +2057,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2041static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2057static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2042 struct ip_vs_service_user *usvc_compat) 2058 struct ip_vs_service_user *usvc_compat)
2043{ 2059{
2060 memset(usvc, 0, sizeof(*usvc));
2061
2044 usvc->af = AF_INET; 2062 usvc->af = AF_INET;
2045 usvc->protocol = usvc_compat->protocol; 2063 usvc->protocol = usvc_compat->protocol;
2046 usvc->addr.ip = usvc_compat->addr; 2064 usvc->addr.ip = usvc_compat->addr;
@@ -2058,6 +2076,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2058static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2076static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2059 struct ip_vs_dest_user *udest_compat) 2077 struct ip_vs_dest_user *udest_compat)
2060{ 2078{
2079 memset(udest, 0, sizeof(*udest));
2080
2061 udest->addr.ip = udest_compat->addr; 2081 udest->addr.ip = udest_compat->addr;
2062 udest->port = udest_compat->port; 2082 udest->port = udest_compat->port;
2063 udest->conn_flags = udest_compat->conn_flags; 2083 udest->conn_flags = udest_compat->conn_flags;
@@ -2147,15 +2167,15 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2147 2167
2148 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2168 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2149 if (usvc.fwmark == 0) 2169 if (usvc.fwmark == 0)
2150 svc = __ip_vs_service_get(usvc.af, usvc.protocol, 2170 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2151 &usvc.addr, usvc.port); 2171 &usvc.addr, usvc.port);
2152 else 2172 else
2153 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 2173 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2154 2174
2155 if (cmd != IP_VS_SO_SET_ADD 2175 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2176 && (svc == NULL || svc->protocol != usvc.protocol)) {
2157 ret = -ESRCH; 2177 ret = -ESRCH;
2158 goto out_drop_service; 2178 goto out_unlock;
2159 } 2179 }
2160 2180
2161 switch (cmd) { 2181 switch (cmd) {
@@ -2189,10 +2209,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2209 ret = -EINVAL;
2190 } 2210 }
2191 2211
2192out_drop_service:
2193 if (svc)
2194 ip_vs_service_put(svc);
2195
2196 out_unlock: 2212 out_unlock:
2197 mutex_unlock(&__ip_vs_mutex); 2213 mutex_unlock(&__ip_vs_mutex);
2198 out_dec: 2214 out_dec:
@@ -2285,10 +2301,10 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2285 int ret = 0; 2301 int ret = 0;
2286 2302
2287 if (get->fwmark) 2303 if (get->fwmark)
2288 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); 2304 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2289 else 2305 else
2290 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, 2306 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2291 get->port); 2307 get->port);
2292 2308
2293 if (svc) { 2309 if (svc) {
2294 int count = 0; 2310 int count = 0;
@@ -2316,7 +2332,6 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2316 } 2332 }
2317 count++; 2333 count++;
2318 } 2334 }
2319 ip_vs_service_put(svc);
2320 } else 2335 } else
2321 ret = -ESRCH; 2336 ret = -ESRCH;
2322 return ret; 2337 return ret;
@@ -2437,15 +2452,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2437 entry = (struct ip_vs_service_entry *)arg; 2452 entry = (struct ip_vs_service_entry *)arg;
2438 addr.ip = entry->addr; 2453 addr.ip = entry->addr;
2439 if (entry->fwmark) 2454 if (entry->fwmark)
2440 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); 2455 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2441 else 2456 else
2442 svc = __ip_vs_service_get(AF_INET, entry->protocol, 2457 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2443 &addr, entry->port); 2458 &addr, entry->port);
2444 if (svc) { 2459 if (svc) {
2445 ip_vs_copy_service(entry, svc); 2460 ip_vs_copy_service(entry, svc);
2446 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2461 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2447 ret = -EFAULT; 2462 ret = -EFAULT;
2448 ip_vs_service_put(svc);
2449 } else 2463 } else
2450 ret = -ESRCH; 2464 ret = -ESRCH;
2451 } 2465 }
@@ -2560,6 +2574,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2560 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2574 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2561 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2575 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2562 .len = IP_VS_SCHEDNAME_MAXLEN }, 2576 .len = IP_VS_SCHEDNAME_MAXLEN },
2577 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2578 .len = IP_VS_PENAME_MAXLEN },
2563 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2579 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2564 .len = sizeof(struct ip_vs_flags) }, 2580 .len = sizeof(struct ip_vs_flags) },
2565 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2581 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
@@ -2636,6 +2652,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2636 } 2652 }
2637 2653
2638 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); 2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2655 if (svc->pe)
2656 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2639 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); 2657 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2640 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); 2658 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2641 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); 2659 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
@@ -2712,10 +2730,12 @@ nla_put_failure:
2712} 2730}
2713 2731
2714static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2732static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2715 struct nlattr *nla, int full_entry) 2733 struct nlattr *nla, int full_entry,
2734 struct ip_vs_service **ret_svc)
2716{ 2735{
2717 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2736 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2718 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2737 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2738 struct ip_vs_service *svc;
2719 2739
2720 /* Parse mandatory identifying service fields first */ 2740 /* Parse mandatory identifying service fields first */
2721 if (nla == NULL || 2741 if (nla == NULL ||
@@ -2751,14 +2771,21 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2751 usvc->fwmark = 0; 2771 usvc->fwmark = 0;
2752 } 2772 }
2753 2773
2774 if (usvc->fwmark)
2775 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2776 else
2777 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2778 &usvc->addr, usvc->port);
2779 *ret_svc = svc;
2780
2754 /* If a full entry was requested, check for the additional fields */ 2781 /* If a full entry was requested, check for the additional fields */
2755 if (full_entry) { 2782 if (full_entry) {
2756 struct nlattr *nla_sched, *nla_flags, *nla_timeout, 2783 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2757 *nla_netmask; 2784 *nla_netmask;
2758 struct ip_vs_flags flags; 2785 struct ip_vs_flags flags;
2759 struct ip_vs_service *svc;
2760 2786
2761 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 2787 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2788 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2762 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 2789 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2763 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 2790 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2764 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 2791 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
@@ -2769,21 +2796,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2769 nla_memcpy(&flags, nla_flags, sizeof(flags)); 2796 nla_memcpy(&flags, nla_flags, sizeof(flags));
2770 2797
2771 /* prefill flags from service if it already exists */ 2798 /* prefill flags from service if it already exists */
2772 if (usvc->fwmark) 2799 if (svc)
2773 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2774 else
2775 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2776 &usvc->addr, usvc->port);
2777 if (svc) {
2778 usvc->flags = svc->flags; 2800 usvc->flags = svc->flags;
2779 ip_vs_service_put(svc);
2780 } else
2781 usvc->flags = 0;
2782 2801
2783 /* set new flags from userland */ 2802 /* set new flags from userland */
2784 usvc->flags = (usvc->flags & ~flags.mask) | 2803 usvc->flags = (usvc->flags & ~flags.mask) |
2785 (flags.flags & flags.mask); 2804 (flags.flags & flags.mask);
2786 usvc->sched_name = nla_data(nla_sched); 2805 usvc->sched_name = nla_data(nla_sched);
2806 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2787 usvc->timeout = nla_get_u32(nla_timeout); 2807 usvc->timeout = nla_get_u32(nla_timeout);
2788 usvc->netmask = nla_get_u32(nla_netmask); 2808 usvc->netmask = nla_get_u32(nla_netmask);
2789 } 2809 }
@@ -2794,17 +2814,11 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2794static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2814static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2795{ 2815{
2796 struct ip_vs_service_user_kern usvc; 2816 struct ip_vs_service_user_kern usvc;
2817 struct ip_vs_service *svc;
2797 int ret; 2818 int ret;
2798 2819
2799 ret = ip_vs_genl_parse_service(&usvc, nla, 0); 2820 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2800 if (ret) 2821 return ret ? ERR_PTR(ret) : svc;
2801 return ERR_PTR(ret);
2802
2803 if (usvc.fwmark)
2804 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2805 else
2806 return __ip_vs_service_get(usvc.af, usvc.protocol,
2807 &usvc.addr, usvc.port);
2808} 2822}
2809 2823
2810static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 2824static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
@@ -2895,7 +2909,6 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2895 2909
2896nla_put_failure: 2910nla_put_failure:
2897 cb->args[0] = idx; 2911 cb->args[0] = idx;
2898 ip_vs_service_put(svc);
2899 2912
2900out_err: 2913out_err:
2901 mutex_unlock(&__ip_vs_mutex); 2914 mutex_unlock(&__ip_vs_mutex);
@@ -3108,17 +3121,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3108 3121
3109 ret = ip_vs_genl_parse_service(&usvc, 3122 ret = ip_vs_genl_parse_service(&usvc,
3110 info->attrs[IPVS_CMD_ATTR_SERVICE], 3123 info->attrs[IPVS_CMD_ATTR_SERVICE],
3111 need_full_svc); 3124 need_full_svc, &svc);
3112 if (ret) 3125 if (ret)
3113 goto out; 3126 goto out;
3114 3127
3115 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3116 if (usvc.fwmark == 0)
3117 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3118 &usvc.addr, usvc.port);
3119 else
3120 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3121
3122 /* Unless we're adding a new service, the service must already exist */ 3128 /* Unless we're adding a new service, the service must already exist */
3123 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3129 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3124 ret = -ESRCH; 3130 ret = -ESRCH;
@@ -3152,6 +3158,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3152 break; 3158 break;
3153 case IPVS_CMD_DEL_SERVICE: 3159 case IPVS_CMD_DEL_SERVICE:
3154 ret = ip_vs_del_service(svc); 3160 ret = ip_vs_del_service(svc);
3161 /* do not use svc, it can be freed */
3155 break; 3162 break;
3156 case IPVS_CMD_NEW_DEST: 3163 case IPVS_CMD_NEW_DEST:
3157 ret = ip_vs_add_dest(svc, &udest); 3164 ret = ip_vs_add_dest(svc, &udest);
@@ -3170,8 +3177,6 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3170 } 3177 }
3171 3178
3172out: 3179out:
3173 if (svc)
3174 ip_vs_service_put(svc);
3175 mutex_unlock(&__ip_vs_mutex); 3180 mutex_unlock(&__ip_vs_mutex);
3176 3181
3177 return ret; 3182 return ret;
@@ -3217,7 +3222,6 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3217 goto out_err; 3222 goto out_err;
3218 } else if (svc) { 3223 } else if (svc) {
3219 ret = ip_vs_genl_fill_service(msg, svc); 3224 ret = ip_vs_genl_fill_service(msg, svc);
3220 ip_vs_service_put(svc);
3221 if (ret) 3225 if (ret)
3222 goto nla_put_failure; 3226 goto nla_put_failure;
3223 } else { 3227 } else {
@@ -3386,6 +3390,16 @@ int __init ip_vs_control_init(void)
3386 3390
3387 EnterFunction(2); 3391 EnterFunction(2);
3388 3392
3393 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3394 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3395 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3396 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3397 }
3398 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3399 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3400 }
3401 smp_wmb();
3402
3389 ret = nf_register_sockopt(&ip_vs_sockopts); 3403 ret = nf_register_sockopt(&ip_vs_sockopts);
3390 if (ret) { 3404 if (ret) {
3391 pr_err("cannot register sockopt.\n"); 3405 pr_err("cannot register sockopt.\n");
@@ -3404,15 +3418,6 @@ int __init ip_vs_control_init(void)
3404 3418
3405 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 3419 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3406 3420
3407 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3408 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3409 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3410 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3411 }
3412 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3413 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3414 }
3415
3416 ip_vs_new_estimator(&ip_vs_stats); 3421 ip_vs_new_estimator(&ip_vs_stats);
3417 3422
3418 /* Hook the defense timer */ 3423 /* Hook the defense timer */
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7e9af5b76d9e..75455000ad1c 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,17 +20,6 @@
20 * 20 *
21 * Author: Wouter Gadeyne 21 * Author: Wouter Gadeyne
22 * 22 *
23 *
24 * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
25 * http://www.ssi.bg/~ja/nfct/:
26 *
27 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
28 *
29 * Portions Copyright (C) 2001-2002
30 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
31 *
32 * Portions Copyright (C) 2003-2008
33 * Julian Anastasov
34 */ 23 */
35 24
36#define KMSG_COMPONENT "IPVS" 25#define KMSG_COMPONENT "IPVS"
@@ -58,16 +47,6 @@
58#define SERVER_STRING "227 Entering Passive Mode (" 47#define SERVER_STRING "227 Entering Passive Mode ("
59#define CLIENT_STRING "PORT " 48#define CLIENT_STRING "PORT "
60 49
61#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
62#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
63 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
64 (T)->dst.protonum
65
66#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
67#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
68 &((C)->vaddr.ip), ntohs((C)->vport), \
69 &((C)->daddr.ip), ntohs((C)->dport), \
70 (C)->protocol, (C)->state
71 50
72/* 51/*
73 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 52 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv;
85static int 64static int
86ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) 65ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
87{ 66{
67 /* We use connection tracking for the command connection */
68 cp->flags |= IP_VS_CONN_F_NFCT;
88 return 0; 69 return 0;
89} 70}
90 71
@@ -149,120 +130,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
149} 130}
150 131
151/* 132/*
152 * Called from init_conntrack() as expectfn handler.
153 */
154static void
155ip_vs_expect_callback(struct nf_conn *ct,
156 struct nf_conntrack_expect *exp)
157{
158 struct nf_conntrack_tuple *orig, new_reply;
159 struct ip_vs_conn *cp;
160
161 if (exp->tuple.src.l3num != PF_INET)
162 return;
163
164 /*
165 * We assume that no NF locks are held before this callback.
166 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
167 * expectations even if they use wildcard values, now we provide the
168 * actual values from the newly created original conntrack direction.
169 * The conntrack is confirmed when packet reaches IPVS hooks.
170 */
171
172 /* RS->CLIENT */
173 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
174 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
175 &orig->src.u3, orig->src.u.tcp.port,
176 &orig->dst.u3, orig->dst.u.tcp.port);
177 if (cp) {
178 /* Change reply CLIENT->RS to CLIENT->VS */
179 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
180 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
181 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
182 __func__, ct, ct->status,
183 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
184 ARG_CONN(cp));
185 new_reply.dst.u3 = cp->vaddr;
186 new_reply.dst.u.tcp.port = cp->vport;
187 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
188 ", inout cp=" FMT_CONN "\n",
189 __func__, ct,
190 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
191 ARG_CONN(cp));
192 goto alter;
193 }
194
195 /* CLIENT->VS */
196 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
197 &orig->src.u3, orig->src.u.tcp.port,
198 &orig->dst.u3, orig->dst.u.tcp.port);
199 if (cp) {
200 /* Change reply VS->CLIENT to RS->CLIENT */
201 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
202 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
203 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
204 __func__, ct, ct->status,
205 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
206 ARG_CONN(cp));
207 new_reply.src.u3 = cp->daddr;
208 new_reply.src.u.tcp.port = cp->dport;
209 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
210 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
211 __func__, ct,
212 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
213 ARG_CONN(cp));
214 goto alter;
215 }
216
217 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
218 " - unknown expect\n",
219 __func__, ct, ct->status, ARG_TUPLE(orig));
220 return;
221
222alter:
223 /* Never alter conntrack for non-NAT conns */
224 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
225 nf_conntrack_alter_reply(ct, &new_reply);
226 ip_vs_conn_put(cp);
227 return;
228}
229
230/*
231 * Create NF conntrack expectation with wildcard (optional) source port.
232 * Then the default callback function will alter the reply and will confirm
233 * the conntrack entry when the first packet comes.
234 */
235static void
236ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
237 struct ip_vs_conn *cp, u_int8_t proto,
238 const __be16 *port, int from_rs)
239{
240 struct nf_conntrack_expect *exp;
241
242 BUG_ON(!ct || ct == &nf_conntrack_untracked);
243
244 exp = nf_ct_expect_alloc(ct);
245 if (!exp)
246 return;
247
248 if (from_rs)
249 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
250 nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
251 proto, port, &cp->cport);
252 else
253 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
254 nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
255 proto, port, &cp->vport);
256
257 exp->expectfn = ip_vs_expect_callback;
258
259 IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
260 __func__, ct, ARG_TUPLE(&exp->tuple));
261 nf_ct_expect_related(exp);
262 nf_ct_expect_put(exp);
263}
264
265/*
266 * Look at outgoing ftp packets to catch the response to a PASV command 133 * Look at outgoing ftp packets to catch the response to a PASV command
267 * from the server (inside-to-outside). 134 * from the server (inside-to-outside).
268 * When we see one, we build a connection entry with the client address, 135 * When we see one, we build a connection entry with the client address,
@@ -328,14 +195,19 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
328 /* 195 /*
329 * Now update or create an connection entry for it 196 * Now update or create an connection entry for it
330 */ 197 */
331 n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, 198 {
332 &cp->caddr, 0); 199 struct ip_vs_conn_param p;
200 ip_vs_conn_fill_param(AF_INET, iph->protocol,
201 &from, port, &cp->caddr, 0, &p);
202 n_cp = ip_vs_conn_out_get(&p);
203 }
333 if (!n_cp) { 204 if (!n_cp) {
334 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, 205 struct ip_vs_conn_param p;
335 &cp->caddr, 0, 206 ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
336 &cp->vaddr, port, 207 0, &cp->vaddr, port, &p);
337 &from, port, 208 n_cp = ip_vs_conn_new(&p, &from, port,
338 IP_VS_CONN_F_NO_CPORT, 209 IP_VS_CONN_F_NO_CPORT |
210 IP_VS_CONN_F_NFCT,
339 cp->dest); 211 cp->dest);
340 if (!n_cp) 212 if (!n_cp)
341 return 0; 213 return 0;
@@ -370,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
370 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 242 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
371 start-data, end-start, 243 start-data, end-start,
372 buf, buf_len); 244 buf, buf_len);
373 if (ret) 245 if (ret) {
374 ip_vs_expect_related(skb, ct, n_cp, 246 ip_vs_nfct_expect_related(skb, ct, n_cp,
375 IPPROTO_TCP, NULL, 0); 247 IPPROTO_TCP, 0, 0);
248 if (skb->ip_summed == CHECKSUM_COMPLETE)
249 skb->ip_summed = CHECKSUM_UNNECESSARY;
250 /* csum is updated */
251 ret = 1;
252 }
376 } 253 }
377 254
378 /* 255 /*
@@ -479,21 +356,22 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
479 ip_vs_proto_name(iph->protocol), 356 ip_vs_proto_name(iph->protocol),
480 &to.ip, ntohs(port), &cp->vaddr.ip, 0); 357 &to.ip, ntohs(port), &cp->vaddr.ip, 0);
481 358
482 n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, 359 {
483 &to, port, 360 struct ip_vs_conn_param p;
484 &cp->vaddr, htons(ntohs(cp->vport)-1)); 361 ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
485 if (!n_cp) {
486 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
487 &to, port,
488 &cp->vaddr, htons(ntohs(cp->vport)-1), 362 &cp->vaddr, htons(ntohs(cp->vport)-1),
489 &cp->daddr, htons(ntohs(cp->dport)-1), 363 &p);
490 0, 364 n_cp = ip_vs_conn_in_get(&p);
491 cp->dest); 365 if (!n_cp) {
492 if (!n_cp) 366 n_cp = ip_vs_conn_new(&p, &cp->daddr,
493 return 0; 367 htons(ntohs(cp->dport)-1),
368 IP_VS_CONN_F_NFCT, cp->dest);
369 if (!n_cp)
370 return 0;
494 371
495 /* add its controller */ 372 /* add its controller */
496 ip_vs_control_add(n_cp, cp); 373 ip_vs_control_add(n_cp, cp);
374 }
497 } 375 }
498 376
499 /* 377 /*
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
new file mode 100644
index 000000000000..4680647cd450
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -0,0 +1,292 @@
1/*
2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
3 *
4 * Portions Copyright (C) 2001-2002
5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6 *
7 * Portions Copyright (C) 2003-2010
8 * Julian Anastasov
9 *
10 *
11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 *
26 * Authors:
27 * Ben North <ben@redfrontdoor.org>
28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
30 *
31 *
32 * Current status:
33 *
34 * - provide conntrack confirmation for new and related connections, by
35 * this way we can see their proper conntrack state in all hooks
36 * - support for all forwarding methods, not only NAT
37 * - FTP support (NAT), ability to support other NAT apps with expectations
38 * - to correctly create expectations for related NAT connections the proper
39 * NF conntrack support must be already installed, eg. ip_vs_ftp requires
40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41 * NAT rules are needed)
42 * - alter reply for NAT when forwarding packet in original direction:
43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44 * when RELATED conntrack is created from real server (Active FTP DATA)
45 * - if iptables_nat is not loaded the Passive FTP will not work (the
46 * PASV response can not be NAT-ed) but Active FTP should work
47 *
48 */
49
50#define KMSG_COMPONENT "IPVS"
51#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53#include <linux/module.h>
54#include <linux/types.h>
55#include <linux/kernel.h>
56#include <linux/errno.h>
57#include <linux/compiler.h>
58#include <linux/vmalloc.h>
59#include <linux/skbuff.h>
60#include <net/ip.h>
61#include <linux/netfilter.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ip_vs.h>
64#include <net/netfilter/nf_conntrack_core.h>
65#include <net/netfilter/nf_conntrack_expect.h>
66#include <net/netfilter/nf_conntrack_helper.h>
67#include <net/netfilter/nf_conntrack_zones.h>
68
69
70#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
71#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
73 (T)->dst.protonum
74
75#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
76#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
77 &((C)->vaddr.ip), ntohs((C)->vport), \
78 &((C)->daddr.ip), ntohs((C)->dport), \
79 (C)->protocol, (C)->state
80
81void
82ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
83{
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple new_tuple;
87
88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
89 nf_ct_is_dying(ct))
90 return;
91
92 /* Never alter conntrack for non-NAT conns */
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return;
95
96 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return;
99
100 /*
101 * The connection is not yet in the hashtable, so we update it.
102 * CIP->VIP will remain the same, so leave the tuple in
103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104 * real-server we will see RIP->DIP.
105 */
106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
107 /*
108 * This will also take care of UDP and other protocols.
109 */
110 if (outin) {
111 new_tuple.src.u3 = cp->daddr;
112 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113 new_tuple.dst.protonum != IPPROTO_ICMPV6)
114 new_tuple.src.u.tcp.port = cp->dport;
115 } else {
116 new_tuple.dst.u3 = cp->vaddr;
117 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118 new_tuple.dst.protonum != IPPROTO_ICMPV6)
119 new_tuple.dst.u.tcp.port = cp->vport;
120 }
121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122 "ctinfo=%d, old reply=" FMT_TUPLE
123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124 __func__, ct, ct->status, ctinfo,
125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126 ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127 nf_conntrack_alter_reply(ct, &new_tuple);
128}
129
/*
 * Confirm the conntrack attached to @skb and hand back the verdict of
 * nf_conntrack_confirm().  @cp is not used by this implementation.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	int verdict = nf_conntrack_confirm(skb);

	return verdict;
}
134
135/*
136 * Called from init_conntrack() as expectfn handler.
137 */
138static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139 struct nf_conntrack_expect *exp)
140{
141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p;
144
145 if (exp->tuple.src.l3num != PF_INET)
146 return;
147
148 /*
149 * We assume that no NF locks are held before this callback.
150 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
151 * expectations even if they use wildcard values, now we provide the
152 * actual values from the newly created original conntrack direction.
153 * The conntrack is confirmed when packet reaches IPVS hooks.
154 */
155
156 /* RS->CLIENT */
157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
158 ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
159 &orig->src.u3, orig->src.u.tcp.port,
160 &orig->dst.u3, orig->dst.u.tcp.port, &p);
161 cp = ip_vs_conn_out_get(&p);
162 if (cp) {
163 /* Change reply CLIENT->RS to CLIENT->VS */
164 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
165 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
166 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
167 __func__, ct, ct->status,
168 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
169 ARG_CONN(cp));
170 new_reply.dst.u3 = cp->vaddr;
171 new_reply.dst.u.tcp.port = cp->vport;
172 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
173 ", inout cp=" FMT_CONN "\n",
174 __func__, ct,
175 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
176 ARG_CONN(cp));
177 goto alter;
178 }
179
180 /* CLIENT->VS */
181 cp = ip_vs_conn_in_get(&p);
182 if (cp) {
183 /* Change reply VS->CLIENT to RS->CLIENT */
184 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
185 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
186 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
187 __func__, ct, ct->status,
188 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
189 ARG_CONN(cp));
190 new_reply.src.u3 = cp->daddr;
191 new_reply.src.u.tcp.port = cp->dport;
192 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
193 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
194 __func__, ct,
195 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
196 ARG_CONN(cp));
197 goto alter;
198 }
199
200 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
201 " - unknown expect\n",
202 __func__, ct, ct->status, ARG_TUPLE(orig));
203 return;
204
205alter:
206 /* Never alter conntrack for non-NAT conns */
207 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
208 nf_conntrack_alter_reply(ct, &new_reply);
209 ip_vs_conn_put(cp);
210 return;
211}
212
213/*
214 * Create NF conntrack expectation with wildcard (optional) source port.
215 * Then the default callback function will alter the reply and will confirm
216 * the conntrack entry when the first packet comes.
217 * Use port 0 to expect connection from any port.
218 */
219void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
220 struct ip_vs_conn *cp, u_int8_t proto,
221 const __be16 port, int from_rs)
222{
223 struct nf_conntrack_expect *exp;
224
225 if (ct == NULL || nf_ct_is_untracked(ct))
226 return;
227
228 exp = nf_ct_expect_alloc(ct);
229 if (!exp)
230 return;
231
232 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
233 from_rs ? &cp->daddr : &cp->caddr,
234 from_rs ? &cp->caddr : &cp->vaddr,
235 proto, port ? &port : NULL,
236 from_rs ? &cp->cport : &cp->vport);
237
238 exp->expectfn = ip_vs_nfct_expect_callback;
239
240 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
241 __func__, ct, ARG_TUPLE(&exp->tuple));
242 nf_ct_expect_related(exp);
243 nf_ct_expect_put(exp);
244}
245EXPORT_SYMBOL(ip_vs_nfct_expect_related);
246
247/*
248 * Our connection was terminated, try to drop the conntrack immediately
249 */
250void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
251{
252 struct nf_conntrack_tuple_hash *h;
253 struct nf_conn *ct;
254 struct nf_conntrack_tuple tuple;
255
256 if (!cp->cport)
257 return;
258
259 tuple = (struct nf_conntrack_tuple) {
260 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
261 tuple.src.u3 = cp->caddr;
262 tuple.src.u.all = cp->cport;
263 tuple.src.l3num = cp->af;
264 tuple.dst.u3 = cp->vaddr;
265 tuple.dst.u.all = cp->vport;
266
267 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
268 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
272 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */
275 if (del_timer(&ct->timeout)) {
276 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
277 FMT_TUPLE "\n",
278 __func__, ct, ARG_TUPLE(&tuple));
279 if (ct->timeout.function)
280 ct->timeout.function(ct->timeout.data);
281 } else {
282 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
283 FMT_TUPLE "\n",
284 __func__, ct, ARG_TUPLE(&tuple));
285 }
286 nf_ct_put(ct);
287 } else {
288 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
289 __func__, ARG_TUPLE(&tuple));
290 }
291}
292
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
new file mode 100644
index 000000000000..3414af70ee12
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -0,0 +1,147 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/spinlock.h>
6#include <linux/interrupt.h>
7#include <asm/string.h>
8#include <linux/kmod.h>
9#include <linux/sysctl.h>
10
11#include <net/ip_vs.h>
12
13/* IPVS pe list */
14static LIST_HEAD(ip_vs_pe);
15
16/* lock for service table */
17static DEFINE_SPINLOCK(ip_vs_pe_lock);
18
19/* Bind a service with a pe */
20void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
21{
22 svc->pe = pe;
23}
24
25/* Unbind a service from its pe */
26void ip_vs_unbind_pe(struct ip_vs_service *svc)
27{
28 svc->pe = NULL;
29}
30
31/* Get pe in the pe list by name */
32static struct ip_vs_pe *
33ip_vs_pe_getbyname(const char *pe_name)
34{
35 struct ip_vs_pe *pe;
36
37 IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
38 pe_name);
39
40 spin_lock_bh(&ip_vs_pe_lock);
41
42 list_for_each_entry(pe, &ip_vs_pe, n_list) {
43 /* Test and get the modules atomically */
44 if (pe->module &&
45 !try_module_get(pe->module)) {
46 /* This pe is just deleted */
47 continue;
48 }
49 if (strcmp(pe_name, pe->name)==0) {
50 /* HIT */
51 spin_unlock_bh(&ip_vs_pe_lock);
52 return pe;
53 }
54 if (pe->module)
55 module_put(pe->module);
56 }
57
58 spin_unlock_bh(&ip_vs_pe_lock);
59 return NULL;
60}
61
/*
 * Lookup pe and try to load it if it doesn't exist.
 *
 * Returns the pe found by ip_vs_pe_getbyname(), or NULL when it is still
 * unknown after requesting module "ip_vs_pe_<name>".
 */
struct ip_vs_pe *ip_vs_pe_get(const char *name)
{
	struct ip_vs_pe *pe = ip_vs_pe_getbyname(name);

	if (pe)
		return pe;

	/* Not registered yet: load the module and search again */
	request_module("ip_vs_pe_%s", name);
	return ip_vs_pe_getbyname(name);
}
78
79void ip_vs_pe_put(struct ip_vs_pe *pe)
80{
81 if (pe && pe->module)
82 module_put(pe->module);
83}
84
85/* Register a pe in the pe list */
86int register_ip_vs_pe(struct ip_vs_pe *pe)
87{
88 struct ip_vs_pe *tmp;
89
90 /* increase the module use count */
91 ip_vs_use_count_inc();
92
93 spin_lock_bh(&ip_vs_pe_lock);
94
95 if (!list_empty(&pe->n_list)) {
96 spin_unlock_bh(&ip_vs_pe_lock);
97 ip_vs_use_count_dec();
98 pr_err("%s(): [%s] pe already linked\n",
99 __func__, pe->name);
100 return -EINVAL;
101 }
102
103 /* Make sure that the pe with this name doesn't exist
104 * in the pe list.
105 */
106 list_for_each_entry(tmp, &ip_vs_pe, n_list) {
107 if (strcmp(tmp->name, pe->name) == 0) {
108 spin_unlock_bh(&ip_vs_pe_lock);
109 ip_vs_use_count_dec();
110 pr_err("%s(): [%s] pe already existed "
111 "in the system\n", __func__, pe->name);
112 return -EINVAL;
113 }
114 }
115 /* Add it into the d-linked pe list */
116 list_add(&pe->n_list, &ip_vs_pe);
117 spin_unlock_bh(&ip_vs_pe_lock);
118
119 pr_info("[%s] pe registered.\n", pe->name);
120
121 return 0;
122}
123EXPORT_SYMBOL_GPL(register_ip_vs_pe);
124
125/* Unregister a pe from the pe list */
126int unregister_ip_vs_pe(struct ip_vs_pe *pe)
127{
128 spin_lock_bh(&ip_vs_pe_lock);
129 if (list_empty(&pe->n_list)) {
130 spin_unlock_bh(&ip_vs_pe_lock);
131 pr_err("%s(): [%s] pe is not in the list. failed\n",
132 __func__, pe->name);
133 return -EINVAL;
134 }
135
136 /* Remove it from the d-linked pe list */
137 list_del(&pe->n_list);
138 spin_unlock_bh(&ip_vs_pe_lock);
139
140 /* decrease the module use count */
141 ip_vs_use_count_dec();
142
143 pr_info("[%s] pe unregistered.\n", pe->name);
144
145 return 0;
146}
147EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
new file mode 100644
index 000000000000..b8b4e9620f3e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -0,0 +1,169 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/kernel.h>
6
7#include <net/ip_vs.h>
8#include <net/netfilter/nf_conntrack.h>
9#include <linux/netfilter/nf_conntrack_sip.h>
10
11#ifdef CONFIG_IP_VS_DEBUG
/*
 * Copy a (not NUL-terminated) SIP Call-ID into the shared debug buffer as a
 * C string and return a pointer to that string.
 *
 * @buf, @buf_len:      debug buffer and its total size
 * @callid, @callid_len: bytes to copy; at most 64 are used, and fewer when
 *                      the buffer has less room left
 * @idx:                in/out write position in @buf; advanced past the
 *                      copied bytes and their terminator
 *
 * Returns the start of the copied string.  When the buffer has no room left
 * even for a terminator, nothing is written, *@idx is left alone and an
 * empty string is returned.
 */
static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
				    const char *callid, size_t callid_len,
				    int *idx)
{
	const size_t max_len = 64;
	size_t start, room, len;

	/* Guard the unsigned subtraction below against wrap-around */
	if (*idx < 0 || (size_t)*idx >= buf_len)
		return "";

	start = *idx;
	room = buf_len - start - 1;	/* keep one byte for the '\0' */

	len = callid_len < max_len ? callid_len : max_len;
	if (len > room)
		len = room;

	memcpy(buf + start, callid, len);
	buf[start + len] = '\0';
	*idx += len + 1;

	/* Point at the first copied byte, not one past it */
	return buf + start;
}
22
23#define IP_VS_DEBUG_CALLID(callid, len) \
24 ip_vs_dbg_callid(ip_vs_dbg_buf, sizeof(ip_vs_dbg_buf), \
25 callid, len, &ip_vs_dbg_idx)
26#endif
27
28static int get_callid(const char *dptr, unsigned int dataoff,
29 unsigned int datalen,
30 unsigned int *matchoff, unsigned int *matchlen)
31{
32 /* Find callid */
33 while (1) {
34 int ret = ct_sip_get_header(NULL, dptr, dataoff, datalen,
35 SIP_HDR_CALL_ID, matchoff,
36 matchlen);
37 if (ret > 0)
38 break;
39 if (!ret)
40 return 0;
41 dataoff += *matchoff;
42 }
43
44 /* Empty callid is useless */
45 if (!*matchlen)
46 return -EINVAL;
47
48 /* Too large is useless */
49 if (*matchlen > IP_VS_PEDATA_MAXLEN)
50 return -EINVAL;
51
52 /* SIP headers are always followed by a line terminator */
53 if (*matchoff + *matchlen == datalen)
54 return -EINVAL;
55
56 /* RFC 2543 allows lines to be terminated with CR, LF or CRLF,
57 * RFC 3261 allows only CRLF, we support both. */
58 if (*(dptr + *matchoff + *matchlen) != '\r' &&
59 *(dptr + *matchoff + *matchlen) != '\n')
60 return -EINVAL;
61
62 IP_VS_DBG_BUF(9, "SIP callid %s (%d bytes)\n",
63 IP_VS_DEBUG_CALLID(dptr + *matchoff, *matchlen),
64 *matchlen);
65 return 0;
66}
67
68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{
71 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr;
74
75 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
76
77 /* Only useful with UDP */
78 if (iph.protocol != IPPROTO_UDP)
79 return -EINVAL;
80
81 /* No Data ? */
82 dataoff = iph.len + sizeof(struct udphdr);
83 if (dataoff >= skb->len)
84 return -EINVAL;
85
86 dptr = skb->data + dataoff;
87 datalen = skb->len - dataoff;
88
89 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
90 return -EINVAL;
91
92 p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
93 if (!p->pe_data)
94 return -ENOMEM;
95
96 /* N.B: pe_data is only set on success,
97 * this allows fallback to the default persistence logic on failure
98 */
99 memcpy(p->pe_data, dptr + matchoff, matchlen);
100 p->pe_data_len = matchlen;
101
102 return 0;
103}
104
105static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
106 struct ip_vs_conn *ct)
107
108{
109 bool ret = 0;
110
111 if (ct->af == p->af &&
112 ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
113 /* protocol should only be IPPROTO_IP if
114 * d_addr is a fwmark */
115 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
116 p->vaddr, &ct->vaddr) &&
117 ct->vport == p->vport &&
118 ct->flags & IP_VS_CONN_F_TEMPLATE &&
119 ct->protocol == p->protocol &&
120 ct->pe_data && ct->pe_data_len == p->pe_data_len &&
121 !memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
122 ret = 1;
123
124 IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
125 ip_vs_proto_name(p->protocol),
126 IP_VS_DEBUG_CALLID(p->pe_data, p->pe_data_len),
127 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
128 ret ? "hit" : "not hit");
129
130 return ret;
131}
132
133static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p,
134 u32 initval, bool inverse)
135{
136 return jhash(p->pe_data, p->pe_data_len, initval);
137}
138
139static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
140{
141 memcpy(buf, cp->pe_data, cp->pe_data_len);
142 return cp->pe_data_len;
143}
144
145static struct ip_vs_pe ip_vs_sip_pe =
146{
147 .name = "sip",
148 .refcnt = ATOMIC_INIT(0),
149 .module = THIS_MODULE,
150 .n_list = LIST_HEAD_INIT(ip_vs_sip_pe.n_list),
151 .fill_param = ip_vs_sip_fill_param,
152 .ct_match = ip_vs_sip_ct_match,
153 .hashkey_raw = ip_vs_sip_hashkey_raw,
154 .show_pe_data = ip_vs_sip_show_pe_data,
155};
156
157static int __init ip_vs_sip_init(void)
158{
159 return register_ip_vs_pe(&ip_vs_sip_pe);
160}
161
162static void __exit ip_vs_sip_cleanup(void)
163{
164 unregister_ip_vs_pe(&ip_vs_sip_pe);
165}
166
167module_init(ip_vs_sip_init);
168module_exit(ip_vs_sip_cleanup);
169MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 027f654799fe..c53998390877 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
172 else if (ih->frag_off & htons(IP_OFFSET)) 172 else if (ih->frag_off & htons(IP_OFFSET))
173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
174 else { 174 else {
175 __be16 _ports[2], *pptr 175 __be16 _ports[2], *pptr;
176; 176
177 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 177 pptr = skb_header_pointer(skb, offset + ih->ihl*4,
178 sizeof(_ports), _ports); 178 sizeof(_ports), _ports);
179 if (pptr == NULL) 179 if (pptr == NULL)
@@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
223 223
224 224
225void 225void
226ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, 226ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
227 const struct sk_buff *skb, 227 const struct sk_buff *skb,
228 int offset, 228 int offset,
229 const char *msg) 229 const char *msg)
230{ 230{
231#ifdef CONFIG_IP_VS_IPV6 231#ifdef CONFIG_IP_VS_IPV6
232 if (skb->protocol == htons(ETH_P_IPV6)) 232 if (af == AF_INET6)
233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
234 else 234 else
235#endif 235#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 1892dfc12fdd..3a0461117d3f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -40,6 +40,19 @@ struct isakmp_hdr {
40 40
41#define PORT_ISAKMP 500 41#define PORT_ISAKMP 500
42 42
43static void
44ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
45 int inverse, struct ip_vs_conn_param *p)
46{
47 if (likely(!inverse))
48 ip_vs_conn_fill_param(af, IPPROTO_UDP,
49 &iph->saddr, htons(PORT_ISAKMP),
50 &iph->daddr, htons(PORT_ISAKMP), p);
51 else
52 ip_vs_conn_fill_param(af, IPPROTO_UDP,
53 &iph->daddr, htons(PORT_ISAKMP),
54 &iph->saddr, htons(PORT_ISAKMP), p);
55}
43 56
44static struct ip_vs_conn * 57static struct ip_vs_conn *
45ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 58ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -47,21 +60,10 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
47 int inverse) 60 int inverse)
48{ 61{
49 struct ip_vs_conn *cp; 62 struct ip_vs_conn *cp;
63 struct ip_vs_conn_param p;
50 64
51 if (likely(!inverse)) { 65 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
52 cp = ip_vs_conn_in_get(af, IPPROTO_UDP, 66 cp = ip_vs_conn_in_get(&p);
53 &iph->saddr,
54 htons(PORT_ISAKMP),
55 &iph->daddr,
56 htons(PORT_ISAKMP));
57 } else {
58 cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
59 &iph->daddr,
60 htons(PORT_ISAKMP),
61 &iph->saddr,
62 htons(PORT_ISAKMP));
63 }
64
65 if (!cp) { 67 if (!cp) {
66 /* 68 /*
67 * We are not sure if the packet is from our 69 * We are not sure if the packet is from our
@@ -87,21 +89,10 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
87 int inverse) 89 int inverse)
88{ 90{
89 struct ip_vs_conn *cp; 91 struct ip_vs_conn *cp;
92 struct ip_vs_conn_param p;
90 93
91 if (likely(!inverse)) { 94 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
92 cp = ip_vs_conn_out_get(af, IPPROTO_UDP, 95 cp = ip_vs_conn_out_get(&p);
93 &iph->saddr,
94 htons(PORT_ISAKMP),
95 &iph->daddr,
96 htons(PORT_ISAKMP));
97 } else {
98 cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
99 &iph->daddr,
100 htons(PORT_ISAKMP),
101 &iph->saddr,
102 htons(PORT_ISAKMP));
103 }
104
105 if (!cp) { 96 if (!cp) {
106 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
107 "%s%s %s->%s\n", 98 "%s%s %s->%s\n",
@@ -126,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
126 return 0; 117 return 0;
127} 118}
128 119
129
130static void
131ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
132 int offset, const char *msg)
133{
134 char buf[256];
135 struct iphdr _iph, *ih;
136
137 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
138 if (ih == NULL)
139 sprintf(buf, "TRUNCATED");
140 else
141 sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);
142
143 pr_debug("%s: %s %s\n", msg, pp->name, buf);
144}
145
146#ifdef CONFIG_IP_VS_IPV6
147static void
148ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
149 int offset, const char *msg)
150{
151 char buf[256];
152 struct ipv6hdr _iph, *ih;
153
154 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
155 if (ih == NULL)
156 sprintf(buf, "TRUNCATED");
157 else
158 sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);
159
160 pr_debug("%s: %s %s\n", msg, pp->name, buf);
161}
162#endif
163
164static void
165ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
166 int offset, const char *msg)
167{
168#ifdef CONFIG_IP_VS_IPV6
169 if (skb->protocol == htons(ETH_P_IPV6))
170 ah_esp_debug_packet_v6(pp, skb, offset, msg);
171 else
172#endif
173 ah_esp_debug_packet_v4(pp, skb, offset, msg);
174}
175
176
177static void ah_esp_init(struct ip_vs_protocol *pp) 120static void ah_esp_init(struct ip_vs_protocol *pp)
178{ 121{
179 /* nothing to do now */ 122 /* nothing to do now */
@@ -204,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
204 .register_app = NULL, 147 .register_app = NULL,
205 .unregister_app = NULL, 148 .unregister_app = NULL,
206 .app_conn_bind = NULL, 149 .app_conn_bind = NULL,
207 .debug_packet = ah_esp_debug_packet, 150 .debug_packet = ip_vs_tcpudp_debug_packet,
208 .timeout_change = NULL, /* ISAKMP */ 151 .timeout_change = NULL, /* ISAKMP */
209 .set_state_timeout = NULL, 152 .set_state_timeout = NULL,
210}; 153};
@@ -228,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
228 .register_app = NULL, 171 .register_app = NULL,
229 .unregister_app = NULL, 172 .unregister_app = NULL,
230 .app_conn_bind = NULL, 173 .app_conn_bind = NULL,
231 .debug_packet = ah_esp_debug_packet, 174 .debug_packet = ip_vs_tcpudp_debug_packet,
232 .timeout_change = NULL, /* ISAKMP */ 175 .timeout_change = NULL, /* ISAKMP */
233}; 176};
234#endif 177#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 2f982a4c4770..1ea96bcd342b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
31 if ((sch->type == SCTP_CID_INIT) && 31 if ((sch->type == SCTP_CID_INIT) &&
32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
33 &iph.daddr, sh->dest))) { 33 &iph.daddr, sh->dest))) {
34 int ignored;
35
34 if (ip_vs_todrop()) { 36 if (ip_vs_todrop()) {
35 /* 37 /*
36 * It seems that we are very loaded. 38 * It seems that we are very loaded.
@@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
44 * Let the virtual server select a real server for the 46 * Let the virtual server select a real server for the
45 * incoming connection, and create a connection entry. 47 * incoming connection, and create a connection entry.
46 */ 48 */
47 *cpp = ip_vs_schedule(svc, skb); 49 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
48 if (!*cpp) { 50 if (!*cpp && !ignored) {
49 *verdict = ip_vs_leave(svc, skb, pp); 51 *verdict = ip_vs_leave(svc, skb, pp);
50 return 0; 52 return 0;
51 } 53 }
@@ -175,7 +177,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
175 177
176 if (val != cmp) { 178 if (val != cmp) {
177 /* CRC failure, dump it. */ 179 /* CRC failure, dump it. */
178 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 180 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
179 "Failed checksum for"); 181 "Failed checksum for");
180 return 0; 182 return 0;
181 } 183 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 282d24de8592..f6c5200e2146 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
43 return 0; 43 return 0;
44 } 44 }
45 45
46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
46 if (th->syn && 47 if (th->syn &&
47 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 48 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
48 th->dest))) { 49 th->dest))) {
50 int ignored;
51
49 if (ip_vs_todrop()) { 52 if (ip_vs_todrop()) {
50 /* 53 /*
51 * It seems that we are very loaded. 54 * It seems that we are very loaded.
@@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
60 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
62 */ 65 */
63 *cpp = ip_vs_schedule(svc, skb); 66 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
64 if (!*cpp) { 67 if (!*cpp && !ignored) {
65 *verdict = ip_vs_leave(svc, skb, pp); 68 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0; 69 return 0;
67 } 70 }
@@ -101,15 +104,15 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
101#ifdef CONFIG_IP_VS_IPV6 104#ifdef CONFIG_IP_VS_IPV6
102 if (af == AF_INET6) 105 if (af == AF_INET6)
103 tcph->check = 106 tcph->check =
104 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 107 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
105 ip_vs_check_diff2(oldlen, newlen, 108 ip_vs_check_diff2(oldlen, newlen,
106 ~csum_unfold(tcph->check)))); 109 csum_unfold(tcph->check))));
107 else 110 else
108#endif 111#endif
109 tcph->check = 112 tcph->check =
110 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 113 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
111 ip_vs_check_diff2(oldlen, newlen, 114 ip_vs_check_diff2(oldlen, newlen,
112 ~csum_unfold(tcph->check)))); 115 csum_unfold(tcph->check))));
113} 116}
114 117
115 118
@@ -120,6 +123,7 @@ tcp_snat_handler(struct sk_buff *skb,
120 struct tcphdr *tcph; 123 struct tcphdr *tcph;
121 unsigned int tcphoff; 124 unsigned int tcphoff;
122 int oldlen; 125 int oldlen;
126 int payload_csum = 0;
123 127
124#ifdef CONFIG_IP_VS_IPV6 128#ifdef CONFIG_IP_VS_IPV6
125 if (cp->af == AF_INET6) 129 if (cp->af == AF_INET6)
@@ -134,13 +138,20 @@ tcp_snat_handler(struct sk_buff *skb,
134 return 0; 138 return 0;
135 139
136 if (unlikely(cp->app != NULL)) { 140 if (unlikely(cp->app != NULL)) {
141 int ret;
142
137 /* Some checks before mangling */ 143 /* Some checks before mangling */
138 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 144 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
139 return 0; 145 return 0;
140 146
141 /* Call application helper if needed */ 147 /* Call application helper if needed */
142 if (!ip_vs_app_pkt_out(cp, skb)) 148 if (!(ret = ip_vs_app_pkt_out(cp, skb)))
143 return 0; 149 return 0;
150 /* ret=2: csum update is needed after payload mangling */
151 if (ret == 1)
152 oldlen = skb->len - tcphoff;
153 else
154 payload_csum = 1;
144 } 155 }
145 156
146 tcph = (void *)skb_network_header(skb) + tcphoff; 157 tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -151,12 +162,13 @@ tcp_snat_handler(struct sk_buff *skb,
151 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 162 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
152 htons(oldlen), 163 htons(oldlen),
153 htons(skb->len - tcphoff)); 164 htons(skb->len - tcphoff));
154 } else if (!cp->app) { 165 } else if (!payload_csum) {
155 /* Only port and addr are changed, do fast csum update */ 166 /* Only port and addr are changed, do fast csum update */
156 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 167 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
157 cp->dport, cp->vport); 168 cp->dport, cp->vport);
158 if (skb->ip_summed == CHECKSUM_COMPLETE) 169 if (skb->ip_summed == CHECKSUM_COMPLETE)
159 skb->ip_summed = CHECKSUM_NONE; 170 skb->ip_summed = (cp->app && pp->csum_check) ?
171 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
160 } else { 172 } else {
161 /* full checksum calculation */ 173 /* full checksum calculation */
162 tcph->check = 0; 174 tcph->check = 0;
@@ -174,6 +186,7 @@ tcp_snat_handler(struct sk_buff *skb,
174 skb->len - tcphoff, 186 skb->len - tcphoff,
175 cp->protocol, 187 cp->protocol,
176 skb->csum); 188 skb->csum);
189 skb->ip_summed = CHECKSUM_UNNECESSARY;
177 190
178 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 191 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
179 pp->name, tcph->check, 192 pp->name, tcph->check,
@@ -190,6 +203,7 @@ tcp_dnat_handler(struct sk_buff *skb,
190 struct tcphdr *tcph; 203 struct tcphdr *tcph;
191 unsigned int tcphoff; 204 unsigned int tcphoff;
192 int oldlen; 205 int oldlen;
206 int payload_csum = 0;
193 207
194#ifdef CONFIG_IP_VS_IPV6 208#ifdef CONFIG_IP_VS_IPV6
195 if (cp->af == AF_INET6) 209 if (cp->af == AF_INET6)
@@ -204,6 +218,8 @@ tcp_dnat_handler(struct sk_buff *skb,
204 return 0; 218 return 0;
205 219
206 if (unlikely(cp->app != NULL)) { 220 if (unlikely(cp->app != NULL)) {
221 int ret;
222
207 /* Some checks before mangling */ 223 /* Some checks before mangling */
208 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 224 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
209 return 0; 225 return 0;
@@ -212,8 +228,13 @@ tcp_dnat_handler(struct sk_buff *skb,
212 * Attempt ip_vs_app call. 228 * Attempt ip_vs_app call.
213 * It will fix ip_vs_conn and iph ack_seq stuff 229 * It will fix ip_vs_conn and iph ack_seq stuff
214 */ 230 */
215 if (!ip_vs_app_pkt_in(cp, skb)) 231 if (!(ret = ip_vs_app_pkt_in(cp, skb)))
216 return 0; 232 return 0;
233 /* ret=2: csum update is needed after payload mangling */
234 if (ret == 1)
235 oldlen = skb->len - tcphoff;
236 else
237 payload_csum = 1;
217 } 238 }
218 239
219 tcph = (void *)skb_network_header(skb) + tcphoff; 240 tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -223,15 +244,16 @@ tcp_dnat_handler(struct sk_buff *skb,
223 * Adjust TCP checksums 244 * Adjust TCP checksums
224 */ 245 */
225 if (skb->ip_summed == CHECKSUM_PARTIAL) { 246 if (skb->ip_summed == CHECKSUM_PARTIAL) {
226 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 247 tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
227 htons(oldlen), 248 htons(oldlen),
228 htons(skb->len - tcphoff)); 249 htons(skb->len - tcphoff));
229 } else if (!cp->app) { 250 } else if (!payload_csum) {
230 /* Only port and addr are changed, do fast csum update */ 251 /* Only port and addr are changed, do fast csum update */
231 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, 252 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
232 cp->vport, cp->dport); 253 cp->vport, cp->dport);
233 if (skb->ip_summed == CHECKSUM_COMPLETE) 254 if (skb->ip_summed == CHECKSUM_COMPLETE)
234 skb->ip_summed = CHECKSUM_NONE; 255 skb->ip_summed = (cp->app && pp->csum_check) ?
256 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
235 } else { 257 } else {
236 /* full checksum calculation */ 258 /* full checksum calculation */
237 tcph->check = 0; 259 tcph->check = 0;
@@ -278,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
278 skb->len - tcphoff, 300 skb->len - tcphoff,
279 ipv6_hdr(skb)->nexthdr, 301 ipv6_hdr(skb)->nexthdr,
280 skb->csum)) { 302 skb->csum)) {
281 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 303 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
282 "Failed checksum for"); 304 "Failed checksum for");
283 return 0; 305 return 0;
284 } 306 }
@@ -289,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
289 skb->len - tcphoff, 311 skb->len - tcphoff,
290 ip_hdr(skb)->protocol, 312 ip_hdr(skb)->protocol,
291 skb->csum)) { 313 skb->csum)) {
292 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 314 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
293 "Failed checksum for"); 315 "Failed checksum for");
294 return 0; 316 return 0;
295 } 317 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 8553231b5d41..9d106a06bb0a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 46 svc = ip_vs_service_get(af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 47 &iph.daddr, uh->dest);
48 if (svc) { 48 if (svc) {
49 int ignored;
50
49 if (ip_vs_todrop()) { 51 if (ip_vs_todrop()) {
50 /* 52 /*
51 * It seems that we are very loaded. 53 * It seems that we are very loaded.
@@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
60 * Let the virtual server select a real server for the 62 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry. 63 * incoming connection, and create a connection entry.
62 */ 64 */
63 *cpp = ip_vs_schedule(svc, skb); 65 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
64 if (!*cpp) { 66 if (!*cpp && !ignored) {
65 *verdict = ip_vs_leave(svc, skb, pp); 67 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0; 68 return 0;
67 } 69 }
@@ -102,15 +104,15 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
102#ifdef CONFIG_IP_VS_IPV6 104#ifdef CONFIG_IP_VS_IPV6
103 if (af == AF_INET6) 105 if (af == AF_INET6)
104 uhdr->check = 106 uhdr->check =
105 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 107 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
106 ip_vs_check_diff2(oldlen, newlen, 108 ip_vs_check_diff2(oldlen, newlen,
107 ~csum_unfold(uhdr->check)))); 109 csum_unfold(uhdr->check))));
108 else 110 else
109#endif 111#endif
110 uhdr->check = 112 uhdr->check =
111 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 113 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
112 ip_vs_check_diff2(oldlen, newlen, 114 ip_vs_check_diff2(oldlen, newlen,
113 ~csum_unfold(uhdr->check)))); 115 csum_unfold(uhdr->check))));
114} 116}
115 117
116 118
@@ -121,6 +123,7 @@ udp_snat_handler(struct sk_buff *skb,
121 struct udphdr *udph; 123 struct udphdr *udph;
122 unsigned int udphoff; 124 unsigned int udphoff;
123 int oldlen; 125 int oldlen;
126 int payload_csum = 0;
124 127
125#ifdef CONFIG_IP_VS_IPV6 128#ifdef CONFIG_IP_VS_IPV6
126 if (cp->af == AF_INET6) 129 if (cp->af == AF_INET6)
@@ -135,6 +138,8 @@ udp_snat_handler(struct sk_buff *skb,
135 return 0; 138 return 0;
136 139
137 if (unlikely(cp->app != NULL)) { 140 if (unlikely(cp->app != NULL)) {
141 int ret;
142
138 /* Some checks before mangling */ 143 /* Some checks before mangling */
139 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 144 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
140 return 0; 145 return 0;
@@ -142,8 +147,13 @@ udp_snat_handler(struct sk_buff *skb,
142 /* 147 /*
143 * Call application helper if needed 148 * Call application helper if needed
144 */ 149 */
145 if (!ip_vs_app_pkt_out(cp, skb)) 150 if (!(ret = ip_vs_app_pkt_out(cp, skb)))
146 return 0; 151 return 0;
152 /* ret=2: csum update is needed after payload mangling */
153 if (ret == 1)
154 oldlen = skb->len - udphoff;
155 else
156 payload_csum = 1;
147 } 157 }
148 158
149 udph = (void *)skb_network_header(skb) + udphoff; 159 udph = (void *)skb_network_header(skb) + udphoff;
@@ -156,12 +166,13 @@ udp_snat_handler(struct sk_buff *skb,
156 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 166 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
157 htons(oldlen), 167 htons(oldlen),
158 htons(skb->len - udphoff)); 168 htons(skb->len - udphoff));
159 } else if (!cp->app && (udph->check != 0)) { 169 } else if (!payload_csum && (udph->check != 0)) {
160 /* Only port and addr are changed, do fast csum update */ 170 /* Only port and addr are changed, do fast csum update */
161 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 171 udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
162 cp->dport, cp->vport); 172 cp->dport, cp->vport);
163 if (skb->ip_summed == CHECKSUM_COMPLETE) 173 if (skb->ip_summed == CHECKSUM_COMPLETE)
164 skb->ip_summed = CHECKSUM_NONE; 174 skb->ip_summed = (cp->app && pp->csum_check) ?
175 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
165 } else { 176 } else {
166 /* full checksum calculation */ 177 /* full checksum calculation */
167 udph->check = 0; 178 udph->check = 0;
@@ -181,6 +192,7 @@ udp_snat_handler(struct sk_buff *skb,
181 skb->csum); 192 skb->csum);
182 if (udph->check == 0) 193 if (udph->check == 0)
183 udph->check = CSUM_MANGLED_0; 194 udph->check = CSUM_MANGLED_0;
195 skb->ip_summed = CHECKSUM_UNNECESSARY;
184 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 196 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
185 pp->name, udph->check, 197 pp->name, udph->check,
186 (char*)&(udph->check) - (char*)udph); 198 (char*)&(udph->check) - (char*)udph);
@@ -196,6 +208,7 @@ udp_dnat_handler(struct sk_buff *skb,
196 struct udphdr *udph; 208 struct udphdr *udph;
197 unsigned int udphoff; 209 unsigned int udphoff;
198 int oldlen; 210 int oldlen;
211 int payload_csum = 0;
199 212
200#ifdef CONFIG_IP_VS_IPV6 213#ifdef CONFIG_IP_VS_IPV6
201 if (cp->af == AF_INET6) 214 if (cp->af == AF_INET6)
@@ -210,6 +223,8 @@ udp_dnat_handler(struct sk_buff *skb,
210 return 0; 223 return 0;
211 224
212 if (unlikely(cp->app != NULL)) { 225 if (unlikely(cp->app != NULL)) {
226 int ret;
227
213 /* Some checks before mangling */ 228 /* Some checks before mangling */
214 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 229 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
215 return 0; 230 return 0;
@@ -218,8 +233,13 @@ udp_dnat_handler(struct sk_buff *skb,
218 * Attempt ip_vs_app call. 233 * Attempt ip_vs_app call.
219 * It will fix ip_vs_conn 234 * It will fix ip_vs_conn
220 */ 235 */
221 if (!ip_vs_app_pkt_in(cp, skb)) 236 if (!(ret = ip_vs_app_pkt_in(cp, skb)))
222 return 0; 237 return 0;
238 /* ret=2: csum update is needed after payload mangling */
239 if (ret == 1)
240 oldlen = skb->len - udphoff;
241 else
242 payload_csum = 1;
223 } 243 }
224 244
225 udph = (void *)skb_network_header(skb) + udphoff; 245 udph = (void *)skb_network_header(skb) + udphoff;
@@ -229,15 +249,16 @@ udp_dnat_handler(struct sk_buff *skb,
229 * Adjust UDP checksums 249 * Adjust UDP checksums
230 */ 250 */
231 if (skb->ip_summed == CHECKSUM_PARTIAL) { 251 if (skb->ip_summed == CHECKSUM_PARTIAL) {
232 udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, 252 udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
233 htons(oldlen), 253 htons(oldlen),
234 htons(skb->len - udphoff)); 254 htons(skb->len - udphoff));
235 } else if (!cp->app && (udph->check != 0)) { 255 } else if (!payload_csum && (udph->check != 0)) {
236 /* Only port and addr are changed, do fast csum update */ 256 /* Only port and addr are changed, do fast csum update */
237 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, 257 udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
238 cp->vport, cp->dport); 258 cp->vport, cp->dport);
239 if (skb->ip_summed == CHECKSUM_COMPLETE) 259 if (skb->ip_summed == CHECKSUM_COMPLETE)
240 skb->ip_summed = CHECKSUM_NONE; 260 skb->ip_summed = (cp->app && pp->csum_check) ?
261 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
241 } else { 262 } else {
242 /* full checksum calculation */ 263 /* full checksum calculation */
243 udph->check = 0; 264 udph->check = 0;
@@ -293,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
293 skb->len - udphoff, 314 skb->len - udphoff,
294 ipv6_hdr(skb)->nexthdr, 315 ipv6_hdr(skb)->nexthdr,
295 skb->csum)) { 316 skb->csum)) {
296 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 317 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
297 "Failed checksum for"); 318 "Failed checksum for");
298 return 0; 319 return 0;
299 } 320 }
@@ -304,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
304 skb->len - udphoff, 325 skb->len - udphoff,
305 ip_hdr(skb)->protocol, 326 ip_hdr(skb)->protocol,
306 skb->csum)) { 327 skb->csum)) {
307 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 328 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
308 "Failed checksum for"); 329 "Failed checksum for");
309 return 0; 330 return 0;
310 } 331 }
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 727e45b66953..076ebe00435d 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -46,15 +46,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
46{ 46{
47 int ret; 47 int ret;
48 48
49 if (svc == NULL) {
50 pr_err("%s(): svc arg NULL\n", __func__);
51 return -EINVAL;
52 }
53 if (scheduler == NULL) {
54 pr_err("%s(): scheduler arg NULL\n", __func__);
55 return -EINVAL;
56 }
57
58 svc->scheduler = scheduler; 49 svc->scheduler = scheduler;
59 50
60 if (scheduler->init_service) { 51 if (scheduler->init_service) {
@@ -74,18 +65,10 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
74 */ 65 */
75int ip_vs_unbind_scheduler(struct ip_vs_service *svc) 66int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
76{ 67{
77 struct ip_vs_scheduler *sched; 68 struct ip_vs_scheduler *sched = svc->scheduler;
78 69
79 if (svc == NULL) { 70 if (!sched)
80 pr_err("%s(): svc arg NULL\n", __func__); 71 return 0;
81 return -EINVAL;
82 }
83
84 sched = svc->scheduler;
85 if (sched == NULL) {
86 pr_err("%s(): svc isn't bound\n", __func__);
87 return -EINVAL;
88 }
89 72
90 if (sched->done_service) { 73 if (sched->done_service) {
91 if (sched->done_service(svc) != 0) { 74 if (sched->done_service(svc) != 0) {
@@ -159,7 +142,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
159 142
160void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) 143void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
161{ 144{
162 if (scheduler->module) 145 if (scheduler && scheduler->module)
163 module_put(scheduler->module); 146 module_put(scheduler->module);
164} 147}
165 148
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7ba06939829f..ab85aedea17e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -288,6 +288,16 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
288 ip_vs_sync_conn(cp->control); 288 ip_vs_sync_conn(cp->control);
289} 289}
290 290
291static inline int
292ip_vs_conn_fill_param_sync(int af, int protocol,
293 const union nf_inet_addr *caddr, __be16 cport,
294 const union nf_inet_addr *vaddr, __be16 vport,
295 struct ip_vs_conn_param *p)
296{
297 /* XXX: Need to take into account persistence engine */
298 ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
299 return 0;
300}
291 301
292/* 302/*
293 * Process received multicast message and create the corresponding 303 * Process received multicast message and create the corresponding
@@ -301,6 +311,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
301 struct ip_vs_conn *cp; 311 struct ip_vs_conn *cp;
302 struct ip_vs_protocol *pp; 312 struct ip_vs_protocol *pp;
303 struct ip_vs_dest *dest; 313 struct ip_vs_dest *dest;
314 struct ip_vs_conn_param param;
304 char *p; 315 char *p;
305 int i; 316 int i;
306 317
@@ -370,18 +381,20 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
370 } 381 }
371 } 382 }
372 383
373 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 384 {
374 cp = ip_vs_conn_in_get(AF_INET, s->protocol, 385 if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
375 (union nf_inet_addr *)&s->caddr, 386 (union nf_inet_addr *)&s->caddr,
376 s->cport, 387 s->cport,
377 (union nf_inet_addr *)&s->vaddr, 388 (union nf_inet_addr *)&s->vaddr,
378 s->vport); 389 s->vport, &param)) {
379 else 390 pr_err("ip_vs_conn_fill_param_sync failed");
380 cp = ip_vs_ct_in_get(AF_INET, s->protocol, 391 return;
381 (union nf_inet_addr *)&s->caddr, 392 }
382 s->cport, 393 if (!(flags & IP_VS_CONN_F_TEMPLATE))
383 (union nf_inet_addr *)&s->vaddr, 394 cp = ip_vs_conn_in_get(&param);
384 s->vport); 395 else
396 cp = ip_vs_ct_in_get(&param);
397 }
385 if (!cp) { 398 if (!cp) {
386 /* 399 /*
387 * Find the appropriate destination for the connection. 400 * Find the appropriate destination for the connection.
@@ -406,14 +419,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
406 else 419 else
407 flags &= ~IP_VS_CONN_F_INACTIVE; 420 flags &= ~IP_VS_CONN_F_INACTIVE;
408 } 421 }
409 cp = ip_vs_conn_new(AF_INET, s->protocol, 422 cp = ip_vs_conn_new(&param,
410 (union nf_inet_addr *)&s->caddr,
411 s->cport,
412 (union nf_inet_addr *)&s->vaddr,
413 s->vport,
414 (union nf_inet_addr *)&s->daddr, 423 (union nf_inet_addr *)&s->daddr,
415 s->dport, 424 s->dport, flags, dest);
416 flags, dest);
417 if (dest) 425 if (dest)
418 atomic_dec(&dest->refcnt); 426 atomic_dec(&dest->refcnt);
419 if (!cp) { 427 if (!cp) {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 49df6bea6a2d..de04ea39cde8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -11,6 +11,16 @@
11 * 11 *
12 * Changes: 12 * Changes:
13 * 13 *
14 * Description of forwarding methods:
15 * - all transmitters are called from LOCAL_IN (remote clients) and
16 * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
17 * - not all connections have destination server, for example,
18 * connections in backup server when fwmark is used
19 * - bypass connections use daddr from packet
20 * LOCAL_OUT rules:
21 * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
22 * - skb->pkt_type is not set yet
23 * - the only place where we can see skb->sk != NULL
14 */ 24 */
15 25
16#define KMSG_COMPONENT "IPVS" 26#define KMSG_COMPONENT "IPVS"
@@ -26,9 +36,9 @@
26#include <net/route.h> /* for ip_route_output */ 36#include <net/route.h> /* for ip_route_output */
27#include <net/ipv6.h> 37#include <net/ipv6.h>
28#include <net/ip6_route.h> 38#include <net/ip6_route.h>
39#include <net/addrconf.h>
29#include <linux/icmpv6.h> 40#include <linux/icmpv6.h>
30#include <linux/netfilter.h> 41#include <linux/netfilter.h>
31#include <net/netfilter/nf_conntrack.h>
32#include <linux/netfilter_ipv4.h> 42#include <linux/netfilter_ipv4.h>
33 43
34#include <net/ip_vs.h> 44#include <net/ip_vs.h>
@@ -38,26 +48,27 @@
38 * Destination cache to speed up outgoing route lookup 48 * Destination cache to speed up outgoing route lookup
39 */ 49 */
40static inline void 50static inline void
41__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) 51__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
52 u32 dst_cookie)
42{ 53{
43 struct dst_entry *old_dst; 54 struct dst_entry *old_dst;
44 55
45 old_dst = dest->dst_cache; 56 old_dst = dest->dst_cache;
46 dest->dst_cache = dst; 57 dest->dst_cache = dst;
47 dest->dst_rtos = rtos; 58 dest->dst_rtos = rtos;
59 dest->dst_cookie = dst_cookie;
48 dst_release(old_dst); 60 dst_release(old_dst);
49} 61}
50 62
51static inline struct dst_entry * 63static inline struct dst_entry *
52__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) 64__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
53{ 65{
54 struct dst_entry *dst = dest->dst_cache; 66 struct dst_entry *dst = dest->dst_cache;
55 67
56 if (!dst) 68 if (!dst)
57 return NULL; 69 return NULL;
58 if ((dst->obsolete 70 if ((dst->obsolete || rtos != dest->dst_rtos) &&
59 || (dest->af == AF_INET && rtos != dest->dst_rtos)) && 71 dst->ops->check(dst, dest->dst_cookie) == NULL) {
60 dst->ops->check(dst, cookie) == NULL) {
61 dest->dst_cache = NULL; 72 dest->dst_cache = NULL;
62 dst_release(dst); 73 dst_release(dst);
63 return NULL; 74 return NULL;
@@ -66,16 +77,24 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
66 return dst; 77 return dst;
67} 78}
68 79
80/*
81 * Get route to destination or remote server
82 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
83 * &4=Allow redirect from remote daddr to local
84 */
69static struct rtable * 85static struct rtable *
70__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) 86__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
87 __be32 daddr, u32 rtos, int rt_mode)
71{ 88{
89 struct net *net = dev_net(skb_dst(skb)->dev);
72 struct rtable *rt; /* Route to the other host */ 90 struct rtable *rt; /* Route to the other host */
73 struct ip_vs_dest *dest = cp->dest; 91 struct rtable *ort; /* Original route */
92 int local;
74 93
75 if (dest) { 94 if (dest) {
76 spin_lock(&dest->dst_lock); 95 spin_lock(&dest->dst_lock);
77 if (!(rt = (struct rtable *) 96 if (!(rt = (struct rtable *)
78 __ip_vs_dst_check(dest, rtos, 0))) { 97 __ip_vs_dst_check(dest, rtos))) {
79 struct flowi fl = { 98 struct flowi fl = {
80 .oif = 0, 99 .oif = 0,
81 .nl_u = { 100 .nl_u = {
@@ -85,13 +104,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
85 .tos = rtos, } }, 104 .tos = rtos, } },
86 }; 105 };
87 106
88 if (ip_route_output_key(&init_net, &rt, &fl)) { 107 if (ip_route_output_key(net, &rt, &fl)) {
89 spin_unlock(&dest->dst_lock); 108 spin_unlock(&dest->dst_lock);
90 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 109 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
91 &dest->addr.ip); 110 &dest->addr.ip);
92 return NULL; 111 return NULL;
93 } 112 }
94 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); 113 __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
95 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", 114 IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
96 &dest->addr.ip, 115 &dest->addr.ip,
97 atomic_read(&rt->dst.__refcnt), rtos); 116 atomic_read(&rt->dst.__refcnt), rtos);
@@ -102,78 +121,199 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
102 .oif = 0, 121 .oif = 0,
103 .nl_u = { 122 .nl_u = {
104 .ip4_u = { 123 .ip4_u = {
105 .daddr = cp->daddr.ip, 124 .daddr = daddr,
106 .saddr = 0, 125 .saddr = 0,
107 .tos = rtos, } }, 126 .tos = rtos, } },
108 }; 127 };
109 128
110 if (ip_route_output_key(&init_net, &rt, &fl)) { 129 if (ip_route_output_key(net, &rt, &fl)) {
111 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", 130 IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
112 &cp->daddr.ip); 131 &daddr);
113 return NULL; 132 return NULL;
114 } 133 }
115 } 134 }
116 135
136 local = rt->rt_flags & RTCF_LOCAL;
137 if (!((local ? 1 : 2) & rt_mode)) {
138 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
139 (rt->rt_flags & RTCF_LOCAL) ?
140 "local":"non-local", &rt->rt_dst);
141 ip_rt_put(rt);
142 return NULL;
143 }
144 if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
145 ort->rt_flags & RTCF_LOCAL)) {
146 IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
147 "requires NAT method, dest: %pI4\n",
148 &ip_hdr(skb)->daddr, &rt->rt_dst);
149 ip_rt_put(rt);
150 return NULL;
151 }
152 if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
153 IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
154 "to non-local address, dest: %pI4\n",
155 &ip_hdr(skb)->saddr, &rt->rt_dst);
156 ip_rt_put(rt);
157 return NULL;
158 }
159
117 return rt; 160 return rt;
118} 161}
119 162
163/* Reroute packet to local IPv4 stack after DNAT */
164static int
165__ip_vs_reroute_locally(struct sk_buff *skb)
166{
167 struct rtable *rt = skb_rtable(skb);
168 struct net_device *dev = rt->dst.dev;
169 struct net *net = dev_net(dev);
170 struct iphdr *iph = ip_hdr(skb);
171
172 if (rt->fl.iif) {
173 unsigned long orefdst = skb->_skb_refdst;
174
175 if (ip_route_input(skb, iph->daddr, iph->saddr,
176 iph->tos, skb->dev))
177 return 0;
178 refdst_drop(orefdst);
179 } else {
180 struct flowi fl = {
181 .oif = 0,
182 .nl_u = {
183 .ip4_u = {
184 .daddr = iph->daddr,
185 .saddr = iph->saddr,
186 .tos = RT_TOS(iph->tos),
187 }
188 },
189 .mark = skb->mark,
190 };
191 struct rtable *rt;
192
193 if (ip_route_output_key(net, &rt, &fl))
194 return 0;
195 if (!(rt->rt_flags & RTCF_LOCAL)) {
196 ip_rt_put(rt);
197 return 0;
198 }
199 /* Drop old route. */
200 skb_dst_drop(skb);
201 skb_dst_set(skb, &rt->dst);
202 }
203 return 1;
204}
205
120#ifdef CONFIG_IP_VS_IPV6 206#ifdef CONFIG_IP_VS_IPV6
207
208static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
209{
210 return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
211}
212
213static struct dst_entry *
214__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
215 struct in6_addr *ret_saddr, int do_xfrm)
216{
217 struct dst_entry *dst;
218 struct flowi fl = {
219 .oif = 0,
220 .nl_u = {
221 .ip6_u = {
222 .daddr = *daddr,
223 },
224 },
225 };
226
227 dst = ip6_route_output(net, NULL, &fl);
228 if (dst->error)
229 goto out_err;
230 if (!ret_saddr)
231 return dst;
232 if (ipv6_addr_any(&fl.fl6_src) &&
233 ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
234 &fl.fl6_dst, 0, &fl.fl6_src) < 0)
235 goto out_err;
236 if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
237 goto out_err;
238 ipv6_addr_copy(ret_saddr, &fl.fl6_src);
239 return dst;
240
241out_err:
242 dst_release(dst);
243 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
244 return NULL;
245}
246
247/*
248 * Get route to destination or remote server
249 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
250 * &4=Allow redirect from remote daddr to local
251 */
121static struct rt6_info * 252static struct rt6_info *
122__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) 253__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
254 struct in6_addr *daddr, struct in6_addr *ret_saddr,
255 int do_xfrm, int rt_mode)
123{ 256{
257 struct net *net = dev_net(skb_dst(skb)->dev);
124 struct rt6_info *rt; /* Route to the other host */ 258 struct rt6_info *rt; /* Route to the other host */
125 struct ip_vs_dest *dest = cp->dest; 259 struct rt6_info *ort; /* Original route */
260 struct dst_entry *dst;
261 int local;
126 262
127 if (dest) { 263 if (dest) {
128 spin_lock(&dest->dst_lock); 264 spin_lock(&dest->dst_lock);
129 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); 265 rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
130 if (!rt) { 266 if (!rt) {
131 struct flowi fl = { 267 u32 cookie;
132 .oif = 0,
133 .nl_u = {
134 .ip6_u = {
135 .daddr = dest->addr.in6,
136 .saddr = {
137 .s6_addr32 =
138 { 0, 0, 0, 0 },
139 },
140 },
141 },
142 };
143 268
144 rt = (struct rt6_info *)ip6_route_output(&init_net, 269 dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
145 NULL, &fl); 270 &dest->dst_saddr,
146 if (!rt) { 271 do_xfrm);
272 if (!dst) {
147 spin_unlock(&dest->dst_lock); 273 spin_unlock(&dest->dst_lock);
148 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
149 &dest->addr.in6);
150 return NULL; 274 return NULL;
151 } 275 }
152 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); 276 rt = (struct rt6_info *) dst;
153 IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", 277 cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
154 &dest->addr.in6, 278 __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
279 IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
280 &dest->addr.in6, &dest->dst_saddr,
155 atomic_read(&rt->dst.__refcnt)); 281 atomic_read(&rt->dst.__refcnt));
156 } 282 }
283 if (ret_saddr)
284 ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
157 spin_unlock(&dest->dst_lock); 285 spin_unlock(&dest->dst_lock);
158 } else { 286 } else {
159 struct flowi fl = { 287 dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
160 .oif = 0, 288 if (!dst)
161 .nl_u = {
162 .ip6_u = {
163 .daddr = cp->daddr.in6,
164 .saddr = {
165 .s6_addr32 = { 0, 0, 0, 0 },
166 },
167 },
168 },
169 };
170
171 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
172 if (!rt) {
173 IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
174 &cp->daddr.in6);
175 return NULL; 289 return NULL;
176 } 290 rt = (struct rt6_info *) dst;
291 }
292
293 local = __ip_vs_is_local_route6(rt);
294 if (!((local ? 1 : 2) & rt_mode)) {
295 IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
296 local ? "local":"non-local", daddr);
297 dst_release(&rt->dst);
298 return NULL;
299 }
300 if (local && !(rt_mode & 4) &&
301 !((ort = (struct rt6_info *) skb_dst(skb)) &&
302 __ip_vs_is_local_route6(ort))) {
303 IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
304 "requires NAT method, dest: %pI6\n",
305 &ipv6_hdr(skb)->daddr, daddr);
306 dst_release(&rt->dst);
307 return NULL;
308 }
309 if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
310 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
311 IPV6_ADDR_LOOPBACK)) {
312 IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
313 "to non-local address, dest: %pI6\n",
314 &ipv6_hdr(skb)->saddr, daddr);
315 dst_release(&rt->dst);
316 return NULL;
177 } 317 }
178 318
179 return rt; 319 return rt;
@@ -194,12 +334,44 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
194 dst_release(old_dst); 334 dst_release(old_dst);
195} 335}
196 336
197#define IP_VS_XMIT(pf, skb, rt) \ 337#define IP_VS_XMIT_TUNNEL(skb, cp) \
338({ \
339 int __ret = NF_ACCEPT; \
340 \
341 (skb)->ipvs_property = 1; \
342 if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
343 __ret = ip_vs_confirm_conntrack(skb, cp); \
344 if (__ret == NF_ACCEPT) { \
345 nf_reset(skb); \
346 skb_forward_csum(skb); \
347 } \
348 __ret; \
349})
350
351#define IP_VS_XMIT_NAT(pf, skb, cp, local) \
352do { \
353 (skb)->ipvs_property = 1; \
354 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
355 ip_vs_notrack(skb); \
356 else \
357 ip_vs_update_conntrack(skb, cp, 1); \
358 if (local) \
359 return NF_ACCEPT; \
360 skb_forward_csum(skb); \
361 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
362 skb_dst(skb)->dev, dst_output); \
363} while (0)
364
365#define IP_VS_XMIT(pf, skb, cp, local) \
198do { \ 366do { \
199 (skb)->ipvs_property = 1; \ 367 (skb)->ipvs_property = 1; \
368 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
369 ip_vs_notrack(skb); \
370 if (local) \
371 return NF_ACCEPT; \
200 skb_forward_csum(skb); \ 372 skb_forward_csum(skb); \
201 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 373 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
202 (rt)->dst.dev, dst_output); \ 374 skb_dst(skb)->dev, dst_output); \
203} while (0) 375} while (0)
204 376
205 377
@@ -211,7 +383,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
211 struct ip_vs_protocol *pp) 383 struct ip_vs_protocol *pp)
212{ 384{
213 /* we do not touch skb and do not need pskb ptr */ 385 /* we do not touch skb and do not need pskb ptr */
214 return NF_ACCEPT; 386 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
215} 387}
216 388
217 389
@@ -226,24 +398,13 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
226{ 398{
227 struct rtable *rt; /* Route to the other host */ 399 struct rtable *rt; /* Route to the other host */
228 struct iphdr *iph = ip_hdr(skb); 400 struct iphdr *iph = ip_hdr(skb);
229 u8 tos = iph->tos;
230 int mtu; 401 int mtu;
231 struct flowi fl = {
232 .oif = 0,
233 .nl_u = {
234 .ip4_u = {
235 .daddr = iph->daddr,
236 .saddr = 0,
237 .tos = RT_TOS(tos), } },
238 };
239 402
240 EnterFunction(10); 403 EnterFunction(10);
241 404
242 if (ip_route_output_key(&init_net, &rt, &fl)) { 405 if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
243 IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", 406 RT_TOS(iph->tos), 2)))
244 __func__, &iph->daddr);
245 goto tx_error_icmp; 407 goto tx_error_icmp;
246 }
247 408
248 /* MTU checking */ 409 /* MTU checking */
249 mtu = dst_mtu(&rt->dst); 410 mtu = dst_mtu(&rt->dst);
@@ -271,7 +432,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
271 /* Another hack: avoid icmp_send in ip_fragment */ 432 /* Another hack: avoid icmp_send in ip_fragment */
272 skb->local_df = 1; 433 skb->local_df = 1;
273 434
274 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 435 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
275 436
276 LeaveFunction(10); 437 LeaveFunction(10);
277 return NF_STOLEN; 438 return NF_STOLEN;
@@ -292,28 +453,22 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
292 struct rt6_info *rt; /* Route to the other host */ 453 struct rt6_info *rt; /* Route to the other host */
293 struct ipv6hdr *iph = ipv6_hdr(skb); 454 struct ipv6hdr *iph = ipv6_hdr(skb);
294 int mtu; 455 int mtu;
295 struct flowi fl = {
296 .oif = 0,
297 .nl_u = {
298 .ip6_u = {
299 .daddr = iph->daddr,
300 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
301 };
302 456
303 EnterFunction(10); 457 EnterFunction(10);
304 458
305 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 459 if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
306 if (!rt) {
307 IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
308 __func__, &iph->daddr);
309 goto tx_error_icmp; 460 goto tx_error_icmp;
310 }
311 461
312 /* MTU checking */ 462 /* MTU checking */
313 mtu = dst_mtu(&rt->dst); 463 mtu = dst_mtu(&rt->dst);
314 if (skb->len > mtu) { 464 if (skb->len > mtu) {
315 dst_release(&rt->dst); 465 if (!skb->dev) {
466 struct net *net = dev_net(skb_dst(skb)->dev);
467
468 skb->dev = net->loopback_dev;
469 }
316 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 470 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
471 dst_release(&rt->dst);
317 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 472 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
318 goto tx_error; 473 goto tx_error;
319 } 474 }
@@ -335,7 +490,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
335 /* Another hack: avoid icmp_send in ip_fragment */ 490 /* Another hack: avoid icmp_send in ip_fragment */
336 skb->local_df = 1; 491 skb->local_df = 1;
337 492
338 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 493 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
339 494
340 LeaveFunction(10); 495 LeaveFunction(10);
341 return NF_STOLEN; 496 return NF_STOLEN;
@@ -349,36 +504,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
349} 504}
350#endif 505#endif
351 506
352void
353ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
354{
355 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
356 struct nf_conntrack_tuple new_tuple;
357
358 if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
359 return;
360
361 /*
362 * The connection is not yet in the hashtable, so we update it.
363 * CIP->VIP will remain the same, so leave the tuple in
364 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
365 * real-server we will see RIP->DIP.
366 */
367 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
368 if (outin)
369 new_tuple.src.u3 = cp->daddr;
370 else
371 new_tuple.dst.u3 = cp->vaddr;
372 /*
373 * This will also take care of UDP and other protocols.
374 */
375 if (outin)
376 new_tuple.src.u.tcp.port = cp->dport;
377 else
378 new_tuple.dst.u.tcp.port = cp->vport;
379 nf_conntrack_alter_reply(ct, &new_tuple);
380}
381
382/* 507/*
383 * NAT transmitter (only for outside-to-inside nat forwarding) 508 * NAT transmitter (only for outside-to-inside nat forwarding)
384 * Not used for related ICMP 509 * Not used for related ICMP
@@ -390,6 +515,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
390 struct rtable *rt; /* Route to the other host */ 515 struct rtable *rt; /* Route to the other host */
391 int mtu; 516 int mtu;
392 struct iphdr *iph = ip_hdr(skb); 517 struct iphdr *iph = ip_hdr(skb);
518 int local;
393 519
394 EnterFunction(10); 520 EnterFunction(10);
395 521
@@ -403,16 +529,42 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
403 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 529 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
404 } 530 }
405 531
406 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 532 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
533 RT_TOS(iph->tos), 1|2|4)))
407 goto tx_error_icmp; 534 goto tx_error_icmp;
535 local = rt->rt_flags & RTCF_LOCAL;
536 /*
537 * Avoid duplicate tuple in reply direction for NAT traffic
538 * to local address when connection is sync-ed
539 */
540#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
541 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
542 enum ip_conntrack_info ctinfo;
543 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
544
545 if (ct && !nf_ct_is_untracked(ct)) {
546 IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
547 "ip_vs_nat_xmit(): "
548 "stopping DNAT to local address");
549 goto tx_error_put;
550 }
551 }
552#endif
553
554 /* From world but DNAT to loopback address? */
555 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
556 IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
557 "stopping DNAT to loopback address");
558 goto tx_error_put;
559 }
408 560
409 /* MTU checking */ 561 /* MTU checking */
410 mtu = dst_mtu(&rt->dst); 562 mtu = dst_mtu(&rt->dst);
411 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 563 if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
412 ip_rt_put(rt);
413 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 564 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
414 IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); 565 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
415 goto tx_error; 566 "ip_vs_nat_xmit(): frag needed for");
567 goto tx_error_put;
416 } 568 }
417 569
418 /* copy-on-write the packet before mangling it */ 570 /* copy-on-write the packet before mangling it */
@@ -422,19 +574,28 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
422 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 574 if (skb_cow(skb, rt->dst.dev->hard_header_len))
423 goto tx_error_put; 575 goto tx_error_put;
424 576
425 /* drop old route */
426 skb_dst_drop(skb);
427 skb_dst_set(skb, &rt->dst);
428
429 /* mangle the packet */ 577 /* mangle the packet */
430 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 578 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
431 goto tx_error; 579 goto tx_error_put;
432 ip_hdr(skb)->daddr = cp->daddr.ip; 580 ip_hdr(skb)->daddr = cp->daddr.ip;
433 ip_send_check(ip_hdr(skb)); 581 ip_send_check(ip_hdr(skb));
434 582
435 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 583 if (!local) {
584 /* drop old route */
585 skb_dst_drop(skb);
586 skb_dst_set(skb, &rt->dst);
587 } else {
588 ip_rt_put(rt);
589 /*
590 * Some IPv4 replies get local address from routes,
591 * not from iph, so while we DNAT after routing
592 * we need this second input/output route.
593 */
594 if (!__ip_vs_reroute_locally(skb))
595 goto tx_error;
596 }
436 597
437 ip_vs_update_conntrack(skb, cp, 1); 598 IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
438 599
439 /* FIXME: when application helper enlarges the packet and the length 600 /* FIXME: when application helper enlarges the packet and the length
440 is larger than the MTU of outgoing device, there will be still 601 is larger than the MTU of outgoing device, there will be still
@@ -443,7 +604,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
443 /* Another hack: avoid icmp_send in ip_fragment */ 604 /* Another hack: avoid icmp_send in ip_fragment */
444 skb->local_df = 1; 605 skb->local_df = 1;
445 606
446 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 607 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
447 608
448 LeaveFunction(10); 609 LeaveFunction(10);
449 return NF_STOLEN; 610 return NF_STOLEN;
@@ -451,8 +612,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
451 tx_error_icmp: 612 tx_error_icmp:
452 dst_link_failure(skb); 613 dst_link_failure(skb);
453 tx_error: 614 tx_error:
454 LeaveFunction(10);
455 kfree_skb(skb); 615 kfree_skb(skb);
616 LeaveFunction(10);
456 return NF_STOLEN; 617 return NF_STOLEN;
457 tx_error_put: 618 tx_error_put:
458 ip_rt_put(rt); 619 ip_rt_put(rt);
@@ -466,6 +627,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
466{ 627{
467 struct rt6_info *rt; /* Route to the other host */ 628 struct rt6_info *rt; /* Route to the other host */
468 int mtu; 629 int mtu;
630 int local;
469 631
470 EnterFunction(10); 632 EnterFunction(10);
471 633
@@ -480,18 +642,49 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
480 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); 642 IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
481 } 643 }
482 644
483 rt = __ip_vs_get_out_rt_v6(cp); 645 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
484 if (!rt) 646 0, 1|2|4)))
485 goto tx_error_icmp; 647 goto tx_error_icmp;
648 local = __ip_vs_is_local_route6(rt);
649 /*
650 * Avoid duplicate tuple in reply direction for NAT traffic
651 * to local address when connection is sync-ed
652 */
653#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
654 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
655 enum ip_conntrack_info ctinfo;
656 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
657
658 if (ct && !nf_ct_is_untracked(ct)) {
659 IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
660 "ip_vs_nat_xmit_v6(): "
661 "stopping DNAT to local address");
662 goto tx_error_put;
663 }
664 }
665#endif
666
667 /* From world but DNAT to loopback address? */
668 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
669 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
670 IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
671 "ip_vs_nat_xmit_v6(): "
672 "stopping DNAT to loopback address");
673 goto tx_error_put;
674 }
486 675
487 /* MTU checking */ 676 /* MTU checking */
488 mtu = dst_mtu(&rt->dst); 677 mtu = dst_mtu(&rt->dst);
489 if (skb->len > mtu) { 678 if (skb->len > mtu) {
490 dst_release(&rt->dst); 679 if (!skb->dev) {
680 struct net *net = dev_net(skb_dst(skb)->dev);
681
682 skb->dev = net->loopback_dev;
683 }
491 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 684 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
492 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 685 IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
493 "ip_vs_nat_xmit_v6(): frag needed for"); 686 "ip_vs_nat_xmit_v6(): frag needed for");
494 goto tx_error; 687 goto tx_error_put;
495 } 688 }
496 689
497 /* copy-on-write the packet before mangling it */ 690 /* copy-on-write the packet before mangling it */
@@ -501,18 +694,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
501 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 694 if (skb_cow(skb, rt->dst.dev->hard_header_len))
502 goto tx_error_put; 695 goto tx_error_put;
503 696
504 /* drop old route */
505 skb_dst_drop(skb);
506 skb_dst_set(skb, &rt->dst);
507
508 /* mangle the packet */ 697 /* mangle the packet */
509 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) 698 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
510 goto tx_error; 699 goto tx_error;
511 ipv6_hdr(skb)->daddr = cp->daddr.in6; 700 ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
512 701
513 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 702 if (!local || !skb->dev) {
703 /* drop the old route when skb is not shared */
704 skb_dst_drop(skb);
705 skb_dst_set(skb, &rt->dst);
706 } else {
707 /* destined to loopback, do we need to change route? */
708 dst_release(&rt->dst);
709 }
514 710
515 ip_vs_update_conntrack(skb, cp, 1); 711 IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
516 712
517 /* FIXME: when application helper enlarges the packet and the length 713 /* FIXME: when application helper enlarges the packet and the length
518 is larger than the MTU of outgoing device, there will be still 714 is larger than the MTU of outgoing device, there will be still
@@ -521,7 +717,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
521 /* Another hack: avoid icmp_send in ip_fragment */ 717 /* Another hack: avoid icmp_send in ip_fragment */
522 skb->local_df = 1; 718 skb->local_df = 1;
523 719
524 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 720 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
525 721
526 LeaveFunction(10); 722 LeaveFunction(10);
527 return NF_STOLEN; 723 return NF_STOLEN;
@@ -567,30 +763,27 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
567 struct iphdr *old_iph = ip_hdr(skb); 763 struct iphdr *old_iph = ip_hdr(skb);
568 u8 tos = old_iph->tos; 764 u8 tos = old_iph->tos;
569 __be16 df = old_iph->frag_off; 765 __be16 df = old_iph->frag_off;
570 sk_buff_data_t old_transport_header = skb->transport_header;
571 struct iphdr *iph; /* Our new IP header */ 766 struct iphdr *iph; /* Our new IP header */
572 unsigned int max_headroom; /* The extra header space needed */ 767 unsigned int max_headroom; /* The extra header space needed */
573 int mtu; 768 int mtu;
769 int ret;
574 770
575 EnterFunction(10); 771 EnterFunction(10);
576 772
577 if (skb->protocol != htons(ETH_P_IP)) { 773 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
578 IP_VS_DBG_RL("%s(): protocol error, " 774 RT_TOS(tos), 1|2)))
579 "ETH_P_IP: %d, skb protocol: %d\n",
580 __func__, htons(ETH_P_IP), skb->protocol);
581 goto tx_error;
582 }
583
584 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
585 goto tx_error_icmp; 775 goto tx_error_icmp;
776 if (rt->rt_flags & RTCF_LOCAL) {
777 ip_rt_put(rt);
778 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
779 }
586 780
587 tdev = rt->dst.dev; 781 tdev = rt->dst.dev;
588 782
589 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 783 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
590 if (mtu < 68) { 784 if (mtu < 68) {
591 ip_rt_put(rt);
592 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); 785 IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
593 goto tx_error; 786 goto tx_error_put;
594 } 787 }
595 if (skb_dst(skb)) 788 if (skb_dst(skb))
596 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 789 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@@ -600,9 +793,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
600 if ((old_iph->frag_off & htons(IP_DF)) 793 if ((old_iph->frag_off & htons(IP_DF))
601 && mtu < ntohs(old_iph->tot_len)) { 794 && mtu < ntohs(old_iph->tot_len)) {
602 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 795 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
603 ip_rt_put(rt);
604 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 796 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
605 goto tx_error; 797 goto tx_error_put;
606 } 798 }
607 799
608 /* 800 /*
@@ -625,7 +817,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
625 old_iph = ip_hdr(skb); 817 old_iph = ip_hdr(skb);
626 } 818 }
627 819
628 skb->transport_header = old_transport_header; 820 skb->transport_header = skb->network_header;
629 821
630 /* fix old IP header checksum */ 822 /* fix old IP header checksum */
631 ip_send_check(old_iph); 823 ip_send_check(old_iph);
@@ -655,7 +847,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
655 /* Another hack: avoid icmp_send in ip_fragment */ 847 /* Another hack: avoid icmp_send in ip_fragment */
656 skb->local_df = 1; 848 skb->local_df = 1;
657 849
658 ip_local_out(skb); 850 ret = IP_VS_XMIT_TUNNEL(skb, cp);
851 if (ret == NF_ACCEPT)
852 ip_local_out(skb);
853 else if (ret == NF_DROP)
854 kfree_skb(skb);
659 855
660 LeaveFunction(10); 856 LeaveFunction(10);
661 857
@@ -667,6 +863,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
667 kfree_skb(skb); 863 kfree_skb(skb);
668 LeaveFunction(10); 864 LeaveFunction(10);
669 return NF_STOLEN; 865 return NF_STOLEN;
866tx_error_put:
867 ip_rt_put(rt);
868 goto tx_error;
670} 869}
671 870
672#ifdef CONFIG_IP_VS_IPV6 871#ifdef CONFIG_IP_VS_IPV6
@@ -675,43 +874,44 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
675 struct ip_vs_protocol *pp) 874 struct ip_vs_protocol *pp)
676{ 875{
677 struct rt6_info *rt; /* Route to the other host */ 876 struct rt6_info *rt; /* Route to the other host */
877 struct in6_addr saddr; /* Source for tunnel */
678 struct net_device *tdev; /* Device to other host */ 878 struct net_device *tdev; /* Device to other host */
679 struct ipv6hdr *old_iph = ipv6_hdr(skb); 879 struct ipv6hdr *old_iph = ipv6_hdr(skb);
680 sk_buff_data_t old_transport_header = skb->transport_header;
681 struct ipv6hdr *iph; /* Our new IP header */ 880 struct ipv6hdr *iph; /* Our new IP header */
682 unsigned int max_headroom; /* The extra header space needed */ 881 unsigned int max_headroom; /* The extra header space needed */
683 int mtu; 882 int mtu;
883 int ret;
684 884
685 EnterFunction(10); 885 EnterFunction(10);
686 886
687 if (skb->protocol != htons(ETH_P_IPV6)) { 887 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
688 IP_VS_DBG_RL("%s(): protocol error, " 888 &saddr, 1, 1|2)))
689 "ETH_P_IPV6: %d, skb protocol: %d\n",
690 __func__, htons(ETH_P_IPV6), skb->protocol);
691 goto tx_error;
692 }
693
694 rt = __ip_vs_get_out_rt_v6(cp);
695 if (!rt)
696 goto tx_error_icmp; 889 goto tx_error_icmp;
890 if (__ip_vs_is_local_route6(rt)) {
891 dst_release(&rt->dst);
892 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
893 }
697 894
698 tdev = rt->dst.dev; 895 tdev = rt->dst.dev;
699 896
700 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); 897 mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
701 /* TODO IPv6: do we need this check in IPv6? */ 898 if (mtu < IPV6_MIN_MTU) {
702 if (mtu < 1280) { 899 IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
703 dst_release(&rt->dst); 900 IPV6_MIN_MTU);
704 IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); 901 goto tx_error_put;
705 goto tx_error;
706 } 902 }
707 if (skb_dst(skb)) 903 if (skb_dst(skb))
708 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 904 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
709 905
710 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { 906 if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
907 if (!skb->dev) {
908 struct net *net = dev_net(skb_dst(skb)->dev);
909
910 skb->dev = net->loopback_dev;
911 }
711 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 912 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
712 dst_release(&rt->dst);
713 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 913 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
714 goto tx_error; 914 goto tx_error_put;
715 } 915 }
716 916
717 /* 917 /*
@@ -734,7 +934,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
734 old_iph = ipv6_hdr(skb); 934 old_iph = ipv6_hdr(skb);
735 } 935 }
736 936
737 skb->transport_header = old_transport_header; 937 skb->transport_header = skb->network_header;
738 938
739 skb_push(skb, sizeof(struct ipv6hdr)); 939 skb_push(skb, sizeof(struct ipv6hdr));
740 skb_reset_network_header(skb); 940 skb_reset_network_header(skb);
@@ -754,14 +954,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
754 be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); 954 be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
755 iph->priority = old_iph->priority; 955 iph->priority = old_iph->priority;
756 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); 956 memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
757 iph->daddr = rt->rt6i_dst.addr; 957 ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
758 iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ 958 ipv6_addr_copy(&iph->saddr, &saddr);
759 iph->hop_limit = old_iph->hop_limit; 959 iph->hop_limit = old_iph->hop_limit;
760 960
761 /* Another hack: avoid icmp_send in ip_fragment */ 961 /* Another hack: avoid icmp_send in ip_fragment */
762 skb->local_df = 1; 962 skb->local_df = 1;
763 963
764 ip6_local_out(skb); 964 ret = IP_VS_XMIT_TUNNEL(skb, cp);
965 if (ret == NF_ACCEPT)
966 ip6_local_out(skb);
967 else if (ret == NF_DROP)
968 kfree_skb(skb);
765 969
766 LeaveFunction(10); 970 LeaveFunction(10);
767 971
@@ -773,6 +977,9 @@ tx_error:
773 kfree_skb(skb); 977 kfree_skb(skb);
774 LeaveFunction(10); 978 LeaveFunction(10);
775 return NF_STOLEN; 979 return NF_STOLEN;
980tx_error_put:
981 dst_release(&rt->dst);
982 goto tx_error;
776} 983}
777#endif 984#endif
778 985
@@ -791,8 +998,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
791 998
792 EnterFunction(10); 999 EnterFunction(10);
793 1000
794 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) 1001 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1002 RT_TOS(iph->tos), 1|2)))
795 goto tx_error_icmp; 1003 goto tx_error_icmp;
1004 if (rt->rt_flags & RTCF_LOCAL) {
1005 ip_rt_put(rt);
1006 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
1007 }
796 1008
797 /* MTU checking */ 1009 /* MTU checking */
798 mtu = dst_mtu(&rt->dst); 1010 mtu = dst_mtu(&rt->dst);
@@ -820,7 +1032,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
820 /* Another hack: avoid icmp_send in ip_fragment */ 1032 /* Another hack: avoid icmp_send in ip_fragment */
821 skb->local_df = 1; 1033 skb->local_df = 1;
822 1034
823 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 1035 IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
824 1036
825 LeaveFunction(10); 1037 LeaveFunction(10);
826 return NF_STOLEN; 1038 return NF_STOLEN;
@@ -843,13 +1055,22 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
843 1055
844 EnterFunction(10); 1056 EnterFunction(10);
845 1057
846 rt = __ip_vs_get_out_rt_v6(cp); 1058 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
847 if (!rt) 1059 0, 1|2)))
848 goto tx_error_icmp; 1060 goto tx_error_icmp;
1061 if (__ip_vs_is_local_route6(rt)) {
1062 dst_release(&rt->dst);
1063 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
1064 }
849 1065
850 /* MTU checking */ 1066 /* MTU checking */
851 mtu = dst_mtu(&rt->dst); 1067 mtu = dst_mtu(&rt->dst);
852 if (skb->len > mtu) { 1068 if (skb->len > mtu) {
1069 if (!skb->dev) {
1070 struct net *net = dev_net(skb_dst(skb)->dev);
1071
1072 skb->dev = net->loopback_dev;
1073 }
853 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1074 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
854 dst_release(&rt->dst); 1075 dst_release(&rt->dst);
855 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1076 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -873,7 +1094,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
873 /* Another hack: avoid icmp_send in ip_fragment */ 1094 /* Another hack: avoid icmp_send in ip_fragment */
874 skb->local_df = 1; 1095 skb->local_df = 1;
875 1096
876 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1097 IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
877 1098
878 LeaveFunction(10); 1099 LeaveFunction(10);
879 return NF_STOLEN; 1100 return NF_STOLEN;
@@ -899,6 +1120,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
899 struct rtable *rt; /* Route to the other host */ 1120 struct rtable *rt; /* Route to the other host */
900 int mtu; 1121 int mtu;
901 int rc; 1122 int rc;
1123 int local;
902 1124
903 EnterFunction(10); 1125 EnterFunction(10);
904 1126
@@ -919,16 +1141,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
919 * mangle and send the packet here (only for VS/NAT) 1141 * mangle and send the packet here (only for VS/NAT)
920 */ 1142 */
921 1143
922 if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) 1144 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
1145 RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
923 goto tx_error_icmp; 1146 goto tx_error_icmp;
1147 local = rt->rt_flags & RTCF_LOCAL;
1148
1149 /*
1150 * Avoid duplicate tuple in reply direction for NAT traffic
1151 * to local address when connection is sync-ed
1152 */
1153#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1154 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1155 enum ip_conntrack_info ctinfo;
1156 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1157
1158 if (ct && !nf_ct_is_untracked(ct)) {
1159 IP_VS_DBG(10, "%s(): "
1160 "stopping DNAT to local address %pI4\n",
1161 __func__, &cp->daddr.ip);
1162 goto tx_error_put;
1163 }
1164 }
1165#endif
1166
1167 /* From world but DNAT to loopback address? */
1168 if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
1169 IP_VS_DBG(1, "%s(): "
1170 "stopping DNAT to loopback %pI4\n",
1171 __func__, &cp->daddr.ip);
1172 goto tx_error_put;
1173 }
924 1174
925 /* MTU checking */ 1175 /* MTU checking */
926 mtu = dst_mtu(&rt->dst); 1176 mtu = dst_mtu(&rt->dst);
927 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1177 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
928 ip_rt_put(rt);
929 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1178 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
930 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1179 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
931 goto tx_error; 1180 goto tx_error_put;
932 } 1181 }
933 1182
934 /* copy-on-write the packet before mangling it */ 1183 /* copy-on-write the packet before mangling it */
@@ -938,16 +1187,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
938 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1187 if (skb_cow(skb, rt->dst.dev->hard_header_len))
939 goto tx_error_put; 1188 goto tx_error_put;
940 1189
941 /* drop the old route when skb is not shared */
942 skb_dst_drop(skb);
943 skb_dst_set(skb, &rt->dst);
944
945 ip_vs_nat_icmp(skb, pp, cp, 0); 1190 ip_vs_nat_icmp(skb, pp, cp, 0);
946 1191
1192 if (!local) {
1193 /* drop the old route when skb is not shared */
1194 skb_dst_drop(skb);
1195 skb_dst_set(skb, &rt->dst);
1196 } else {
1197 ip_rt_put(rt);
1198 /*
1199 * Some IPv4 replies get local address from routes,
1200 * not from iph, so while we DNAT after routing
1201 * we need this second input/output route.
1202 */
1203 if (!__ip_vs_reroute_locally(skb))
1204 goto tx_error;
1205 }
1206
947 /* Another hack: avoid icmp_send in ip_fragment */ 1207 /* Another hack: avoid icmp_send in ip_fragment */
948 skb->local_df = 1; 1208 skb->local_df = 1;
949 1209
950 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 1210 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
951 1211
952 rc = NF_STOLEN; 1212 rc = NF_STOLEN;
953 goto out; 1213 goto out;
@@ -973,6 +1233,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
973 struct rt6_info *rt; /* Route to the other host */ 1233 struct rt6_info *rt; /* Route to the other host */
974 int mtu; 1234 int mtu;
975 int rc; 1235 int rc;
1236 int local;
976 1237
977 EnterFunction(10); 1238 EnterFunction(10);
978 1239
@@ -993,17 +1254,49 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
993 * mangle and send the packet here (only for VS/NAT) 1254 * mangle and send the packet here (only for VS/NAT)
994 */ 1255 */
995 1256
996 rt = __ip_vs_get_out_rt_v6(cp); 1257 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
997 if (!rt) 1258 0, 1|2|4)))
998 goto tx_error_icmp; 1259 goto tx_error_icmp;
999 1260
1261 local = __ip_vs_is_local_route6(rt);
1262 /*
1263 * Avoid duplicate tuple in reply direction for NAT traffic
1264 * to local address when connection is sync-ed
1265 */
1266#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1267 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1268 enum ip_conntrack_info ctinfo;
1269 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
1270
1271 if (ct && !nf_ct_is_untracked(ct)) {
1272 IP_VS_DBG(10, "%s(): "
1273 "stopping DNAT to local address %pI6\n",
1274 __func__, &cp->daddr.in6);
1275 goto tx_error_put;
1276 }
1277 }
1278#endif
1279
1280 /* From world but DNAT to loopback address? */
1281 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1282 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1283 IP_VS_DBG(1, "%s(): "
1284 "stopping DNAT to loopback %pI6\n",
1285 __func__, &cp->daddr.in6);
1286 goto tx_error_put;
1287 }
1288
1000 /* MTU checking */ 1289 /* MTU checking */
1001 mtu = dst_mtu(&rt->dst); 1290 mtu = dst_mtu(&rt->dst);
1002 if (skb->len > mtu) { 1291 if (skb->len > mtu) {
1003 dst_release(&rt->dst); 1292 if (!skb->dev) {
1293 struct net *net = dev_net(skb_dst(skb)->dev);
1294
1295 skb->dev = net->loopback_dev;
1296 }
1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1297 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1005 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1298 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1006 goto tx_error; 1299 goto tx_error_put;
1007 } 1300 }
1008 1301
1009 /* copy-on-write the packet before mangling it */ 1302 /* copy-on-write the packet before mangling it */
@@ -1013,16 +1306,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1013 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1306 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1014 goto tx_error_put; 1307 goto tx_error_put;
1015 1308
1016 /* drop the old route when skb is not shared */
1017 skb_dst_drop(skb);
1018 skb_dst_set(skb, &rt->dst);
1019
1020 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1309 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1021 1310
1311 if (!local || !skb->dev) {
1312 /* drop the old route when skb is not shared */
1313 skb_dst_drop(skb);
1314 skb_dst_set(skb, &rt->dst);
1315 } else {
1316 /* destined to loopback, do we need to change route? */
1317 dst_release(&rt->dst);
1318 }
1319
1022 /* Another hack: avoid icmp_send in ip_fragment */ 1320 /* Another hack: avoid icmp_send in ip_fragment */
1023 skb->local_df = 1; 1321 skb->local_df = 1;
1024 1322
1025 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1323 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1026 1324
1027 rc = NF_STOLEN; 1325 rc = NF_STOLEN;
1028 goto out; 1326 goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df3eedb142ff..1eacf8d9966a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,32 +65,42 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
67 67
68static int nf_conntrack_hash_rnd_initted; 68static unsigned int nf_conntrack_hash_rnd __read_mostly;
69static unsigned int nf_conntrack_hash_rnd;
70 69
71static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 70static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
72 u16 zone, unsigned int size, unsigned int rnd)
73{ 71{
74 unsigned int n; 72 unsigned int n;
75 u_int32_t h;
76 73
77 /* The direction must be ignored, so we hash everything up to the 74 /* The direction must be ignored, so we hash everything up to the
78 * destination ports (which is a multiple of 4) and treat the last 75 * destination ports (which is a multiple of 4) and treat the last
79 * three bytes manually. 76 * three bytes manually.
80 */ 77 */
81 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 78 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
82 h = jhash2((u32 *)tuple, n, 79 return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
83 zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | 80 (((__force __u16)tuple->dst.u.all << 16) |
84 tuple->dst.protonum)); 81 tuple->dst.protonum));
82}
83
84static u32 __hash_bucket(u32 hash, unsigned int size)
85{
86 return ((u64)hash * size) >> 32;
87}
88
89static u32 hash_bucket(u32 hash, const struct net *net)
90{
91 return __hash_bucket(hash, net->ct.htable_size);
92}
85 93
86 return ((u64)h * size) >> 32; 94static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
95 u16 zone, unsigned int size)
96{
97 return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
87} 98}
88 99
89static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, 100static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
90 const struct nf_conntrack_tuple *tuple) 101 const struct nf_conntrack_tuple *tuple)
91{ 102{
92 return __hash_conntrack(tuple, zone, net->ct.htable_size, 103 return __hash_conntrack(tuple, zone, net->ct.htable_size);
93 nf_conntrack_hash_rnd);
94} 104}
95 105
96bool 106bool
@@ -292,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
292 * OR 302 * OR
293 * - Caller must lock nf_conntrack_lock before calling this function 303 * - Caller must lock nf_conntrack_lock before calling this function
294 */ 304 */
295struct nf_conntrack_tuple_hash * 305static struct nf_conntrack_tuple_hash *
296__nf_conntrack_find(struct net *net, u16 zone, 306____nf_conntrack_find(struct net *net, u16 zone,
297 const struct nf_conntrack_tuple *tuple) 307 const struct nf_conntrack_tuple *tuple, u32 hash)
298{ 308{
299 struct nf_conntrack_tuple_hash *h; 309 struct nf_conntrack_tuple_hash *h;
300 struct hlist_nulls_node *n; 310 struct hlist_nulls_node *n;
301 unsigned int hash = hash_conntrack(net, zone, tuple); 311 unsigned int bucket = hash_bucket(hash, net);
302 312
303 /* Disable BHs the entire time since we normally need to disable them 313 /* Disable BHs the entire time since we normally need to disable them
304 * at least once for the stats anyway. 314 * at least once for the stats anyway.
305 */ 315 */
306 local_bh_disable(); 316 local_bh_disable();
307begin: 317begin:
308 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { 318 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
309 if (nf_ct_tuple_equal(tuple, &h->tuple) && 319 if (nf_ct_tuple_equal(tuple, &h->tuple) &&
310 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { 320 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
311 NF_CT_STAT_INC(net, found); 321 NF_CT_STAT_INC(net, found);
@@ -319,7 +329,7 @@ begin:
319 * not the expected one, we must restart lookup. 329 * not the expected one, we must restart lookup.
320 * We probably met an item that was moved to another chain. 330 * We probably met an item that was moved to another chain.
321 */ 331 */
322 if (get_nulls_value(n) != hash) { 332 if (get_nulls_value(n) != bucket) {
323 NF_CT_STAT_INC(net, search_restart); 333 NF_CT_STAT_INC(net, search_restart);
324 goto begin; 334 goto begin;
325 } 335 }
@@ -327,19 +337,27 @@ begin:
327 337
328 return NULL; 338 return NULL;
329} 339}
340
341struct nf_conntrack_tuple_hash *
342__nf_conntrack_find(struct net *net, u16 zone,
343 const struct nf_conntrack_tuple *tuple)
344{
345 return ____nf_conntrack_find(net, zone, tuple,
346 hash_conntrack_raw(tuple, zone));
347}
330EXPORT_SYMBOL_GPL(__nf_conntrack_find); 348EXPORT_SYMBOL_GPL(__nf_conntrack_find);
331 349
332/* Find a connection corresponding to a tuple. */ 350/* Find a connection corresponding to a tuple. */
333struct nf_conntrack_tuple_hash * 351static struct nf_conntrack_tuple_hash *
334nf_conntrack_find_get(struct net *net, u16 zone, 352__nf_conntrack_find_get(struct net *net, u16 zone,
335 const struct nf_conntrack_tuple *tuple) 353 const struct nf_conntrack_tuple *tuple, u32 hash)
336{ 354{
337 struct nf_conntrack_tuple_hash *h; 355 struct nf_conntrack_tuple_hash *h;
338 struct nf_conn *ct; 356 struct nf_conn *ct;
339 357
340 rcu_read_lock(); 358 rcu_read_lock();
341begin: 359begin:
342 h = __nf_conntrack_find(net, zone, tuple); 360 h = ____nf_conntrack_find(net, zone, tuple, hash);
343 if (h) { 361 if (h) {
344 ct = nf_ct_tuplehash_to_ctrack(h); 362 ct = nf_ct_tuplehash_to_ctrack(h);
345 if (unlikely(nf_ct_is_dying(ct) || 363 if (unlikely(nf_ct_is_dying(ct) ||
@@ -357,6 +375,14 @@ begin:
357 375
358 return h; 376 return h;
359} 377}
378
379struct nf_conntrack_tuple_hash *
380nf_conntrack_find_get(struct net *net, u16 zone,
381 const struct nf_conntrack_tuple *tuple)
382{
383 return __nf_conntrack_find_get(net, zone, tuple,
384 hash_conntrack_raw(tuple, zone));
385}
360EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 386EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
361 387
362static void __nf_conntrack_hash_insert(struct nf_conn *ct, 388static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -409,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
409 return NF_ACCEPT; 435 return NF_ACCEPT;
410 436
411 zone = nf_ct_zone(ct); 437 zone = nf_ct_zone(ct);
412 hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 438 /* reuse the hash saved before */
413 repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 439 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
440 hash = hash_bucket(hash, net);
441 repl_hash = hash_conntrack(net, zone,
442 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
414 443
415 /* We're not in hash table, and we refuse to set up related 444 /* We're not in hash table, and we refuse to set up related
416 connections for unconfirmed conns. But packet copies and 445 connections for unconfirmed conns. But packet copies and
@@ -567,17 +596,29 @@ static noinline int early_drop(struct net *net, unsigned int hash)
567 return dropped; 596 return dropped;
568} 597}
569 598
570struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, 599static struct nf_conn *
571 const struct nf_conntrack_tuple *orig, 600__nf_conntrack_alloc(struct net *net, u16 zone,
572 const struct nf_conntrack_tuple *repl, 601 const struct nf_conntrack_tuple *orig,
573 gfp_t gfp) 602 const struct nf_conntrack_tuple *repl,
603 gfp_t gfp, u32 hash)
574{ 604{
575 struct nf_conn *ct; 605 struct nf_conn *ct;
576 606
577 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 607 if (unlikely(!nf_conntrack_hash_rnd)) {
578 get_random_bytes(&nf_conntrack_hash_rnd, 608 unsigned int rand;
579 sizeof(nf_conntrack_hash_rnd)); 609
580 nf_conntrack_hash_rnd_initted = 1; 610 /*
611 * Why not initialize nf_conntrack_rnd in a "init()" function ?
612 * Because there isn't enough entropy when system initializing,
613 * and we initialize it as late as possible.
614 */
615 do {
616 get_random_bytes(&rand, sizeof(rand));
617 } while (!rand);
618 cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
619
620 /* recompute the hash as nf_conntrack_hash_rnd is initialized */
621 hash = hash_conntrack_raw(orig, zone);
581 } 622 }
582 623
583 /* We don't want any race condition at early drop stage */ 624 /* We don't want any race condition at early drop stage */
@@ -585,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
585 626
586 if (nf_conntrack_max && 627 if (nf_conntrack_max &&
587 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 628 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
588 unsigned int hash = hash_conntrack(net, zone, orig); 629 if (!early_drop(net, hash_bucket(hash, net))) {
589 if (!early_drop(net, hash)) {
590 atomic_dec(&net->ct.count); 630 atomic_dec(&net->ct.count);
591 if (net_ratelimit()) 631 if (net_ratelimit())
592 printk(KERN_WARNING 632 printk(KERN_WARNING
@@ -616,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
616 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 656 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
617 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 657 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
618 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 658 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
619 ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; 659 /* save hash for reusing when confirming */
660 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
620 /* Don't set timer yet: wait for confirmation */ 661 /* Don't set timer yet: wait for confirmation */
621 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); 662 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
622 write_pnet(&ct->ct_net, net); 663 write_pnet(&ct->ct_net, net);
@@ -643,6 +684,14 @@ out_free:
643 return ERR_PTR(-ENOMEM); 684 return ERR_PTR(-ENOMEM);
644#endif 685#endif
645} 686}
687
688struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
689 const struct nf_conntrack_tuple *orig,
690 const struct nf_conntrack_tuple *repl,
691 gfp_t gfp)
692{
693 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
694}
646EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 695EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
647 696
648void nf_conntrack_free(struct nf_conn *ct) 697void nf_conntrack_free(struct nf_conn *ct)
@@ -664,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
664 struct nf_conntrack_l3proto *l3proto, 713 struct nf_conntrack_l3proto *l3proto,
665 struct nf_conntrack_l4proto *l4proto, 714 struct nf_conntrack_l4proto *l4proto,
666 struct sk_buff *skb, 715 struct sk_buff *skb,
667 unsigned int dataoff) 716 unsigned int dataoff, u32 hash)
668{ 717{
669 struct nf_conn *ct; 718 struct nf_conn *ct;
670 struct nf_conn_help *help; 719 struct nf_conn_help *help;
@@ -678,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
678 return NULL; 727 return NULL;
679 } 728 }
680 729
681 ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); 730 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
731 hash);
682 if (IS_ERR(ct)) { 732 if (IS_ERR(ct)) {
683 pr_debug("Can't allocate conntrack.\n"); 733 pr_debug("Can't allocate conntrack.\n");
684 return (struct nf_conntrack_tuple_hash *)ct; 734 return (struct nf_conntrack_tuple_hash *)ct;
@@ -755,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
755 struct nf_conntrack_tuple_hash *h; 805 struct nf_conntrack_tuple_hash *h;
756 struct nf_conn *ct; 806 struct nf_conn *ct;
757 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; 807 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
808 u32 hash;
758 809
759 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 810 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
760 dataoff, l3num, protonum, &tuple, l3proto, 811 dataoff, l3num, protonum, &tuple, l3proto,
@@ -764,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
764 } 815 }
765 816
766 /* look for tuple match */ 817 /* look for tuple match */
767 h = nf_conntrack_find_get(net, zone, &tuple); 818 hash = hash_conntrack_raw(&tuple, zone);
819 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
768 if (!h) { 820 if (!h) {
769 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 821 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
770 skb, dataoff); 822 skb, dataoff, hash);
771 if (!h) 823 if (!h)
772 return NULL; 824 return NULL;
773 if (IS_ERR(h)) 825 if (IS_ERR(h))
@@ -1307,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1307 ct = nf_ct_tuplehash_to_ctrack(h); 1359 ct = nf_ct_tuplehash_to_ctrack(h);
1308 hlist_nulls_del_rcu(&h->hnnode); 1360 hlist_nulls_del_rcu(&h->hnnode);
1309 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), 1361 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
1310 hashsize, 1362 hashsize);
1311 nf_conntrack_hash_rnd);
1312 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1363 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1313 } 1364 }
1314 } 1365 }
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acb29ccaa41f..46e8966912b1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,25 +38,30 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly;
38 38
39static struct kmem_cache *nf_ct_expect_cachep __read_mostly; 39static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
40 40
41static HLIST_HEAD(nf_ct_userspace_expect_list);
42
41/* nf_conntrack_expect helper functions */ 43/* nf_conntrack_expect helper functions */
42void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) 44void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
45 u32 pid, int report)
43{ 46{
44 struct nf_conn_help *master_help = nfct_help(exp->master); 47 struct nf_conn_help *master_help = nfct_help(exp->master);
45 struct net *net = nf_ct_exp_net(exp); 48 struct net *net = nf_ct_exp_net(exp);
46 49
47 NF_CT_ASSERT(master_help);
48 NF_CT_ASSERT(!timer_pending(&exp->timeout)); 50 NF_CT_ASSERT(!timer_pending(&exp->timeout));
49 51
50 hlist_del_rcu(&exp->hnode); 52 hlist_del_rcu(&exp->hnode);
51 net->ct.expect_count--; 53 net->ct.expect_count--;
52 54
53 hlist_del(&exp->lnode); 55 hlist_del(&exp->lnode);
54 master_help->expecting[exp->class]--; 56 if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
57 master_help->expecting[exp->class]--;
58
59 nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
55 nf_ct_expect_put(exp); 60 nf_ct_expect_put(exp);
56 61
57 NF_CT_STAT_INC(net, expect_delete); 62 NF_CT_STAT_INC(net, expect_delete);
58} 63}
59EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); 64EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
60 65
61static void nf_ct_expectation_timed_out(unsigned long ul_expect) 66static void nf_ct_expectation_timed_out(unsigned long ul_expect)
62{ 67{
@@ -320,16 +325,21 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
320 325
321 atomic_inc(&exp->use); 326 atomic_inc(&exp->use);
322 327
323 hlist_add_head(&exp->lnode, &master_help->expectations); 328 if (master_help) {
324 master_help->expecting[exp->class]++; 329 hlist_add_head(&exp->lnode, &master_help->expectations);
330 master_help->expecting[exp->class]++;
331 } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
332 hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
325 333
326 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); 334 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
327 net->ct.expect_count++; 335 net->ct.expect_count++;
328 336
329 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 337 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
330 (unsigned long)exp); 338 (unsigned long)exp);
331 p = &master_help->helper->expect_policy[exp->class]; 339 if (master_help) {
332 exp->timeout.expires = jiffies + p->timeout * HZ; 340 p = &master_help->helper->expect_policy[exp->class];
341 exp->timeout.expires = jiffies + p->timeout * HZ;
342 }
333 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
334 344
335 atomic_inc(&exp->use); 345 atomic_inc(&exp->use);
@@ -380,7 +390,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
380 unsigned int h; 390 unsigned int h;
381 int ret = 1; 391 int ret = 1;
382 392
383 if (!master_help->helper) { 393 /* Don't allow expectations created from kernel-space with no helper */
394 if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
395 (!master_help || (master_help && !master_help->helper))) {
384 ret = -ESHUTDOWN; 396 ret = -ESHUTDOWN;
385 goto out; 397 goto out;
386 } 398 }
@@ -398,13 +410,16 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
398 } 410 }
399 } 411 }
400 /* Will be over limit? */ 412 /* Will be over limit? */
401 p = &master_help->helper->expect_policy[expect->class]; 413 if (master_help) {
402 if (p->max_expected && 414 p = &master_help->helper->expect_policy[expect->class];
403 master_help->expecting[expect->class] >= p->max_expected) { 415 if (p->max_expected &&
404 evict_oldest_expect(master, expect); 416 master_help->expecting[expect->class] >= p->max_expected) {
405 if (master_help->expecting[expect->class] >= p->max_expected) { 417 evict_oldest_expect(master, expect);
406 ret = -EMFILE; 418 if (master_help->expecting[expect->class]
407 goto out; 419 >= p->max_expected) {
420 ret = -EMFILE;
421 goto out;
422 }
408 } 423 }
409 } 424 }
410 425
@@ -439,6 +454,21 @@ out:
439} 454}
440EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); 455EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
441 456
457void nf_ct_remove_userspace_expectations(void)
458{
459 struct nf_conntrack_expect *exp;
460 struct hlist_node *n, *next;
461
462 hlist_for_each_entry_safe(exp, n, next,
463 &nf_ct_userspace_expect_list, lnode) {
464 if (del_timer(&exp->timeout)) {
465 nf_ct_unlink_expect(exp);
466 nf_ct_expect_put(exp);
467 }
468 }
469}
470EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
471
442#ifdef CONFIG_PROC_FS 472#ifdef CONFIG_PROC_FS
443struct ct_expect_iter_state { 473struct ct_expect_iter_state {
444 struct seq_net_private p; 474 struct seq_net_private p;
@@ -529,8 +559,12 @@ static int exp_seq_show(struct seq_file *s, void *v)
529 seq_printf(s, "PERMANENT"); 559 seq_printf(s, "PERMANENT");
530 delim = ","; 560 delim = ",";
531 } 561 }
532 if (expect->flags & NF_CT_EXPECT_INACTIVE) 562 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
533 seq_printf(s, "%sINACTIVE", delim); 563 seq_printf(s, "%sINACTIVE", delim);
564 delim = ",";
565 }
566 if (expect->flags & NF_CT_EXPECT_USERSPACE)
567 seq_printf(s, "%sUSERSPACE", delim);
534 568
535 helper = rcu_dereference(nfct_help(expect->master)->helper); 569 helper = rcu_dereference(nfct_help(expect->master)->helper);
536 if (helper) { 570 if (helper) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5bae1cd15eea..62bad229106b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1560,8 +1560,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1560 const struct nf_conntrack_expect *exp) 1560 const struct nf_conntrack_expect *exp)
1561{ 1561{
1562 struct nf_conn *master = exp->master; 1562 struct nf_conn *master = exp->master;
1563 struct nf_conntrack_helper *helper;
1564 long timeout = (exp->timeout.expires - jiffies) / HZ; 1563 long timeout = (exp->timeout.expires - jiffies) / HZ;
1564 struct nf_conn_help *help;
1565 1565
1566 if (timeout < 0) 1566 if (timeout < 0)
1567 timeout = 0; 1567 timeout = 0;
@@ -1577,9 +1577,15 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1577 1577
1578 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); 1578 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
1579 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); 1579 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
1580 helper = rcu_dereference(nfct_help(master)->helper); 1580 NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags));
1581 if (helper) 1581 help = nfct_help(master);
1582 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); 1582 if (help) {
1583 struct nf_conntrack_helper *helper;
1584
1585 helper = rcu_dereference(help->helper);
1586 if (helper)
1587 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
1588 }
1583 1589
1584 return 0; 1590 return 0;
1585 1591
@@ -1626,17 +1632,20 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1626 struct nlmsghdr *nlh; 1632 struct nlmsghdr *nlh;
1627 struct nfgenmsg *nfmsg; 1633 struct nfgenmsg *nfmsg;
1628 struct sk_buff *skb; 1634 struct sk_buff *skb;
1629 unsigned int type; 1635 unsigned int type, group;
1630 int flags = 0; 1636 int flags = 0;
1631 1637
1632 if (events & (1 << IPEXP_NEW)) { 1638 if (events & (1 << IPEXP_DESTROY)) {
1639 type = IPCTNL_MSG_EXP_DELETE;
1640 group = NFNLGRP_CONNTRACK_EXP_DESTROY;
1641 } else if (events & (1 << IPEXP_NEW)) {
1633 type = IPCTNL_MSG_EXP_NEW; 1642 type = IPCTNL_MSG_EXP_NEW;
1634 flags = NLM_F_CREATE|NLM_F_EXCL; 1643 flags = NLM_F_CREATE|NLM_F_EXCL;
1644 group = NFNLGRP_CONNTRACK_EXP_NEW;
1635 } else 1645 } else
1636 return 0; 1646 return 0;
1637 1647
1638 if (!item->report && 1648 if (!item->report && !nfnetlink_has_listeners(net, group))
1639 !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW))
1640 return 0; 1649 return 0;
1641 1650
1642 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1651 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1659,8 +1668,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1659 rcu_read_unlock(); 1668 rcu_read_unlock();
1660 1669
1661 nlmsg_end(skb, nlh); 1670 nlmsg_end(skb, nlh);
1662 nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, 1671 nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC);
1663 item->report, GFP_ATOMIC);
1664 return 0; 1672 return 0;
1665 1673
1666nla_put_failure: 1674nla_put_failure:
@@ -1733,6 +1741,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
1733 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, 1741 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
1734 [CTA_EXPECT_ID] = { .type = NLA_U32 }, 1742 [CTA_EXPECT_ID] = { .type = NLA_U32 },
1735 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, 1743 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING },
1744 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
1745 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
1736}; 1746};
1737 1747
1738static int 1748static int
@@ -1841,7 +1851,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1841 } 1851 }
1842 1852
1843 /* after list removal, usage count == 1 */ 1853 /* after list removal, usage count == 1 */
1844 nf_ct_unexpect_related(exp); 1854 spin_lock_bh(&nf_conntrack_lock);
1855 if (del_timer(&exp->timeout)) {
1856 nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid,
1857 nlmsg_report(nlh));
1858 nf_ct_expect_put(exp);
1859 }
1860 spin_unlock_bh(&nf_conntrack_lock);
1845 /* have to put what we 'get' above. 1861 /* have to put what we 'get' above.
1846 * after this line usage count == 0 */ 1862 * after this line usage count == 0 */
1847 nf_ct_expect_put(exp); 1863 nf_ct_expect_put(exp);
@@ -1858,7 +1874,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1858 m_help = nfct_help(exp->master); 1874 m_help = nfct_help(exp->master);
1859 if (!strcmp(m_help->helper->name, name) && 1875 if (!strcmp(m_help->helper->name, name) &&
1860 del_timer(&exp->timeout)) { 1876 del_timer(&exp->timeout)) {
1861 nf_ct_unlink_expect(exp); 1877 nf_ct_unlink_expect_report(exp,
1878 NETLINK_CB(skb).pid,
1879 nlmsg_report(nlh));
1862 nf_ct_expect_put(exp); 1880 nf_ct_expect_put(exp);
1863 } 1881 }
1864 } 1882 }
@@ -1872,7 +1890,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1872 &net->ct.expect_hash[i], 1890 &net->ct.expect_hash[i],
1873 hnode) { 1891 hnode) {
1874 if (del_timer(&exp->timeout)) { 1892 if (del_timer(&exp->timeout)) {
1875 nf_ct_unlink_expect(exp); 1893 nf_ct_unlink_expect_report(exp,
1894 NETLINK_CB(skb).pid,
1895 nlmsg_report(nlh));
1876 nf_ct_expect_put(exp); 1896 nf_ct_expect_put(exp);
1877 } 1897 }
1878 } 1898 }
@@ -1918,23 +1938,35 @@ ctnetlink_create_expect(struct net *net, u16 zone,
1918 if (!h) 1938 if (!h)
1919 return -ENOENT; 1939 return -ENOENT;
1920 ct = nf_ct_tuplehash_to_ctrack(h); 1940 ct = nf_ct_tuplehash_to_ctrack(h);
1921 help = nfct_help(ct);
1922
1923 if (!help || !help->helper) {
1924 /* such conntrack hasn't got any helper, abort */
1925 err = -EOPNOTSUPP;
1926 goto out;
1927 }
1928
1929 exp = nf_ct_expect_alloc(ct); 1941 exp = nf_ct_expect_alloc(ct);
1930 if (!exp) { 1942 if (!exp) {
1931 err = -ENOMEM; 1943 err = -ENOMEM;
1932 goto out; 1944 goto out;
1933 } 1945 }
1946 help = nfct_help(ct);
1947 if (!help) {
1948 if (!cda[CTA_EXPECT_TIMEOUT]) {
1949 err = -EINVAL;
1950 goto out;
1951 }
1952 exp->timeout.expires =
1953 jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
1954
1955 exp->flags = NF_CT_EXPECT_USERSPACE;
1956 if (cda[CTA_EXPECT_FLAGS]) {
1957 exp->flags |=
1958 ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1959 }
1960 } else {
1961 if (cda[CTA_EXPECT_FLAGS]) {
1962 exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1963 exp->flags &= ~NF_CT_EXPECT_USERSPACE;
1964 } else
1965 exp->flags = 0;
1966 }
1934 1967
1935 exp->class = 0; 1968 exp->class = 0;
1936 exp->expectfn = NULL; 1969 exp->expectfn = NULL;
1937 exp->flags = 0;
1938 exp->master = ct; 1970 exp->master = ct;
1939 exp->helper = NULL; 1971 exp->helper = NULL;
1940 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); 1972 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
@@ -2102,6 +2134,7 @@ static void __exit ctnetlink_exit(void)
2102{ 2134{
2103 pr_info("ctnetlink: unregistering from nfnetlink.\n"); 2135 pr_info("ctnetlink: unregistering from nfnetlink.\n");
2104 2136
2137 nf_ct_remove_userspace_expectations();
2105#ifdef CONFIG_NF_CONNTRACK_EVENTS 2138#ifdef CONFIG_NF_CONNTRACK_EVENTS
2106 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); 2139 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
2107 nf_conntrack_unregister_notifier(&ctnl_notifier); 2140 nf_conntrack_unregister_notifier(&ctnl_notifier);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f64de9544866..bcf47eb518ef 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -130,6 +130,44 @@ static int digits_len(const struct nf_conn *ct, const char *dptr,
130 return len; 130 return len;
131} 131}
132 132
133static int iswordc(const char c)
134{
135 if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
136 (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
137 c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
138 c == '{' || c == '}' || c == '~')
139 return 1;
140 return 0;
141}
142
143static int word_len(const char *dptr, const char *limit)
144{
145 int len = 0;
146 while (dptr < limit && iswordc(*dptr)) {
147 dptr++;
148 len++;
149 }
150 return len;
151}
152
153static int callid_len(const struct nf_conn *ct, const char *dptr,
154 const char *limit, int *shift)
155{
156 int len, domain_len;
157
158 len = word_len(dptr, limit);
159 dptr += len;
160 if (!len || dptr == limit || *dptr != '@')
161 return len;
162 dptr++;
163 len++;
164
165 domain_len = word_len(dptr, limit);
166 if (!domain_len)
167 return 0;
168 return len + domain_len;
169}
170
133/* get media type + port length */ 171/* get media type + port length */
134static int media_len(const struct nf_conn *ct, const char *dptr, 172static int media_len(const struct nf_conn *ct, const char *dptr,
135 const char *limit, int *shift) 173 const char *limit, int *shift)
@@ -152,6 +190,9 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
152 const char *end; 190 const char *end;
153 int ret = 0; 191 int ret = 0;
154 192
193 if (!ct)
194 return 0;
195
155 memset(addr, 0, sizeof(*addr)); 196 memset(addr, 0, sizeof(*addr));
156 switch (nf_ct_l3num(ct)) { 197 switch (nf_ct_l3num(ct)) {
157 case AF_INET: 198 case AF_INET:
@@ -296,6 +337,7 @@ static const struct sip_header ct_sip_hdrs[] = {
296 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), 337 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len),
297 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), 338 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
298 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), 339 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
340 [SIP_HDR_CALL_ID] = SIP_HDR("Call-Id", "i", NULL, callid_len),
299}; 341};
300 342
301static const char *sip_follow_continuation(const char *dptr, const char *limit) 343static const char *sip_follow_continuation(const char *dptr, const char *limit)
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index daab8c4a903c..4d87befb04c0 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -18,41 +18,6 @@
18#include <net/udp.h> 18#include <net/udp.h>
19#include <net/netfilter/nf_tproxy_core.h> 19#include <net/netfilter/nf_tproxy_core.h>
20 20
21struct sock *
22nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
23 const __be32 saddr, const __be32 daddr,
24 const __be16 sport, const __be16 dport,
25 const struct net_device *in, bool listening_only)
26{
27 struct sock *sk;
28
29 /* look up socket */
30 switch (protocol) {
31 case IPPROTO_TCP:
32 if (listening_only)
33 sk = __inet_lookup_listener(net, &tcp_hashinfo,
34 daddr, ntohs(dport),
35 in->ifindex);
36 else
37 sk = __inet_lookup(net, &tcp_hashinfo,
38 saddr, sport, daddr, dport,
39 in->ifindex);
40 break;
41 case IPPROTO_UDP:
42 sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
43 in->ifindex);
44 break;
45 default:
46 WARN_ON(1);
47 sk = NULL;
48 }
49
50 pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
51 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
52
53 return sk;
54}
55EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
56 21
57static void 22static void
58nf_tproxy_destructor(struct sk_buff *skb) 23nf_tproxy_destructor(struct sk_buff *skb)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e34622fa0003..80463507420e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -116,10 +116,8 @@ EXPORT_SYMBOL(xt_register_targets);
116void 116void
117xt_unregister_targets(struct xt_target *target, unsigned int n) 117xt_unregister_targets(struct xt_target *target, unsigned int n)
118{ 118{
119 unsigned int i; 119 while (n-- > 0)
120 120 xt_unregister_target(&target[n]);
121 for (i = 0; i < n; i++)
122 xt_unregister_target(&target[i]);
123} 121}
124EXPORT_SYMBOL(xt_unregister_targets); 122EXPORT_SYMBOL(xt_unregister_targets);
125 123
@@ -174,10 +172,8 @@ EXPORT_SYMBOL(xt_register_matches);
174void 172void
175xt_unregister_matches(struct xt_match *match, unsigned int n) 173xt_unregister_matches(struct xt_match *match, unsigned int n)
176{ 174{
177 unsigned int i; 175 while (n-- > 0)
178 176 xt_unregister_match(&match[n]);
179 for (i = 0; i < n; i++)
180 xt_unregister_match(&match[i]);
181} 177}
182EXPORT_SYMBOL(xt_unregister_matches); 178EXPORT_SYMBOL(xt_unregister_matches);
183 179
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index c61294d85fda..19c482caf30b 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Transparent proxy support for Linux/iptables 2 * Transparent proxy support for Linux/iptables
3 * 3 *
4 * Copyright (c) 2006-2007 BalaBit IT Ltd. 4 * Copyright (c) 2006-2010 BalaBit IT Ltd.
5 * Author: Balazs Scheidler, Krisztian Kovacs 5 * Author: Balazs Scheidler, Krisztian Kovacs
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -16,19 +16,96 @@
16#include <net/checksum.h> 16#include <net/checksum.h>
17#include <net/udp.h> 17#include <net/udp.h>
18#include <net/inet_sock.h> 18#include <net/inet_sock.h>
19 19#include <linux/inetdevice.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
22#include <linux/netfilter/xt_TPROXY.h>
23 22
24#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
25#include <net/if_inet6.h>
26#include <net/addrconf.h>
27#include <linux/netfilter_ipv6/ip6_tables.h>
28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif
30
25#include <net/netfilter/nf_tproxy_core.h> 31#include <net/netfilter/nf_tproxy_core.h>
32#include <linux/netfilter/xt_TPROXY.h>
33
34static inline __be32
35tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
36{
37 struct in_device *indev;
38 __be32 laddr;
39
40 if (user_laddr)
41 return user_laddr;
42
43 laddr = 0;
44 rcu_read_lock();
45 indev = __in_dev_get_rcu(skb->dev);
46 for_primary_ifa(indev) {
47 laddr = ifa->ifa_local;
48 break;
49 } endfor_ifa(indev);
50 rcu_read_unlock();
51
52 return laddr ? laddr : daddr;
53}
54
55/**
56 * tproxy_handle_time_wait4() - handle IPv4 TCP TIME_WAIT reopen redirections
57 * @skb: The skb being processed.
58 * @laddr: IPv4 address to redirect to or zero.
59 * @lport: TCP port to redirect to or zero.
60 * @sk: The TIME_WAIT TCP socket found by the lookup.
61 *
62 * We have to handle SYN packets arriving to TIME_WAIT sockets
63 * differently: instead of reopening the connection we should rather
64 * redirect the new connection to the proxy if there's a listener
65 * socket present.
66 *
67 * tproxy_handle_time_wait4() consumes the socket reference passed in.
68 *
69 * Returns the listener socket if there's one, the TIME_WAIT socket if
70 * no such listener is found, or NULL if the TCP header is incomplete.
71 */
72static struct sock *
73tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
74 struct sock *sk)
75{
76 const struct iphdr *iph = ip_hdr(skb);
77 struct tcphdr _hdr, *hp;
78
79 hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
80 if (hp == NULL) {
81 inet_twsk_put(inet_twsk(sk));
82 return NULL;
83 }
84
85 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
86 /* SYN to a TIME_WAIT socket, we'd rather redirect it
87 * to a listener socket if there's one */
88 struct sock *sk2;
89
90 sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
91 iph->saddr, laddr ? laddr : iph->daddr,
92 hp->source, lport ? lport : hp->dest,
93 skb->dev, NFT_LOOKUP_LISTENER);
94 if (sk2) {
95 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
96 inet_twsk_put(inet_twsk(sk));
97 sk = sk2;
98 }
99 }
100
101 return sk;
102}
26 103
27static unsigned int 104static unsigned int
28tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) 105tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
106 u_int32_t mark_mask, u_int32_t mark_value)
29{ 107{
30 const struct iphdr *iph = ip_hdr(skb); 108 const struct iphdr *iph = ip_hdr(skb);
31 const struct xt_tproxy_target_info *tgi = par->targinfo;
32 struct udphdr _hdr, *hp; 109 struct udphdr _hdr, *hp;
33 struct sock *sk; 110 struct sock *sk;
34 111
@@ -36,12 +113,195 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
36 if (hp == NULL) 113 if (hp == NULL)
37 return NF_DROP; 114 return NF_DROP;
38 115
116 /* check if there's an ongoing connection on the packet
117 * addresses, this happens if the redirect already happened
118 * and the current packet belongs to an already established
119 * connection */
39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 120 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
40 iph->saddr, 121 iph->saddr, iph->daddr,
41 tgi->laddr ? tgi->laddr : iph->daddr, 122 hp->source, hp->dest,
42 hp->source, 123 skb->dev, NFT_LOOKUP_ESTABLISHED);
43 tgi->lport ? tgi->lport : hp->dest, 124
44 par->in, true); 125 laddr = tproxy_laddr4(skb, laddr, iph->daddr);
126 if (!lport)
127 lport = hp->dest;
128
129 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
130 if (sk && sk->sk_state == TCP_TIME_WAIT)
131 /* reopening a TIME_WAIT connection needs special handling */
132 sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
133 else if (!sk)
134 /* no, there's no established connection, check if
135 * there's a listener on the redirected addr/port */
136 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
137 iph->saddr, laddr,
138 hp->source, lport,
139 skb->dev, NFT_LOOKUP_LISTENER);
140
141 /* NOTE: assign_sock consumes our sk reference */
142 if (sk && nf_tproxy_assign_sock(skb, sk)) {
143 /* This should be in a separate target, but we don't do multiple
144 targets on the same rule yet */
145 skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
146
147 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
148 iph->protocol, &iph->daddr, ntohs(hp->dest),
149 &laddr, ntohs(lport), skb->mark);
150 return NF_ACCEPT;
151 }
152
153 pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
154 iph->protocol, &iph->saddr, ntohs(hp->source),
155 &iph->daddr, ntohs(hp->dest), skb->mark);
156 return NF_DROP;
157}
158
159static unsigned int
160tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
161{
162 const struct xt_tproxy_target_info *tgi = par->targinfo;
163
164 return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
165}
166
167static unsigned int
168tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
169{
170 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
171
172 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
173}
174
175#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
176
177static inline const struct in6_addr *
178tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
179 const struct in6_addr *daddr)
180{
181 struct inet6_dev *indev;
182 struct inet6_ifaddr *ifa;
183 struct in6_addr *laddr;
184
185 if (!ipv6_addr_any(user_laddr))
186 return user_laddr;
187 laddr = NULL;
188
189 rcu_read_lock();
190 indev = __in6_dev_get(skb->dev);
191 if (indev)
192 list_for_each_entry(ifa, &indev->addr_list, if_list) {
193 if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
194 continue;
195
196 laddr = &ifa->addr;
197 break;
198 }
199 rcu_read_unlock();
200
201 return laddr ? laddr : daddr;
202}
203
204/**
205 * tproxy_handle_time_wait6() - handle IPv6 TCP TIME_WAIT reopen redirections
206 * @skb: The skb being processed.
207 * @tproto: Transport protocol.
208 * @thoff: Transport protocol header offset.
209 * @par: Iptables target parameters.
210 * @sk: The TIME_WAIT TCP socket found by the lookup.
211 *
212 * We have to handle SYN packets arriving to TIME_WAIT sockets
213 * differently: instead of reopening the connection we should rather
214 * redirect the new connection to the proxy if there's a listener
215 * socket present.
216 *
217 * tproxy_handle_time_wait6() consumes the socket reference passed in.
218 *
219 * Returns the listener socket if there's one, the TIME_WAIT socket if
220 * no such listener is found, or NULL if the TCP header is incomplete.
221 */
222static struct sock *
223tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
224 const struct xt_action_param *par,
225 struct sock *sk)
226{
227 const struct ipv6hdr *iph = ipv6_hdr(skb);
228 struct tcphdr _hdr, *hp;
229 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
230
231 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
232 if (hp == NULL) {
233 inet_twsk_put(inet_twsk(sk));
234 return NULL;
235 }
236
237 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
238 /* SYN to a TIME_WAIT socket, we'd rather redirect it
239 * to a listener socket if there's one */
240 struct sock *sk2;
241
242 sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
243 &iph->saddr,
244 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
245 hp->source,
246 tgi->lport ? tgi->lport : hp->dest,
247 skb->dev, NFT_LOOKUP_LISTENER);
248 if (sk2) {
249 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
250 inet_twsk_put(inet_twsk(sk));
251 sk = sk2;
252 }
253 }
254
255 return sk;
256}
257
258static unsigned int
259tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
260{
261 const struct ipv6hdr *iph = ipv6_hdr(skb);
262 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
263 struct udphdr _hdr, *hp;
264 struct sock *sk;
265 const struct in6_addr *laddr;
266 __be16 lport;
267 int thoff;
268 int tproto;
269
270 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
271 if (tproto < 0) {
272 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
273 return NF_DROP;
274 }
275
276 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
277 if (hp == NULL) {
278 pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n");
279 return NF_DROP;
280 }
281
282 /* check if there's an ongoing connection on the packet
283 * addresses, this happens if the redirect already happened
284 * and the current packet belongs to an already established
285 * connection */
286 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
287 &iph->saddr, &iph->daddr,
288 hp->source, hp->dest,
289 par->in, NFT_LOOKUP_ESTABLISHED);
290
291 laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
292 lport = tgi->lport ? tgi->lport : hp->dest;
293
294 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
295 if (sk && sk->sk_state == TCP_TIME_WAIT)
296 /* reopening a TIME_WAIT connection needs special handling */
297 sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
298 else if (!sk)
299 /* no there's no established connection, check if
300 * there's a listener on the redirected addr/port */
301 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
302 &iph->saddr, laddr,
303 hp->source, lport,
304 par->in, NFT_LOOKUP_LISTENER);
45 305
46 /* NOTE: assign_sock consumes our sk reference */ 306 /* NOTE: assign_sock consumes our sk reference */
47 if (sk && nf_tproxy_assign_sock(skb, sk)) { 307 if (sk && nf_tproxy_assign_sock(skb, sk)) {
@@ -49,19 +309,34 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
49 targets on the same rule yet */ 309 targets on the same rule yet */
50 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; 310 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
51 311
52 pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", 312 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
53 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 313 tproto, &iph->saddr, ntohs(hp->source),
54 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 314 laddr, ntohs(lport), skb->mark);
55 return NF_ACCEPT; 315 return NF_ACCEPT;
56 } 316 }
57 317
58 pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", 318 pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
59 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 319 tproto, &iph->saddr, ntohs(hp->source),
60 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 320 &iph->daddr, ntohs(hp->dest), skb->mark);
321
61 return NF_DROP; 322 return NF_DROP;
62} 323}
63 324
64static int tproxy_tg_check(const struct xt_tgchk_param *par) 325static int tproxy_tg6_check(const struct xt_tgchk_param *par)
326{
327 const struct ip6t_ip6 *i = par->entryinfo;
328
329 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
330 && !(i->flags & IP6T_INV_PROTO))
331 return 0;
332
333 pr_info("Can be used only in combination with "
334 "either -p tcp or -p udp\n");
335 return -EINVAL;
336}
337#endif
338
339static int tproxy_tg4_check(const struct xt_tgchk_param *par)
65{ 340{
66 const struct ipt_ip *i = par->entryinfo; 341 const struct ipt_ip *i = par->entryinfo;
67 342
@@ -74,31 +349,64 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par)
74 return -EINVAL; 349 return -EINVAL;
75} 350}
76 351
77static struct xt_target tproxy_tg_reg __read_mostly = { 352static struct xt_target tproxy_tg_reg[] __read_mostly = {
78 .name = "TPROXY", 353 {
79 .family = AF_INET, 354 .name = "TPROXY",
80 .table = "mangle", 355 .family = NFPROTO_IPV4,
81 .target = tproxy_tg, 356 .table = "mangle",
82 .targetsize = sizeof(struct xt_tproxy_target_info), 357 .target = tproxy_tg4_v0,
83 .checkentry = tproxy_tg_check, 358 .revision = 0,
84 .hooks = 1 << NF_INET_PRE_ROUTING, 359 .targetsize = sizeof(struct xt_tproxy_target_info),
85 .me = THIS_MODULE, 360 .checkentry = tproxy_tg4_check,
361 .hooks = 1 << NF_INET_PRE_ROUTING,
362 .me = THIS_MODULE,
363 },
364 {
365 .name = "TPROXY",
366 .family = NFPROTO_IPV4,
367 .table = "mangle",
368 .target = tproxy_tg4_v1,
369 .revision = 1,
370 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
371 .checkentry = tproxy_tg4_check,
372 .hooks = 1 << NF_INET_PRE_ROUTING,
373 .me = THIS_MODULE,
374 },
375#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
376 {
377 .name = "TPROXY",
378 .family = NFPROTO_IPV6,
379 .table = "mangle",
380 .target = tproxy_tg6_v1,
381 .revision = 1,
382 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
383 .checkentry = tproxy_tg6_check,
384 .hooks = 1 << NF_INET_PRE_ROUTING,
385 .me = THIS_MODULE,
386 },
387#endif
388
86}; 389};
87 390
88static int __init tproxy_tg_init(void) 391static int __init tproxy_tg_init(void)
89{ 392{
90 nf_defrag_ipv4_enable(); 393 nf_defrag_ipv4_enable();
91 return xt_register_target(&tproxy_tg_reg); 394#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
395 nf_defrag_ipv6_enable();
396#endif
397
398 return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
92} 399}
93 400
94static void __exit tproxy_tg_exit(void) 401static void __exit tproxy_tg_exit(void)
95{ 402{
96 xt_unregister_target(&tproxy_tg_reg); 403 xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
97} 404}
98 405
99module_init(tproxy_tg_init); 406module_init(tproxy_tg_init);
100module_exit(tproxy_tg_exit); 407module_exit(tproxy_tg_exit);
101MODULE_LICENSE("GPL"); 408MODULE_LICENSE("GPL");
102MODULE_AUTHOR("Krisztian Kovacs"); 409MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
103MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); 410MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
104MODULE_ALIAS("ipt_TPROXY"); 411MODULE_ALIAS("ipt_TPROXY");
412MODULE_ALIAS("ip6t_TPROXY");
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 7a4d66db95ae..9127a3d8aa35 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -16,7 +16,6 @@
16#include <linux/ip_vs.h> 16#include <linux/ip_vs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_ipvs.h> 19#include <linux/netfilter/xt_ipvs.h>
21#include <net/netfilter/nf_conntrack.h> 20#include <net/netfilter/nf_conntrack.h>
22 21
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1ca89908cbad..2dbd4c857735 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,6 +14,7 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ip_tables.h> 16#include <linux/netfilter_ipv4/ip_tables.h>
17#include <linux/netfilter_ipv6/ip6_tables.h>
17#include <net/tcp.h> 18#include <net/tcp.h>
18#include <net/udp.h> 19#include <net/udp.h>
19#include <net/icmp.h> 20#include <net/icmp.h>
@@ -21,6 +22,7 @@
21#include <net/inet_sock.h> 22#include <net/inet_sock.h>
22#include <net/netfilter/nf_tproxy_core.h> 23#include <net/netfilter/nf_tproxy_core.h>
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 24#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
25#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
24 26
25#include <linux/netfilter/xt_socket.h> 27#include <linux/netfilter/xt_socket.h>
26 28
@@ -30,7 +32,7 @@
30#endif 32#endif
31 33
32static int 34static int
33extract_icmp_fields(const struct sk_buff *skb, 35extract_icmp4_fields(const struct sk_buff *skb,
34 u8 *protocol, 36 u8 *protocol,
35 __be32 *raddr, 37 __be32 *raddr,
36 __be32 *laddr, 38 __be32 *laddr,
@@ -86,7 +88,6 @@ extract_icmp_fields(const struct sk_buff *skb,
86 return 0; 88 return 0;
87} 89}
88 90
89
90static bool 91static bool
91socket_match(const struct sk_buff *skb, struct xt_action_param *par, 92socket_match(const struct sk_buff *skb, struct xt_action_param *par,
92 const struct xt_socket_mtinfo1 *info) 93 const struct xt_socket_mtinfo1 *info)
@@ -115,7 +116,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
115 dport = hp->dest; 116 dport = hp->dest;
116 117
117 } else if (iph->protocol == IPPROTO_ICMP) { 118 } else if (iph->protocol == IPPROTO_ICMP) {
118 if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, 119 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
119 &sport, &dport)) 120 &sport, &dport))
120 return false; 121 return false;
121 } else { 122 } else {
@@ -142,7 +143,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
142#endif 143#endif
143 144
144 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, 145 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
145 saddr, daddr, sport, dport, par->in, false); 146 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
146 if (sk != NULL) { 147 if (sk != NULL) {
147 bool wildcard; 148 bool wildcard;
148 bool transparent = true; 149 bool transparent = true;
@@ -165,32 +166,157 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
165 sk = NULL; 166 sk = NULL;
166 } 167 }
167 168
168 pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", 169 pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
169 protocol, ntohl(saddr), ntohs(sport), 170 protocol, &saddr, ntohs(sport),
170 ntohl(daddr), ntohs(dport), 171 &daddr, ntohs(dport),
171 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); 172 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
172 173
173 return (sk != NULL); 174 return (sk != NULL);
174} 175}
175 176
176static bool 177static bool
177socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) 178socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
178{ 179{
179 return socket_match(skb, par, NULL); 180 return socket_match(skb, par, NULL);
180} 181}
181 182
182static bool 183static bool
183socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) 184socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
184{ 185{
185 return socket_match(skb, par, par->matchinfo); 186 return socket_match(skb, par, par->matchinfo);
186} 187}
187 188
189#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
190
191static int
192extract_icmp6_fields(const struct sk_buff *skb,
193 unsigned int outside_hdrlen,
194 u8 *protocol,
195 struct in6_addr **raddr,
196 struct in6_addr **laddr,
197 __be16 *rport,
198 __be16 *lport)
199{
200 struct ipv6hdr *inside_iph, _inside_iph;
201 struct icmp6hdr *icmph, _icmph;
202 __be16 *ports, _ports[2];
203 u8 inside_nexthdr;
204 int inside_hdrlen;
205
206 icmph = skb_header_pointer(skb, outside_hdrlen,
207 sizeof(_icmph), &_icmph);
208 if (icmph == NULL)
209 return 1;
210
211 if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
212 return 1;
213
214 inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph);
215 if (inside_iph == NULL)
216 return 1;
217 inside_nexthdr = inside_iph->nexthdr;
218
219 inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr);
220 if (inside_hdrlen < 0)
221 return 1; /* hjm: Packet has no/incomplete transport layer headers. */
222
223 if (inside_nexthdr != IPPROTO_TCP &&
224 inside_nexthdr != IPPROTO_UDP)
225 return 1;
226
227 ports = skb_header_pointer(skb, inside_hdrlen,
228 sizeof(_ports), &_ports);
229 if (ports == NULL)
230 return 1;
231
232 /* the inside IP packet is the one quoted from our side, thus
233 * its saddr is the local address */
234 *protocol = inside_nexthdr;
235 *laddr = &inside_iph->saddr;
236 *lport = ports[0];
237 *raddr = &inside_iph->daddr;
238 *rport = ports[1];
239
240 return 0;
241}
242
243static bool
244socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
245{
246 struct ipv6hdr *iph = ipv6_hdr(skb);
247 struct udphdr _hdr, *hp = NULL;
248 struct sock *sk;
249 struct in6_addr *daddr, *saddr;
250 __be16 dport, sport;
251 int thoff;
252 u8 tproto;
253 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
254
255 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
256 if (tproto < 0) {
257 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
258 return NF_DROP;
259 }
260
261 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
262 hp = skb_header_pointer(skb, thoff,
263 sizeof(_hdr), &_hdr);
264 if (hp == NULL)
265 return false;
266
267 saddr = &iph->saddr;
268 sport = hp->source;
269 daddr = &iph->daddr;
270 dport = hp->dest;
271
272 } else if (tproto == IPPROTO_ICMPV6) {
273 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
274 &sport, &dport))
275 return false;
276 } else {
277 return false;
278 }
279
280 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
281 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
282 if (sk != NULL) {
283 bool wildcard;
284 bool transparent = true;
285
286 /* Ignore sockets listening on INADDR_ANY */
287 wildcard = (sk->sk_state != TCP_TIME_WAIT &&
288 ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
289
290 /* Ignore non-transparent sockets,
291 if XT_SOCKET_TRANSPARENT is used */
292 if (info && info->flags & XT_SOCKET_TRANSPARENT)
293 transparent = ((sk->sk_state != TCP_TIME_WAIT &&
294 inet_sk(sk)->transparent) ||
295 (sk->sk_state == TCP_TIME_WAIT &&
296 inet_twsk(sk)->tw_transparent));
297
298 nf_tproxy_put_sock(sk);
299
300 if (wildcard || !transparent)
301 sk = NULL;
302 }
303
304 pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu "
305 "(orig %pI6:%hu) sock %p\n",
306 tproto, saddr, ntohs(sport),
307 daddr, ntohs(dport),
308 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
309
310 return (sk != NULL);
311}
312#endif
313
188static struct xt_match socket_mt_reg[] __read_mostly = { 314static struct xt_match socket_mt_reg[] __read_mostly = {
189 { 315 {
190 .name = "socket", 316 .name = "socket",
191 .revision = 0, 317 .revision = 0,
192 .family = NFPROTO_IPV4, 318 .family = NFPROTO_IPV4,
193 .match = socket_mt_v0, 319 .match = socket_mt4_v0,
194 .hooks = (1 << NF_INET_PRE_ROUTING) | 320 .hooks = (1 << NF_INET_PRE_ROUTING) |
195 (1 << NF_INET_LOCAL_IN), 321 (1 << NF_INET_LOCAL_IN),
196 .me = THIS_MODULE, 322 .me = THIS_MODULE,
@@ -199,17 +325,33 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
199 .name = "socket", 325 .name = "socket",
200 .revision = 1, 326 .revision = 1,
201 .family = NFPROTO_IPV4, 327 .family = NFPROTO_IPV4,
202 .match = socket_mt_v1, 328 .match = socket_mt4_v1,
203 .matchsize = sizeof(struct xt_socket_mtinfo1), 329 .matchsize = sizeof(struct xt_socket_mtinfo1),
204 .hooks = (1 << NF_INET_PRE_ROUTING) | 330 .hooks = (1 << NF_INET_PRE_ROUTING) |
205 (1 << NF_INET_LOCAL_IN), 331 (1 << NF_INET_LOCAL_IN),
206 .me = THIS_MODULE, 332 .me = THIS_MODULE,
207 }, 333 },
334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
335 {
336 .name = "socket",
337 .revision = 1,
338 .family = NFPROTO_IPV6,
339 .match = socket_mt6_v1,
340 .matchsize = sizeof(struct xt_socket_mtinfo1),
341 .hooks = (1 << NF_INET_PRE_ROUTING) |
342 (1 << NF_INET_LOCAL_IN),
343 .me = THIS_MODULE,
344 },
345#endif
208}; 346};
209 347
210static int __init socket_mt_init(void) 348static int __init socket_mt_init(void)
211{ 349{
212 nf_defrag_ipv4_enable(); 350 nf_defrag_ipv4_enable();
351#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
352 nf_defrag_ipv6_enable();
353#endif
354
213 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); 355 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
214} 356}
215 357
@@ -225,3 +367,4 @@ MODULE_LICENSE("GPL");
225MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); 367MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
226MODULE_DESCRIPTION("x_tables socket match module"); 368MODULE_DESCRIPTION("x_tables socket match module");
227MODULE_ALIAS("ipt_socket"); 369MODULE_ALIAS("ipt_socket");
370MODULE_ALIAS("ip6t_socket");
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..8daef9632255 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = {
39 .lock = &ipt_lock, 39 .lock = &ipt_lock,
40}; 40};
41 41
42static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) 42static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
43{ 43{
44 struct xt_tgchk_param par; 44 struct xt_tgchk_param par;
45 struct xt_target *target; 45 struct xt_target *target;
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
66 return 0; 66 return 0;
67} 67}
68 68
69static void ipt_destroy_target(struct ipt_entry_target *t) 69static void ipt_destroy_target(struct xt_entry_target *t)
70{ 70{
71 struct xt_tgdtor_param par = { 71 struct xt_tgdtor_param par = {
72 .target = t->u.kernel.target, 72 .target = t->u.kernel.target,
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, 99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
100 [TCA_IPT_HOOK] = { .type = NLA_U32 }, 100 [TCA_IPT_HOOK] = { .type = NLA_U32 },
101 [TCA_IPT_INDEX] = { .type = NLA_U32 }, 101 [TCA_IPT_INDEX] = { .type = NLA_U32 },
102 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, 102 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
103}; 103};
104 104
105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, 105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
108 struct nlattr *tb[TCA_IPT_MAX + 1]; 108 struct nlattr *tb[TCA_IPT_MAX + 1];
109 struct tcf_ipt *ipt; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc; 110 struct tcf_common *pc;
111 struct ipt_entry_target *td, *t; 111 struct xt_entry_target *td, *t;
112 char *tname; 112 char *tname;
113 int ret = 0, err; 113 int ret = 0, err;
114 u32 hook = 0; 114 u32 hook = 0;
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
126 if (tb[TCA_IPT_TARG] == NULL) 126 if (tb[TCA_IPT_TARG] == NULL)
127 return -EINVAL; 127 return -EINVAL;
128 128
129 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 129 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
131 return -EINVAL; 131 return -EINVAL;
132 132
@@ -230,7 +230,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
230 result = TC_ACT_SHOT; 230 result = TC_ACT_SHOT;
231 ipt->tcf_qstats.drops++; 231 ipt->tcf_qstats.drops++;
232 break; 232 break;
233 case IPT_CONTINUE: 233 case XT_CONTINUE:
234 result = TC_ACT_PIPE; 234 result = TC_ACT_PIPE;
235 break; 235 break;
236 default: 236 default:
@@ -249,7 +249,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
249{ 249{
250 unsigned char *b = skb_tail_pointer(skb); 250 unsigned char *b = skb_tail_pointer(skb);
251 struct tcf_ipt *ipt = a->priv; 251 struct tcf_ipt *ipt = a->priv;
252 struct ipt_entry_target *t; 252 struct xt_entry_target *t;
253 struct tcf_t tm; 253 struct tcf_t tm;
254 struct tc_cnt c; 254 struct tc_cnt c;
255 255