aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJulian Anastasov <ja@ssi.bg>2010-09-21 11:35:41 -0400
committerPatrick McHardy <kaber@trash.net>2010-09-21 11:35:41 -0400
commitf4bc17cdd205ebaa3807c2aa973719bb5ce6a5b2 (patch)
treec2bbaf2251ba91f4951f6614a9475e04fdec790e
parent3575792e005dc9994f15ae72c1c6f401d134177d (diff)
ipvs: netfilter connection tracking changes
Add more code to IPVS to work with Netfilter connection tracking and fix some problems.

- Allow IPVS to be compiled without connection tracking, as in 2.6.35 and before. This avoids keeping conntracks for all IPVS connections, which costs memory. ip_vs_ftp still depends on connection tracking and NAT as implemented for 2.6.36.
- Add the sysctl variable "conntrack" to enable connection tracking for all IPVS connections. Loaded IPVS directors will need to tune the nf_conntrack_max limit.
- Add the IP_VS_CONN_F_NFCT connection flag to request that a connection use connection tracking. This allows user space to provide the flag, for example, in dest->conn_flags. This can be useful to request connection tracking per real server instead of forcing it for all connections with the "conntrack" sysctl. The flag is currently set only by ip_vs_ftp and, of course, by the "conntrack" sysctl.
- Add the file ip_vs_nfct.c to hold all connection tracking code; this way the main code does not depend on netfilter conntrack support.
- Restore the ip_vs_post_routing handler as in 2.6.35 and use skb->ipvs_property=1 to allow IPVS to work without connection tracking.

Connection tracking:

- Most of the code is already in 2.6.36-rc.
- Alter the conntrack reply tuple for LVS-NAT connections when the first packet from the client is forwarded and the conntrack state is NEW or RELATED. Additionally, alter the reply for RELATED connections from the real server, again for a packet in the original direction.
- Add IP_VS_XMIT_TUNNEL to confirm the conntrack (without altering the reply) early for LVS-TUN, because we want to call nf_reset. This is needed because we add an IPIP header and the original conntrack should be preserved, not destroyed. The transmitted IPIP packets can reuse the same conntrack, so we do not set skb->ipvs_property.
- Try to destroy the conntrack when the IPVS connection is destroyed. It is not fatal if the conntrack disappears before that; it depends on the timers in use.
Fix long-standing problems:

- Add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Patrick McHardy <kaber@trash.net>
-rw-r--r--include/linux/ip_vs.h2
-rw-r--r--include/net/ip_vs.h44
-rw-r--r--net/netfilter/ipvs/Kconfig13
-rw-r--r--net/netfilter/ipvs/Makefile5
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c46
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c146
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c292
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c98
10 files changed, 475 insertions, 196 deletions
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 003d75f6ffe1..df7728613720 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -90,10 +90,12 @@
90#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ 90#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
91 91
92/* Flags that are not sent to backup server start from bit 16 */ 92/* Flags that are not sent to backup server start from bit 16 */
93#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
93 94
94/* Connection flags from destination that can be changed by user space */ 95/* Connection flags from destination that can be changed by user space */
95#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ 96#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \
96 IP_VS_CONN_F_ONE_PACKET | \ 97 IP_VS_CONN_F_ONE_PACKET | \
98 IP_VS_CONN_F_NFCT | \
97 0) 99 0)
98 100
99#define IP_VS_SCHEDNAME_MAXLEN 16 101#define IP_VS_SCHEDNAME_MAXLEN 16
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 62698a9c1631..e8ec5231eae9 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -25,7 +25,9 @@
25#include <linux/ip.h> 25#include <linux/ip.h>
26#include <linux/ipv6.h> /* for struct ipv6hdr */ 26#include <linux/ipv6.h> /* for struct ipv6hdr */
27#include <net/ipv6.h> /* for ipv6_addr_copy */ 27#include <net/ipv6.h> /* for ipv6_addr_copy */
28 28#ifdef CONFIG_IP_VS_NFCT
29#include <net/netfilter/nf_conntrack.h>
30#endif
29 31
30/* Connections' size value needed by ip_vs_ctl.c */ 32/* Connections' size value needed by ip_vs_ctl.c */
31extern int ip_vs_conn_tab_size; 33extern int ip_vs_conn_tab_size;
@@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn;
798extern int sysctl_ip_vs_expire_quiescent_template; 800extern int sysctl_ip_vs_expire_quiescent_template;
799extern int sysctl_ip_vs_sync_threshold[2]; 801extern int sysctl_ip_vs_sync_threshold[2];
800extern int sysctl_ip_vs_nat_icmp_send; 802extern int sysctl_ip_vs_nat_icmp_send;
803extern int sysctl_ip_vs_conntrack;
801extern struct ip_vs_stats ip_vs_stats; 804extern struct ip_vs_stats ip_vs_stats;
802extern const struct ctl_path net_vs_ctl_path[]; 805extern const struct ctl_path net_vs_ctl_path[];
803 806
@@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
955 return csum_partial(diff, sizeof(diff), oldsum); 958 return csum_partial(diff, sizeof(diff), oldsum);
956} 959}
957 960
961#ifdef CONFIG_IP_VS_NFCT
962/*
963 * Netfilter connection tracking
964 * (from ip_vs_nfct.c)
965 */
966static inline int ip_vs_conntrack_enabled(void)
967{
968 return sysctl_ip_vs_conntrack;
969}
970
958extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, 971extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
959 int outin); 972 int outin);
973extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp);
974extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
975 struct ip_vs_conn *cp, u_int8_t proto,
976 const __be16 port, int from_rs);
977extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
978
979#else
980
981static inline int ip_vs_conntrack_enabled(void)
982{
983 return 0;
984}
985
986static inline void ip_vs_update_conntrack(struct sk_buff *skb,
987 struct ip_vs_conn *cp, int outin)
988{
989}
990
991static inline int ip_vs_confirm_conntrack(struct sk_buff *skb,
992 struct ip_vs_conn *cp)
993{
994 return NF_ACCEPT;
995}
996
997static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
998{
999}
1000/* CONFIG_IP_VS_NFCT */
1001#endif
960 1002
961#endif /* __KERNEL__ */ 1003#endif /* __KERNEL__ */
962 1004
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 46a77d5c3887..af3c9f48f2d7 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER && NF_CONNTRACK 6 depends on NET && INET && NETFILTER
7 ---help--- 7 ---help---
8 IP Virtual Server support will let you build a high-performance 8 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 9 virtual server based on cluster of two or more real servers. This
@@ -235,7 +235,8 @@ comment 'IPVS application helper'
235 235
236config IP_VS_FTP 236config IP_VS_FTP
237 tristate "FTP protocol helper" 237 tristate "FTP protocol helper"
238 depends on IP_VS_PROTO_TCP && NF_NAT 238 depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
239 select IP_VS_NFCT
239 ---help--- 240 ---help---
240 FTP is a protocol that transfers IP address and/or port number in 241 FTP is a protocol that transfers IP address and/or port number in
241 the payload. In the virtual server via Network Address Translation, 242 the payload. In the virtual server via Network Address Translation,
@@ -247,4 +248,12 @@ config IP_VS_FTP
247 If you want to compile it in kernel, say Y. To compile it as a 248 If you want to compile it in kernel, say Y. To compile it as a
248 module, choose M here. If unsure, say N. 249 module, choose M here. If unsure, say N.
249 250
251config IP_VS_NFCT
252 bool "Netfilter connection tracking"
253 depends on NF_CONNTRACK
254 ---help---
255 The Netfilter connection tracking support allows the IPVS
256 connection state to be exported to the Netfilter framework
257 for filtering purposes.
258
250endif # IP_VS 259endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index e3baefd7066e..349fe8819b89 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o 9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o 10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
11 11
12ip_vs-extra_objs-y :=
13ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
14
12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ 15ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ 16 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
14 ip_vs_est.o ip_vs_proto.o \ 17 ip_vs_est.o ip_vs_proto.o \
15 $(ip_vs_proto-objs-y) 18 $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
16 19
17 20
18# IPVS core 21# IPVS core
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9fe1da7bcf16..a970d9691496 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -721,6 +721,9 @@ static void ip_vs_conn_expire(unsigned long data)
721 if (cp->control) 721 if (cp->control)
722 ip_vs_control_del(cp); 722 ip_vs_control_del(cp);
723 723
724 if (cp->flags & IP_VS_CONN_F_NFCT)
725 ip_vs_conn_drop_conntrack(cp);
726
724 if (unlikely(cp->app != NULL)) 727 if (unlikely(cp->app != NULL))
725 ip_vs_unbind_app(cp); 728 ip_vs_unbind_app(cp);
726 ip_vs_unbind_dest(cp); 729 ip_vs_unbind_dest(cp);
@@ -816,6 +819,16 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
816 if (unlikely(pp && atomic_read(&pp->appcnt))) 819 if (unlikely(pp && atomic_read(&pp->appcnt)))
817 ip_vs_bind_app(cp, pp); 820 ip_vs_bind_app(cp, pp);
818 821
822 /*
823 * Allow conntrack to be preserved. By default, conntrack
824 * is created and destroyed for every packet.
825 * Sometimes keeping conntrack can be useful for
826 * IP_VS_CONN_F_ONE_PACKET too.
827 */
828
829 if (ip_vs_conntrack_enabled())
830 cp->flags |= IP_VS_CONN_F_NFCT;
831
819 /* Hash it in the ip_vs_conn_tab finally */ 832 /* Hash it in the ip_vs_conn_tab finally */
820 ip_vs_conn_hash(cp); 833 ip_vs_conn_hash(cp);
821 834
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 319991d4d251..7fbc80d81fe8 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -537,6 +537,23 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
537 return NF_DROP; 537 return NF_DROP;
538} 538}
539 539
540/*
541 * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
542 * chain and is used to avoid double NAT and confirmation when we do
543 * not want to keep the conntrack structure
544 */
545static unsigned int ip_vs_post_routing(unsigned int hooknum,
546 struct sk_buff *skb,
547 const struct net_device *in,
548 const struct net_device *out,
549 int (*okfn)(struct sk_buff *))
550{
551 if (!skb->ipvs_property)
552 return NF_ACCEPT;
553 /* The packet was sent from IPVS, exit this chain */
554 return NF_STOP;
555}
556
540__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) 557__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
541{ 558{
542 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 559 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -695,7 +712,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
695 /* do the statistics and put it back */ 712 /* do the statistics and put it back */
696 ip_vs_out_stats(cp, skb); 713 ip_vs_out_stats(cp, skb);
697 714
698 skb->ipvs_property = 1; 715 if (!(cp->flags & IP_VS_CONN_F_NFCT))
716 skb->ipvs_property = 1;
717 else
718 ip_vs_update_conntrack(skb, cp, 0);
699 verdict = NF_ACCEPT; 719 verdict = NF_ACCEPT;
700 720
701out: 721out:
@@ -928,17 +948,19 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
928 948
929 ip_vs_out_stats(cp, skb); 949 ip_vs_out_stats(cp, skb);
930 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 950 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
931 ip_vs_update_conntrack(skb, cp, 0); 951 if (!(cp->flags & IP_VS_CONN_F_NFCT))
952 skb->ipvs_property = 1;
953 else
954 ip_vs_update_conntrack(skb, cp, 0);
932 ip_vs_conn_put(cp); 955 ip_vs_conn_put(cp);
933 956
934 skb->ipvs_property = 1;
935
936 LeaveFunction(11); 957 LeaveFunction(11);
937 return NF_ACCEPT; 958 return NF_ACCEPT;
938 959
939drop: 960drop:
940 ip_vs_conn_put(cp); 961 ip_vs_conn_put(cp);
941 kfree_skb(skb); 962 kfree_skb(skb);
963 LeaveFunction(11);
942 return NF_STOLEN; 964 return NF_STOLEN;
943} 965}
944 966
@@ -1483,6 +1505,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1483 .hooknum = NF_INET_FORWARD, 1505 .hooknum = NF_INET_FORWARD,
1484 .priority = 99, 1506 .priority = 99,
1485 }, 1507 },
1508 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1509 {
1510 .hook = ip_vs_post_routing,
1511 .owner = THIS_MODULE,
1512 .pf = PF_INET,
1513 .hooknum = NF_INET_POST_ROUTING,
1514 .priority = NF_IP_PRI_NAT_SRC-1,
1515 },
1486#ifdef CONFIG_IP_VS_IPV6 1516#ifdef CONFIG_IP_VS_IPV6
1487 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1517 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1488 * or VS/NAT(change destination), so that filtering rules can be 1518 * or VS/NAT(change destination), so that filtering rules can be
@@ -1511,6 +1541,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1511 .hooknum = NF_INET_FORWARD, 1541 .hooknum = NF_INET_FORWARD,
1512 .priority = 99, 1542 .priority = 99,
1513 }, 1543 },
1544 /* Before the netfilter connection tracking, exit from POST_ROUTING */
1545 {
1546 .hook = ip_vs_post_routing,
1547 .owner = THIS_MODULE,
1548 .pf = PF_INET6,
1549 .hooknum = NF_INET_POST_ROUTING,
1550 .priority = NF_IP6_PRI_NAT_SRC-1,
1551 },
1514#endif 1552#endif
1515}; 1553};
1516 1554
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7bd41d28080c..d2d842f292c6 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -88,6 +88,9 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0; 88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0; 90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
91 94
92 95
93#ifdef CONFIG_IP_VS_DEBUG 96#ifdef CONFIG_IP_VS_DEBUG
@@ -1580,6 +1583,15 @@ static struct ctl_table vs_vars[] = {
1580 .mode = 0644, 1583 .mode = 0644,
1581 .proc_handler = proc_do_defense_mode, 1584 .proc_handler = proc_do_defense_mode,
1582 }, 1585 },
1586#ifdef CONFIG_IP_VS_NFCT
1587 {
1588 .procname = "conntrack",
1589 .data = &sysctl_ip_vs_conntrack,
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec,
1593 },
1594#endif
1583 { 1595 {
1584 .procname = "secure_tcp", 1596 .procname = "secure_tcp",
1585 .data = &sysctl_ip_vs_secure_tcp, 1597 .data = &sysctl_ip_vs_secure_tcp,
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7e9af5b76d9e..9cd375f94d61 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,17 +20,6 @@
20 * 20 *
21 * Author: Wouter Gadeyne 21 * Author: Wouter Gadeyne
22 * 22 *
23 *
24 * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
25 * http://www.ssi.bg/~ja/nfct/:
26 *
27 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
28 *
29 * Portions Copyright (C) 2001-2002
30 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
31 *
32 * Portions Copyright (C) 2003-2008
33 * Julian Anastasov
34 */ 23 */
35 24
36#define KMSG_COMPONENT "IPVS" 25#define KMSG_COMPONENT "IPVS"
@@ -58,16 +47,6 @@
58#define SERVER_STRING "227 Entering Passive Mode (" 47#define SERVER_STRING "227 Entering Passive Mode ("
59#define CLIENT_STRING "PORT " 48#define CLIENT_STRING "PORT "
60 49
61#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
62#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
63 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
64 (T)->dst.protonum
65
66#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
67#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
68 &((C)->vaddr.ip), ntohs((C)->vport), \
69 &((C)->daddr.ip), ntohs((C)->dport), \
70 (C)->protocol, (C)->state
71 50
72/* 51/*
73 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 52 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv;
85static int 64static int
86ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) 65ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
87{ 66{
67 /* We use connection tracking for the command connection */
68 cp->flags |= IP_VS_CONN_F_NFCT;
88 return 0; 69 return 0;
89} 70}
90 71
@@ -149,120 +130,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
149} 130}
150 131
151/* 132/*
152 * Called from init_conntrack() as expectfn handler.
153 */
154static void
155ip_vs_expect_callback(struct nf_conn *ct,
156 struct nf_conntrack_expect *exp)
157{
158 struct nf_conntrack_tuple *orig, new_reply;
159 struct ip_vs_conn *cp;
160
161 if (exp->tuple.src.l3num != PF_INET)
162 return;
163
164 /*
165 * We assume that no NF locks are held before this callback.
166 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
167 * expectations even if they use wildcard values, now we provide the
168 * actual values from the newly created original conntrack direction.
169 * The conntrack is confirmed when packet reaches IPVS hooks.
170 */
171
172 /* RS->CLIENT */
173 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
174 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
175 &orig->src.u3, orig->src.u.tcp.port,
176 &orig->dst.u3, orig->dst.u.tcp.port);
177 if (cp) {
178 /* Change reply CLIENT->RS to CLIENT->VS */
179 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
180 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
181 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
182 __func__, ct, ct->status,
183 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
184 ARG_CONN(cp));
185 new_reply.dst.u3 = cp->vaddr;
186 new_reply.dst.u.tcp.port = cp->vport;
187 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
188 ", inout cp=" FMT_CONN "\n",
189 __func__, ct,
190 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
191 ARG_CONN(cp));
192 goto alter;
193 }
194
195 /* CLIENT->VS */
196 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
197 &orig->src.u3, orig->src.u.tcp.port,
198 &orig->dst.u3, orig->dst.u.tcp.port);
199 if (cp) {
200 /* Change reply VS->CLIENT to RS->CLIENT */
201 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
202 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
203 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
204 __func__, ct, ct->status,
205 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
206 ARG_CONN(cp));
207 new_reply.src.u3 = cp->daddr;
208 new_reply.src.u.tcp.port = cp->dport;
209 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
210 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
211 __func__, ct,
212 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
213 ARG_CONN(cp));
214 goto alter;
215 }
216
217 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
218 " - unknown expect\n",
219 __func__, ct, ct->status, ARG_TUPLE(orig));
220 return;
221
222alter:
223 /* Never alter conntrack for non-NAT conns */
224 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
225 nf_conntrack_alter_reply(ct, &new_reply);
226 ip_vs_conn_put(cp);
227 return;
228}
229
230/*
231 * Create NF conntrack expectation with wildcard (optional) source port.
232 * Then the default callback function will alter the reply and will confirm
233 * the conntrack entry when the first packet comes.
234 */
235static void
236ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
237 struct ip_vs_conn *cp, u_int8_t proto,
238 const __be16 *port, int from_rs)
239{
240 struct nf_conntrack_expect *exp;
241
242 BUG_ON(!ct || ct == &nf_conntrack_untracked);
243
244 exp = nf_ct_expect_alloc(ct);
245 if (!exp)
246 return;
247
248 if (from_rs)
249 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
250 nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
251 proto, port, &cp->cport);
252 else
253 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
254 nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
255 proto, port, &cp->vport);
256
257 exp->expectfn = ip_vs_expect_callback;
258
259 IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
260 __func__, ct, ARG_TUPLE(&exp->tuple));
261 nf_ct_expect_related(exp);
262 nf_ct_expect_put(exp);
263}
264
265/*
266 * Look at outgoing ftp packets to catch the response to a PASV command 133 * Look at outgoing ftp packets to catch the response to a PASV command
267 * from the server (inside-to-outside). 134 * from the server (inside-to-outside).
268 * When we see one, we build a connection entry with the client address, 135 * When we see one, we build a connection entry with the client address,
@@ -335,7 +202,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
335 &cp->caddr, 0, 202 &cp->caddr, 0,
336 &cp->vaddr, port, 203 &cp->vaddr, port,
337 &from, port, 204 &from, port,
338 IP_VS_CONN_F_NO_CPORT, 205 IP_VS_CONN_F_NO_CPORT |
206 IP_VS_CONN_F_NFCT,
339 cp->dest); 207 cp->dest);
340 if (!n_cp) 208 if (!n_cp)
341 return 0; 209 return 0;
@@ -371,8 +239,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
371 start-data, end-start, 239 start-data, end-start,
372 buf, buf_len); 240 buf, buf_len);
373 if (ret) 241 if (ret)
374 ip_vs_expect_related(skb, ct, n_cp, 242 ip_vs_nfct_expect_related(skb, ct, n_cp,
375 IPPROTO_TCP, NULL, 0); 243 IPPROTO_TCP, 0, 0);
376 } 244 }
377 245
378 /* 246 /*
@@ -487,7 +355,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
487 &to, port, 355 &to, port,
488 &cp->vaddr, htons(ntohs(cp->vport)-1), 356 &cp->vaddr, htons(ntohs(cp->vport)-1),
489 &cp->daddr, htons(ntohs(cp->dport)-1), 357 &cp->daddr, htons(ntohs(cp->dport)-1),
490 0, 358 IP_VS_CONN_F_NFCT,
491 cp->dest); 359 cp->dest);
492 if (!n_cp) 360 if (!n_cp)
493 return 0; 361 return 0;
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
new file mode 100644
index 000000000000..c038458d0290
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -0,0 +1,292 @@
1/*
2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
3 *
4 * Portions Copyright (C) 2001-2002
5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6 *
7 * Portions Copyright (C) 2003-2010
8 * Julian Anastasov
9 *
10 *
11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 *
26 * Authors:
27 * Ben North <ben@redfrontdoor.org>
28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
30 *
31 *
32 * Current status:
33 *
34 * - provide conntrack confirmation for new and related connections, by
35 * this way we can see their proper conntrack state in all hooks
36 * - support for all forwarding methods, not only NAT
37 * - FTP support (NAT), ability to support other NAT apps with expectations
38 * - to correctly create expectations for related NAT connections the proper
39 * NF conntrack support must be already installed, eg. ip_vs_ftp requires
40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41 * NAT rules are needed)
42 * - alter reply for NAT when forwarding packet in original direction:
43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44 * when RELATED conntrack is created from real server (Active FTP DATA)
45 * - if iptables_nat is not loaded the Passive FTP will not work (the
46 * PASV response can not be NAT-ed) but Active FTP should work
47 *
48 */
49
50#define KMSG_COMPONENT "IPVS"
51#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53#include <linux/module.h>
54#include <linux/types.h>
55#include <linux/kernel.h>
56#include <linux/errno.h>
57#include <linux/compiler.h>
58#include <linux/vmalloc.h>
59#include <linux/skbuff.h>
60#include <net/ip.h>
61#include <linux/netfilter.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ip_vs.h>
64#include <net/netfilter/nf_conntrack_core.h>
65#include <net/netfilter/nf_conntrack_expect.h>
66#include <net/netfilter/nf_conntrack_helper.h>
67#include <net/netfilter/nf_conntrack_zones.h>
68
69
/*
 * Debug print helpers.
 *
 * FMT_TUPLE/ARG_TUPLE print a conntrack tuple as
 * "src-addr:src-port->dst-addr:dst-port/protonum"; T is a pointer to the
 * tuple. FMT_CONN/ARG_CONN print an IPVS connection as
 * "caddr:cport->vaddr:vport->daddr:dport/protocol:state"; C is a pointer
 * to the connection. Addresses are printed with %pI4 and ports are passed
 * through ntohs().
 */
#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
#define ARG_TUPLE(T)	&(T)->src.u3.ip, ntohs((T)->src.u.all), \
			&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
			(T)->dst.protonum

#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
#define ARG_CONN(C)	&((C)->caddr.ip), ntohs((C)->cport), \
			&((C)->vaddr.ip), ntohs((C)->vport), \
			&((C)->daddr.ip), ntohs((C)->dport), \
			(C)->protocol, (C)->state
80
81void
82ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
83{
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple new_tuple;
87
88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
89 nf_ct_is_dying(ct))
90 return;
91
92 /* Never alter conntrack for non-NAT conns */
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return;
95
96 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return;
99
100 /*
101 * The connection is not yet in the hashtable, so we update it.
102 * CIP->VIP will remain the same, so leave the tuple in
103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104 * real-server we will see RIP->DIP.
105 */
106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
107 /*
108 * This will also take care of UDP and other protocols.
109 */
110 if (outin) {
111 new_tuple.src.u3 = cp->daddr;
112 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113 new_tuple.dst.protonum != IPPROTO_ICMPV6)
114 new_tuple.src.u.tcp.port = cp->dport;
115 } else {
116 new_tuple.dst.u3 = cp->vaddr;
117 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118 new_tuple.dst.protonum != IPPROTO_ICMPV6)
119 new_tuple.dst.u.tcp.port = cp->vport;
120 }
121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122 "ctinfo=%d, old reply=" FMT_TUPLE
123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124 __func__, ct, ct->status, ctinfo,
125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126 ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127 nf_conntrack_alter_reply(ct, &new_tuple);
128}
129
/*
 * Confirm the conntrack attached to skb.
 *
 * cp is not used by this implementation. The return value is whatever
 * nf_conntrack_confirm() returns for the packet.
 */
int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	int verdict = nf_conntrack_confirm(skb);

	return verdict;
}
134
135/*
136 * Called from init_conntrack() as expectfn handler.
137 */
138static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139 struct nf_conntrack_expect *exp)
140{
141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp;
143
144 if (exp->tuple.src.l3num != PF_INET)
145 return;
146
147 /*
148 * We assume that no NF locks are held before this callback.
149 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
150 * expectations even if they use wildcard values, now we provide the
151 * actual values from the newly created original conntrack direction.
152 * The conntrack is confirmed when packet reaches IPVS hooks.
153 */
154
155 /* RS->CLIENT */
156 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
157 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
158 &orig->src.u3, orig->src.u.tcp.port,
159 &orig->dst.u3, orig->dst.u.tcp.port);
160 if (cp) {
161 /* Change reply CLIENT->RS to CLIENT->VS */
162 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
163 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
164 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
165 __func__, ct, ct->status,
166 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
167 ARG_CONN(cp));
168 new_reply.dst.u3 = cp->vaddr;
169 new_reply.dst.u.tcp.port = cp->vport;
170 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
171 ", inout cp=" FMT_CONN "\n",
172 __func__, ct,
173 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
174 ARG_CONN(cp));
175 goto alter;
176 }
177
178 /* CLIENT->VS */
179 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
180 &orig->src.u3, orig->src.u.tcp.port,
181 &orig->dst.u3, orig->dst.u.tcp.port);
182 if (cp) {
183 /* Change reply VS->CLIENT to RS->CLIENT */
184 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
185 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
186 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
187 __func__, ct, ct->status,
188 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
189 ARG_CONN(cp));
190 new_reply.src.u3 = cp->daddr;
191 new_reply.src.u.tcp.port = cp->dport;
192 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
193 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
194 __func__, ct,
195 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
196 ARG_CONN(cp));
197 goto alter;
198 }
199
200 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
201 " - unknown expect\n",
202 __func__, ct, ct->status, ARG_TUPLE(orig));
203 return;
204
205alter:
206 /* Never alter conntrack for non-NAT conns */
207 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
208 nf_conntrack_alter_reply(ct, &new_reply);
209 ip_vs_conn_put(cp);
210 return;
211}
212
213/*
214 * Create NF conntrack expectation with wildcard (optional) source port.
215 * Then the default callback function will alter the reply and will confirm
216 * the conntrack entry when the first packet comes.
217 * Use port 0 to expect connection from any port.
218 */
219void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
220 struct ip_vs_conn *cp, u_int8_t proto,
221 const __be16 port, int from_rs)
222{
223 struct nf_conntrack_expect *exp;
224
225 if (ct == NULL || nf_ct_is_untracked(ct))
226 return;
227
228 exp = nf_ct_expect_alloc(ct);
229 if (!exp)
230 return;
231
232 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
233 from_rs ? &cp->daddr : &cp->caddr,
234 from_rs ? &cp->caddr : &cp->vaddr,
235 proto, port ? &port : NULL,
236 from_rs ? &cp->cport : &cp->vport);
237
238 exp->expectfn = ip_vs_nfct_expect_callback;
239
240 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
241 __func__, ct, ARG_TUPLE(&exp->tuple));
242 nf_ct_expect_related(exp);
243 nf_ct_expect_put(exp);
244}
245EXPORT_SYMBOL(ip_vs_nfct_expect_related);
246
247/*
248 * Our connection was terminated, try to drop the conntrack immediately
249 */
250void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
251{
252 struct nf_conntrack_tuple_hash *h;
253 struct nf_conn *ct;
254 struct nf_conntrack_tuple tuple;
255
256 if (!cp->cport)
257 return;
258
259 tuple = (struct nf_conntrack_tuple) {
260 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
261 tuple.src.u3 = cp->caddr;
262 tuple.src.u.all = cp->cport;
263 tuple.src.l3num = cp->af;
264 tuple.dst.u3 = cp->vaddr;
265 tuple.dst.u.all = cp->vport;
266
267 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
268 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
272 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */
275 if (del_timer(&ct->timeout)) {
276 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
277 FMT_TUPLE "\n",
278 __func__, ct, ARG_TUPLE(&tuple));
279 if (ct->timeout.function)
280 ct->timeout.function(ct->timeout.data);
281 } else {
282 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
283 FMT_TUPLE "\n",
284 __func__, ct, ARG_TUPLE(&tuple));
285 }
286 nf_ct_put(ct);
287 } else {
288 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
289 __func__, ARG_TUPLE(&tuple));
290 }
291}
292
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 49df6bea6a2d..8817afa34e6a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -28,7 +28,6 @@
28#include <net/ip6_route.h> 28#include <net/ip6_route.h>
29#include <linux/icmpv6.h> 29#include <linux/icmpv6.h>
30#include <linux/netfilter.h> 30#include <linux/netfilter.h>
31#include <net/netfilter/nf_conntrack.h>
32#include <linux/netfilter_ipv4.h> 31#include <linux/netfilter_ipv4.h>
33 32
34#include <net/ip_vs.h> 33#include <net/ip_vs.h>
@@ -194,12 +193,37 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
194 dst_release(old_dst); 193 dst_release(old_dst);
195} 194}
196 195
197#define IP_VS_XMIT(pf, skb, rt) \ 196#define IP_VS_XMIT_TUNNEL(skb, cp) \
197({ \
198 int __ret = NF_ACCEPT; \
199 \
200 if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \
201 __ret = ip_vs_confirm_conntrack(skb, cp); \
202 if (__ret == NF_ACCEPT) { \
203 nf_reset(skb); \
204 (skb)->ip_summed = CHECKSUM_NONE; \
205 } \
206 __ret; \
207})
208
209#define IP_VS_XMIT_NAT(pf, skb, cp) \
198do { \ 210do { \
199 (skb)->ipvs_property = 1; \ 211 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
212 (skb)->ipvs_property = 1; \
213 else \
214 ip_vs_update_conntrack(skb, cp, 1); \
200 skb_forward_csum(skb); \ 215 skb_forward_csum(skb); \
201 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ 216 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
202 (rt)->dst.dev, dst_output); \ 217 skb_dst(skb)->dev, dst_output); \
218} while (0)
219
220#define IP_VS_XMIT(pf, skb, cp) \
221do { \
222 if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \
223 (skb)->ipvs_property = 1; \
224 skb_forward_csum(skb); \
225 NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \
226 skb_dst(skb)->dev, dst_output); \
203} while (0) 227} while (0)
204 228
205 229
@@ -271,7 +295,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
271 /* Another hack: avoid icmp_send in ip_fragment */ 295 /* Another hack: avoid icmp_send in ip_fragment */
272 skb->local_df = 1; 296 skb->local_df = 1;
273 297
274 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 298 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
275 299
276 LeaveFunction(10); 300 LeaveFunction(10);
277 return NF_STOLEN; 301 return NF_STOLEN;
@@ -335,7 +359,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
335 /* Another hack: avoid icmp_send in ip_fragment */ 359 /* Another hack: avoid icmp_send in ip_fragment */
336 skb->local_df = 1; 360 skb->local_df = 1;
337 361
338 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 362 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
339 363
340 LeaveFunction(10); 364 LeaveFunction(10);
341 return NF_STOLEN; 365 return NF_STOLEN;
@@ -349,36 +373,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
349} 373}
350#endif 374#endif
351 375
352void
353ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
354{
355 struct nf_conn *ct = (struct nf_conn *)skb->nfct;
356 struct nf_conntrack_tuple new_tuple;
357
358 if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
359 return;
360
361 /*
362 * The connection is not yet in the hashtable, so we update it.
363 * CIP->VIP will remain the same, so leave the tuple in
364 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
365 * real-server we will see RIP->DIP.
366 */
367 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
368 if (outin)
369 new_tuple.src.u3 = cp->daddr;
370 else
371 new_tuple.dst.u3 = cp->vaddr;
372 /*
373 * This will also take care of UDP and other protocols.
374 */
375 if (outin)
376 new_tuple.src.u.tcp.port = cp->dport;
377 else
378 new_tuple.dst.u.tcp.port = cp->vport;
379 nf_conntrack_alter_reply(ct, &new_tuple);
380}
381
382/* 376/*
383 * NAT transmitter (only for outside-to-inside nat forwarding) 377 * NAT transmitter (only for outside-to-inside nat forwarding)
384 * Not used for related ICMP 378 * Not used for related ICMP
@@ -434,8 +428,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
434 428
435 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 429 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
436 430
437 ip_vs_update_conntrack(skb, cp, 1);
438
439 /* FIXME: when application helper enlarges the packet and the length 431 /* FIXME: when application helper enlarges the packet and the length
440 is larger than the MTU of outgoing device, there will be still 432 is larger than the MTU of outgoing device, there will be still
441 MTU problem. */ 433 MTU problem. */
@@ -443,7 +435,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
443 /* Another hack: avoid icmp_send in ip_fragment */ 435 /* Another hack: avoid icmp_send in ip_fragment */
444 skb->local_df = 1; 436 skb->local_df = 1;
445 437
446 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 438 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp);
447 439
448 LeaveFunction(10); 440 LeaveFunction(10);
449 return NF_STOLEN; 441 return NF_STOLEN;
@@ -451,8 +443,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
451 tx_error_icmp: 443 tx_error_icmp:
452 dst_link_failure(skb); 444 dst_link_failure(skb);
453 tx_error: 445 tx_error:
454 LeaveFunction(10);
455 kfree_skb(skb); 446 kfree_skb(skb);
447 LeaveFunction(10);
456 return NF_STOLEN; 448 return NF_STOLEN;
457 tx_error_put: 449 tx_error_put:
458 ip_rt_put(rt); 450 ip_rt_put(rt);
@@ -512,8 +504,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
512 504
513 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 505 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
514 506
515 ip_vs_update_conntrack(skb, cp, 1);
516
517 /* FIXME: when application helper enlarges the packet and the length 507 /* FIXME: when application helper enlarges the packet and the length
518 is larger than the MTU of outgoing device, there will be still 508 is larger than the MTU of outgoing device, there will be still
519 MTU problem. */ 509 MTU problem. */
@@ -521,7 +511,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
521 /* Another hack: avoid icmp_send in ip_fragment */ 511 /* Another hack: avoid icmp_send in ip_fragment */
522 skb->local_df = 1; 512 skb->local_df = 1;
523 513
524 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 514 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp);
525 515
526 LeaveFunction(10); 516 LeaveFunction(10);
527 return NF_STOLEN; 517 return NF_STOLEN;
@@ -571,6 +561,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
571 struct iphdr *iph; /* Our new IP header */ 561 struct iphdr *iph; /* Our new IP header */
572 unsigned int max_headroom; /* The extra header space needed */ 562 unsigned int max_headroom; /* The extra header space needed */
573 int mtu; 563 int mtu;
564 int ret;
574 565
575 EnterFunction(10); 566 EnterFunction(10);
576 567
@@ -655,7 +646,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
655 /* Another hack: avoid icmp_send in ip_fragment */ 646 /* Another hack: avoid icmp_send in ip_fragment */
656 skb->local_df = 1; 647 skb->local_df = 1;
657 648
658 ip_local_out(skb); 649 ret = IP_VS_XMIT_TUNNEL(skb, cp);
650 if (ret == NF_ACCEPT)
651 ip_local_out(skb);
652 else if (ret == NF_DROP)
653 kfree_skb(skb);
659 654
660 LeaveFunction(10); 655 LeaveFunction(10);
661 656
@@ -681,6 +676,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
681 struct ipv6hdr *iph; /* Our new IP header */ 676 struct ipv6hdr *iph; /* Our new IP header */
682 unsigned int max_headroom; /* The extra header space needed */ 677 unsigned int max_headroom; /* The extra header space needed */
683 int mtu; 678 int mtu;
679 int ret;
684 680
685 EnterFunction(10); 681 EnterFunction(10);
686 682
@@ -761,7 +757,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
761 /* Another hack: avoid icmp_send in ip_fragment */ 757 /* Another hack: avoid icmp_send in ip_fragment */
762 skb->local_df = 1; 758 skb->local_df = 1;
763 759
764 ip6_local_out(skb); 760 ret = IP_VS_XMIT_TUNNEL(skb, cp);
761 if (ret == NF_ACCEPT)
762 ip6_local_out(skb);
763 else if (ret == NF_DROP)
764 kfree_skb(skb);
765 765
766 LeaveFunction(10); 766 LeaveFunction(10);
767 767
@@ -820,7 +820,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
820 /* Another hack: avoid icmp_send in ip_fragment */ 820 /* Another hack: avoid icmp_send in ip_fragment */
821 skb->local_df = 1; 821 skb->local_df = 1;
822 822
823 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 823 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
824 824
825 LeaveFunction(10); 825 LeaveFunction(10);
826 return NF_STOLEN; 826 return NF_STOLEN;
@@ -873,7 +873,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
873 /* Another hack: avoid icmp_send in ip_fragment */ 873 /* Another hack: avoid icmp_send in ip_fragment */
874 skb->local_df = 1; 874 skb->local_df = 1;
875 875
876 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 876 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
877 877
878 LeaveFunction(10); 878 LeaveFunction(10);
879 return NF_STOLEN; 879 return NF_STOLEN;
@@ -947,7 +947,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
947 /* Another hack: avoid icmp_send in ip_fragment */ 947 /* Another hack: avoid icmp_send in ip_fragment */
948 skb->local_df = 1; 948 skb->local_df = 1;
949 949
950 IP_VS_XMIT(NFPROTO_IPV4, skb, rt); 950 IP_VS_XMIT(NFPROTO_IPV4, skb, cp);
951 951
952 rc = NF_STOLEN; 952 rc = NF_STOLEN;
953 goto out; 953 goto out;
@@ -1022,7 +1022,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1022 /* Another hack: avoid icmp_send in ip_fragment */ 1022 /* Another hack: avoid icmp_send in ip_fragment */
1023 skb->local_df = 1; 1023 skb->local_df = 1;
1024 1024
1025 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1025 IP_VS_XMIT(NFPROTO_IPV6, skb, cp);
1026 1026
1027 rc = NF_STOLEN; 1027 rc = NF_STOLEN;
1028 goto out; 1028 goto out;